;;; isd-turtle.el --- Utility to dump ideographic-structure as Turtle files
-;; Copyright (C) 2017 MORIOKA Tomohiko
+;; Copyright (C) 2017, 2018 MORIOKA Tomohiko
;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
;; Keywords: Ideographic Structures (漢字構造、解字), IDS, CHISE, RDF, Turtle
(setq est-coded-charset-priority-list
'(; =ucs
=mj
- =adobe-japan1
+ =adobe-japan1-0
+ =adobe-japan1-1
+ =adobe-japan1-2
+ =adobe-japan1-3
+ =adobe-japan1-4
+ =adobe-japan1-5
+ =adobe-japan1-6
=ucs@iso
=jis-x0208 =jis-x0208@1990
=jis-x0213-1
=jis-x0213-2
=jis-x0212
=gt
+ =hanyo-denshi/ks
=hanyo-denshi/tk
=ucs-itaiji-001
=ucs-itaiji-002
=ucs-itaiji-003
+ =ucs-itaiji-004
=ucs-itaiji-005
+ =ucs-itaiji-006
+ =ucs-itaiji-007
+ =ucs-itaiji-008
+ =ucs-itaiji-009
+ =ucs-itaiji-010
=ucs-itaiji-084
=ucs-var-001
=ucs-var-002
+ =ucs-var-003
=ucs-var-004
+ =ucs-var-005
=cns11643-1 =cns11643-2 =cns11643-3
=cns11643-4 =cns11643-5 =cns11643-6 =cns11643-7
=gb2312
=big5-cdp
+ =ks-x1001
=gt-k
=ucs@unicode
=ucs@JP/hanazono
=gb12345
+ =ucs@cns
+ =ucs@gb
=zinbun-oracle =>zinbun-oracle
=daikanwa
=ruimoku-v6
=cbeta =jef-china3
+ =daikanwa/+2p
+ =+>ucs@iso =+>ucs@unicode
+ =+>ucs@jis
+ =+>ucs@cns
+ =+>ucs@ks
+ =+>ucs@jis/1990
+ =>mj
=>jis-x0208 =>jis-x0213-1
=>jis-x0208@1997
=>ucs@iwds-1
+ =>ucs@cognate
=>ucs@component
=>iwds-1
=>ucs@iso
=>ucs@unicode
- =+>ucs@iso =+>ucs@unicode
=>ucs@jis =>ucs@cns =>ucs@ks
+ =>gt
+ =>gt-k
=>>ucs@iso =>>ucs@unicode
=>>ucs@jis =>>ucs@cns =>>ucs@ks
+ =>>gt-k
+ =>>hanyo-denshi/ks
==mj
+ ==ucs@iso
+ ==ucs@unicode
+ ==adobe-japan1-0
+ ==adobe-japan1-1
+ ==adobe-japan1-2
+ ==adobe-japan1-3
+ ==adobe-japan1-4
+ ==adobe-japan1-5
+ ==adobe-japan1-6
+ ==ks-x1001
+ ==hanyo-denshi/ks
+ ==hanyo-denshi/tk
+ ==ucs@jis
+ ==gt
+ ==cns11643-1 ==cns11643-2 ==cns11643-3
+ ==cns11643-4 ==cns11643-5 ==cns11643-6 ==cns11643-7
+ ==jis-x0212
+ ==ucs@cns
+ ==koseki
+ ==daikanwa
+ ==gt-k
+ ==ucs@gb
+ ==ucs-itaiji-003
+ ==ucs@JP/hanazono
+ ==daikanwa/+2p
=>>jis-x0208 =>>jis-x0213-1 =>>jis-x0213-2
=+>jis-x0208 =+>jis-x0213-1 =+>jis-x0213-2
+ =+>hanyo-denshi/jt
=+>jis-x0208@1978
=>>gt
=+>adobe-japan1
=>>adobe-japan1
=jis-x0208@1983 =jis-x0208@1978
=>ucs-itaiji-001
+ =>ucs-itaiji-002
+ =>ucs-itaiji-003
+ =>ucs-itaiji-004
=>ucs-itaiji-005
- ==ucs@unicode
+ =>ucs-itaiji-006
+ =>ucs-itaiji-007
+ =>ucs-itaiji-009
==>ucs@bucs
=big5
=>cbeta
+ ===mj
+ ===ucs@iso
+ ===ucs@unicode
+ ===hanyo-denshi/ks
+ ===ks-x1001
+ ===gt
+ ===gt-k
+ ===ucs@ks
+ ===ucs@gb
+ =shinjigen
+ =shinjigen@rev
+ =shinjigen@1ed
+ =shinjigen/+p@rev
+ ==shinjigen
+ ==shinjigen@rev
+ ==daikanwa/+p
+ ==shinjigen@1ed
+ ===daikanwa/+p
+ =>daikanwa/ho
+ ===daikanwa/ho
))
-(defvar isd-turtle-ccs-list nil)
+;; (defvar isd-turtle-ccs-list nil)
+(defvar chise-turtle-ccs-prefix-alist nil)
(defun charset-code-point-format-spec (ccs)
(cond ((memq ccs '(=ucs))
"0x%04X")
- ((memq ccs '(=gt
- =gt-k =daikanwa =adobe-japan1
- =cbeta =zinbun-oracle))
- "%05d")
- ((memq ccs '(=hanyo-denshi/ks
- =koseki =mj))
- "%06d")
- ((memq ccs '(=hanyo-denshi/tk ==hanyo-denshi/tk))
- "%08d")
(t
- "0x%X")))
-
-(defun isd-turtle-uri-encode-feature-name (feature-name)
+ (let ((ccs-name (symbol-name ccs)))
+ (cond
+ ((string-match
+ "\\(shinjigen\\|daikanwa/ho\\|=>iwds-1\\)"
+ ccs-name)
+ "%04d")
+ ((string-match
+ "\\(gt\\|daikanwa\\|adobe-japan1\\|cbeta\\|zinbun-oracle\\|hng\\)"
+ ccs-name)
+ "%05d")
+ ((string-match "\\(hanyo-denshi/ks\\|koseki\\|mj\\)" ccs-name)
+ "%06d")
+ ((string-match "hanyo-denshi/tk" ccs-name)
+ "%08d")
+ (t
+ "0x%X"))))))
+
+;; (defun isd-turtle-uri-encode-feature-name (feature-name)
+;; (cond
+;; ((eq '=ucs feature-name)
+;; "a.ucs")
+;; ((eq '==>ucs@bucs feature-name)
+;; "bucs")
+;; (t
+;; (mapconcat (lambda (c)
+;; (if (eq c ?@)
+;; "_"
+;; (char-to-string c)))
+;; (www-uri-encode-feature-name feature-name)
+;; ""))))
+(defun chise-turtle-uri-encode-ccs-name (feature-name)
(cond
((eq '=ucs feature-name)
"a.ucs")
+ ((eq '=big5 feature-name)
+ "a.big5")
((eq '==>ucs@bucs feature-name)
"bucs")
(t
(mapconcat (lambda (c)
- (if (eq c ?@)
- "_"
- (char-to-string c)))
+ (cond
+ ((eq c ?@)
+ "_")
+ ((eq c ?+)
+ "._.")
+ ((eq c ?=)
+ ".:.")
+ (t
+ (char-to-string c))))
(www-uri-encode-feature-name feature-name)
""))))
-(defun isd-turtle-format-ccs-code-point (ccs code-point)
- (unless (memq ccs isd-turtle-ccs-list)
- (setq isd-turtle-ccs-list (cons ccs isd-turtle-ccs-list)))
- (format "%s:%s"
- (isd-turtle-uri-encode-feature-name ccs)
- (format (charset-code-point-format-spec ccs)
- code-point)))
+;; (defun isd-turtle-format-ccs-code-point (ccs code-point)
+;; (unless (memq ccs isd-turtle-ccs-list)
+;; (setq isd-turtle-ccs-list (cons ccs isd-turtle-ccs-list)))
+;; (format "%s:%s"
+;; (isd-turtle-uri-encode-feature-name ccs)
+;; (format (charset-code-point-format-spec ccs)
+;; code-point)))
+(defun chise-turtle-format-ccs-code-point (ccs code-point)
+ (let ((ccs-uri (chise-turtle-uri-encode-ccs-name ccs)))
+ (unless (assoc ccs-uri chise-turtle-ccs-prefix-alist)
+ (setq chise-turtle-ccs-prefix-alist
+ (cons (cons ccs-uri ccs)
+ chise-turtle-ccs-prefix-alist)))
+ (format "%s:%s"
+ ccs-uri
+ (format (charset-code-point-format-spec ccs)
+ code-point))))
(defun isd-turtle-encode-char (object)
(let ((ccs-list est-coded-charset-priority-list)
ccs ret)
(if (setq ret (encode-char object '=ucs))
- (isd-turtle-format-ccs-code-point '=ucs ret)
+ (chise-turtle-format-ccs-code-point '=ucs ret)
(while (and ccs-list
(setq ccs (pop ccs-list))
(not (setq ret (encode-char object ccs 'defined-only)))))
(cond (ret
- (isd-turtle-format-ccs-code-point ccs ret)
+ (chise-turtle-format-ccs-code-point ccs ret)
)
((and (setq ccs (car (split-char object)))
(setq ret (encode-char object ccs)))
- (isd-turtle-format-ccs-code-point ccs ret)
+ (chise-turtle-format-ccs-code-point ccs ret)
)
(t
(format (if est-hide-cgi-mode
(encode-char object 'system-char-id))
)))))
-(defun isd-turtle-format-component (component separator level)
+(defun isd-turtle-format-component (component separator level prefix)
(cond ((characterp component)
(format "%s %c # %c"
(isd-turtle-encode-char component)
((setq ret (assq 'ideographic-structure component))
(if (eq separator ?\;)
(format "%s ;"
- (isd-turtle-format-char nil nil (cdr ret) (1+ level)))
- (isd-turtle-format-char nil nil (cdr ret) (1+ level)))))))))
+ (isd-turtle-format-char nil nil (cdr ret) (1+ level)
+ prefix))
+ (isd-turtle-format-char nil nil (cdr ret) (1+ level)
+ prefix))))))))
-(defun isd-turtle-format-char (ccs code-point &optional ids-list level)
+(defun isd-turtle-format-char (ccs code-point &optional ids-list level
+ prefix without-head-char)
(unless level
(setq level 0))
+ (unless prefix
+ (setq prefix ""))
(let ((indent (make-string (* level 4) ?\ ))
char
- idc
+ idc idc-str
p1 p2 p3
c1 c2 c3
ret)
((eq idc ?⿻)
(setq p1 'underlying
p2 'overlaying)
- ))
+ )
+ ((and idc (eq (encode-char idc '=ucs-itaiji-001) #x2FF6))
+ (setq idc-str "SLR")
+ (setq p1 'surround
+ p2 'filling)
+ )
+ ((and idc (eq (encode-char idc '=ucs-var-001) #x2FF0))
+ (setq idc-str "⿰・SLR")
+ (setq p1 'left
+ p2 'right)
+ )
+ ((and idc (eq (encode-char idc '=>iwds-1) 307))
+ (setq idc-str "⿰・⿺")
+ (setq p1 'left
+ p2 'right)
+ )
+ ((and idc (eq (encode-char idc '=>iwds-1) 305))
+ (setq idc-str "⿱・⿸")
+ (setq p1 'above
+ p2 'below)
+ )
+ ((and idc (eq (encode-char idc '=>ucs@component) #x2FF5))
+ (setq idc-str "⿱・⿵")
+ (setq p1 'above
+ p2 'below)
+ )
+ )
(cond
(p3
(format "%s
-%s :structure [ a idc:%c ;
-%s :%-8s %s
-%s :%-8s %s
-%s :%-8s %s
+%s %s:structure [ a idc:%s ;
+%s %s:%-8s %s
+%s %s:%-8s %s
+%s %s:%-8s %s
%s ]%s"
- (if (and ccs code-point)
- (format "%s # %c"
- (isd-turtle-format-ccs-code-point ccs code-point)
- char)
- "[")
- indent idc
- indent p1 (isd-turtle-format-component c1 ?\; (1+ level))
- indent p2 (isd-turtle-format-component c2 ?\; (1+ level))
- indent p3 (isd-turtle-format-component c3 ?\ (1+ level))
+ (if without-head-char
+ ""
+ (if (and ccs code-point)
+ (format "%s # %c"
+ (chise-turtle-format-ccs-code-point ccs code-point)
+ char)
+ "["))
+ indent prefix (or idc-str (char-to-string idc))
+ indent prefix p1 (isd-turtle-format-component c1 ?\; (1+ level) prefix)
+ indent prefix p2 (isd-turtle-format-component c2 ?\; (1+ level) prefix)
+ indent prefix p3 (isd-turtle-format-component c3 ?\ (1+ level) prefix)
indent
- (if (null char)
- (format "\n%s]"
- indent)
- ""))
+ (if without-head-char
+ ""
+ (if (null char)
+ (format "\n%s]"
+ indent)
+ "")))
)
(idc
(format "%s
-%s :structure [ a idc:%c ;
-%s :%-8s %s
-%s :%-8s %s
+%s %s:structure [ a idc:%s ;
+%s %s:%-8s %s
+%s %s:%-8s %s
%s ]%s"
- (if (and ccs code-point)
- (format "%s # %c"
- (isd-turtle-format-ccs-code-point ccs code-point)
- char)
- "[")
- indent idc
- indent p1 (isd-turtle-format-component c1 ?\; (1+ level))
- indent p2 (isd-turtle-format-component c2 ?\ (1+ level))
+ (if without-head-char
+ ""
+ (if (and ccs code-point)
+ (format "%s # %c"
+ (chise-turtle-format-ccs-code-point ccs code-point)
+ char)
+ "["))
+ indent prefix (or idc-str (char-to-string idc))
+ indent prefix p1 (isd-turtle-format-component c1 ?\; (1+ level) prefix)
+ indent prefix p2 (isd-turtle-format-component c2 ?\ (1+ level) prefix)
indent
- (if (null char)
- (format "\n%s]"
- indent)
- ""))))
+ (if without-head-char
+ ""
+ (if (null char)
+ (format "\n%s]"
+ indent)
+ "")))))
))
(defun isd-turtle-insert-char (ccs code-point)
(defun isd-turtle-dump-range (file path func &rest args)
(with-temp-buffer
(let ((coding-system-for-write 'utf-8-mcs-er)
- isd-turtle-ccs-list)
+ ;; isd-turtle-ccs-list
+ chise-turtle-ccs-prefix-alist)
(if (file-directory-p path)
(setq path (expand-file-name file path)))
(apply func args)
(goto-char (point-min))
- (dolist (ccs (sort isd-turtle-ccs-list
- #'char-attribute-name<))
- (insert (format "@prefix %s: <%s%s=> .\n"
- (isd-turtle-uri-encode-feature-name ccs)
- "http://www.chise.org/est/view/character/"
- (www-uri-encode-feature-name ccs))))
+ ;; (dolist (ccs (sort isd-turtle-ccs-list
+ ;; #'char-attribute-name<))
+ ;; (insert (format "@prefix %s: <%s%s=> .\n"
+ ;; (isd-turtle-uri-encode-feature-name ccs)
+ ;; "http://www.chise.org/est/view/character/"
+ ;; (www-uri-encode-feature-name ccs))))
+ (dolist (cell (sort chise-turtle-ccs-prefix-alist
+ (lambda (a b)
+ (char-attribute-name< (cdr a)(cdr b)))))
+ (insert (format "@prefix %s: <%s/%s=> .\n"
+ (car cell)
+ "http://www.chise.org/est/view/character"
+ (www-uri-encode-feature-name (cdr cell)))))
(insert "\n")
(goto-char (point-min))
(insert "# -*- coding: utf-8-mcs-er -*-\n")
#'isd-turtle-insert-ccs-ranges
'=mj '(60000 . 69999)))
+;;;###autoload
+(defun isd-turtle-dump-all (directory)
+ (interactive "DISD directory : ")
+ (isd-turtle-dump-ucs-basic directory)
+ (isd-turtle-dump-ucs-ext-a directory)
+ (isd-turtle-dump-ucs-ext-b-1 directory)
+ (isd-turtle-dump-ucs-ext-b-2 directory)
+ (isd-turtle-dump-ucs-ext-b-3 directory)
+ (isd-turtle-dump-ucs-ext-b-4 directory)
+ (isd-turtle-dump-ucs-ext-b-5 directory)
+ (isd-turtle-dump-ucs-ext-b-6 directory)
+ (isd-turtle-dump-ucs-ext-c directory)
+ (isd-turtle-dump-ucs-ext-d directory)
+ (isd-turtle-dump-ucs-ext-e directory)
+ (isd-turtle-dump-mj-0 directory)
+ (isd-turtle-dump-mj-1 directory)
+ (isd-turtle-dump-mj-2 directory)
+ (isd-turtle-dump-mj-3 directory)
+ (isd-turtle-dump-mj-4 directory)
+ (isd-turtle-dump-mj-5 directory)
+ (isd-turtle-dump-mj-6 directory)
+ )
+
;;; @ End.
;;;