;;; isd-turtle.el --- Utility to dump ideographic-structure as Turtle files
-;; Copyright (C) 2017 MORIOKA Tomohiko
+;; Copyright (C) 2017, 2018 MORIOKA Tomohiko
;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
;; Keywords: Ideographic Structures (漢字構造、解字), IDS, CHISE, RDF, Turtle
(setq est-coded-charset-priority-list
'(; =ucs
=mj
- =adobe-japan1
+ =adobe-japan1-0
+ =adobe-japan1-1
+ =adobe-japan1-2
+ =adobe-japan1-3
+ =adobe-japan1-4
+ =adobe-japan1-5
+ =adobe-japan1-6
=ucs@iso
=jis-x0208 =jis-x0208@1990
=jis-x0213-1
=jis-x0213-2
=jis-x0212
=gt
+ =hanyo-denshi/ks
=hanyo-denshi/tk
=ucs-itaiji-001
=ucs-itaiji-002
=ucs-itaiji-003
+ =ucs-itaiji-004
=ucs-itaiji-005
=ucs-itaiji-006
+ =ucs-itaiji-007
+ =ucs-itaiji-008
+ =ucs-itaiji-009
+ =ucs-itaiji-010
=ucs-itaiji-084
=ucs-var-001
=ucs-var-002
+ =ucs-var-003
=ucs-var-004
+ =ucs-var-005
=cns11643-1 =cns11643-2 =cns11643-3
=cns11643-4 =cns11643-5 =cns11643-6 =cns11643-7
=gb2312
=big5-cdp
+ =ks-x1001
=gt-k
=ucs@unicode
=ucs@JP/hanazono
=gb12345
+ =ucs@cns
+ =ucs@gb
=zinbun-oracle =>zinbun-oracle
=daikanwa
=ruimoku-v6
=cbeta =jef-china3
+ =daikanwa/+2p
+ =+>ucs@iso =+>ucs@unicode
+ =+>ucs@jis
+ =+>ucs@cns
+ =+>ucs@ks
+ =+>ucs@jis/1990
+ =>mj
=>jis-x0208 =>jis-x0213-1
=>jis-x0208@1997
=>ucs@iwds-1
+ =>ucs@cognate
=>ucs@component
=>iwds-1
=>ucs@iso
=>ucs@unicode
- =+>ucs@iso =+>ucs@unicode
=>ucs@jis =>ucs@cns =>ucs@ks
+ =>gt
+ =>gt-k
=>>ucs@iso =>>ucs@unicode
=>>ucs@jis =>>ucs@cns =>>ucs@ks
+ =>>gt-k
+ =>>hanyo-denshi/ks
==mj
+ ==ucs@iso
+ ==ucs@unicode
+ ==adobe-japan1-0
+ ==adobe-japan1-1
+ ==adobe-japan1-2
+ ==adobe-japan1-3
+ ==adobe-japan1-4
+ ==adobe-japan1-5
+ ==adobe-japan1-6
+ ==ks-x1001
+ ==hanyo-denshi/ks
+ ==hanyo-denshi/tk
+ ==ucs@jis
+ ==gt
+ ==cns11643-1 ==cns11643-2 ==cns11643-3
+ ==cns11643-4 ==cns11643-5 ==cns11643-6 ==cns11643-7
+ ==jis-x0212
+ ==ucs@cns
+ ==koseki
+ ==daikanwa
+ ==gt-k
+ ==ucs@gb
+ ==ucs-itaiji-003
+ ==ucs@JP/hanazono
+ ==daikanwa/+2p
=>>jis-x0208 =>>jis-x0213-1 =>>jis-x0213-2
=+>jis-x0208 =+>jis-x0213-1 =+>jis-x0213-2
+ =+>hanyo-denshi/jt
=+>jis-x0208@1978
=>>gt
=+>adobe-japan1
=jis-x0208@1983 =jis-x0208@1978
=>ucs-itaiji-001
=>ucs-itaiji-002
+ =>ucs-itaiji-003
+ =>ucs-itaiji-004
=>ucs-itaiji-005
- ==ucs@unicode
+ =>ucs-itaiji-006
+ =>ucs-itaiji-007
+ =>ucs-itaiji-009
==>ucs@bucs
=big5
=>cbeta
+ ===mj
+ ===ucs@iso
+ ===ucs@unicode
+ ===hanyo-denshi/ks
+ ===ks-x1001
+ ===gt
+ ===gt-k
+ ===ucs@ks
+ ===ucs@gb
+ =shinjigen
+ =shinjigen@rev
+ =shinjigen@1ed
+ =shinjigen/+p@rev
+ ==shinjigen
+ ==shinjigen@rev
+ ==daikanwa/+p
+ ==shinjigen@1ed
+ ===daikanwa/+p
+ =>daikanwa/ho
+ ===daikanwa/ho
))
;; (defvar isd-turtle-ccs-list nil)
(defun charset-code-point-format-spec (ccs)
(cond ((memq ccs '(=ucs))
"0x%04X")
- ((memq ccs '(=gt
- =gt-k =daikanwa =adobe-japan1
- =cbeta =zinbun-oracle))
- "%05d")
- ((memq ccs '(=hanyo-denshi/ks
- =koseki =mj))
- "%06d")
- ((memq ccs '(=hanyo-denshi/tk ==hanyo-denshi/tk))
- "%08d")
(t
- "0x%X")))
+ (let ((ccs-name (symbol-name ccs)))
+ (cond
+ ((string-match
+ "\\(shinjigen\\|daikanwa/ho\\|=>iwds-1\\)"
+ ccs-name)
+ "%04d")
+ ((string-match
+ "\\(gt\\|daikanwa\\|adobe-japan1\\|cbeta\\|zinbun-oracle\\|hng\\)"
+ ccs-name)
+ "%05d")
+ ((string-match "\\(hanyo-denshi/ks\\|koseki\\|mj\\)" ccs-name)
+ "%06d")
+ ((string-match "hanyo-denshi/tk" ccs-name)
+ "%08d")
+ (t
+ "0x%X"))))))
;; (defun isd-turtle-uri-encode-feature-name (feature-name)
;; (cond
(encode-char object 'system-char-id))
)))))
-(defun isd-turtle-format-component (component separator level)
+(defun isd-turtle-format-component (component separator level prefix)
(cond ((characterp component)
(format "%s %c # %c"
(isd-turtle-encode-char component)
((setq ret (assq 'ideographic-structure component))
(if (eq separator ?\;)
(format "%s ;"
- (isd-turtle-format-char nil nil (cdr ret) (1+ level)))
- (isd-turtle-format-char nil nil (cdr ret) (1+ level)))))))))
+ (isd-turtle-format-char nil nil (cdr ret) (1+ level)
+ prefix))
+ (isd-turtle-format-char nil nil (cdr ret) (1+ level)
+ prefix))))))))
(defun isd-turtle-format-char (ccs code-point &optional ids-list level
prefix without-head-char)
(setq prefix ""))
(let ((indent (make-string (* level 4) ?\ ))
char
- idc
+ idc idc-str
p1 p2 p3
c1 c2 c3
ret)
((eq idc ?⿻)
(setq p1 'underlying
p2 'overlaying)
- ))
+ )
+ ((and idc (eq (encode-char idc '=ucs-itaiji-001) #x2FF6))
+ (setq idc-str "SLR")
+ (setq p1 'surround
+ p2 'filling)
+ )
+ ((and idc (eq (encode-char idc '=ucs-var-001) #x2FF0))
+ (setq idc-str "⿰・SLR")
+ (setq p1 'left
+ p2 'right)
+ )
+ ((and idc (eq (encode-char idc '=>iwds-1) 307))
+ (setq idc-str "⿰・⿺")
+ (setq p1 'left
+ p2 'right)
+ )
+ ((and idc (eq (encode-char idc '=>iwds-1) 305))
+ (setq idc-str "⿱・⿸")
+ (setq p1 'above
+ p2 'below)
+ )
+ ((and idc (eq (encode-char idc '=>ucs@component) #x2FF5))
+ (setq idc-str "⿱・⿵")
+ (setq p1 'above
+ p2 'below)
+ )
+ )
(cond
(p3
(format "%s
-%s %s:structure [ a idc:%c ;
+%s %s:structure [ a idc:%s ;
%s %s:%-8s %s
%s %s:%-8s %s
%s %s:%-8s %s
(chise-turtle-format-ccs-code-point ccs code-point)
char)
"["))
- indent prefix idc
- indent prefix p1 (isd-turtle-format-component c1 ?\; (1+ level))
- indent prefix p2 (isd-turtle-format-component c2 ?\; (1+ level))
- indent prefix p3 (isd-turtle-format-component c3 ?\ (1+ level))
+ indent prefix (or idc-str (char-to-string idc))
+ indent prefix p1 (isd-turtle-format-component c1 ?\; (1+ level) prefix)
+ indent prefix p2 (isd-turtle-format-component c2 ?\; (1+ level) prefix)
+ indent prefix p3 (isd-turtle-format-component c3 ?\ (1+ level) prefix)
indent
(if without-head-char
""
)
(idc
(format "%s
-%s %s:structure [ a idc:%c ;
+%s %s:structure [ a idc:%s ;
%s %s:%-8s %s
%s %s:%-8s %s
%s ]%s"
(chise-turtle-format-ccs-code-point ccs code-point)
char)
"["))
- indent prefix idc
- indent prefix p1 (isd-turtle-format-component c1 ?\; (1+ level))
- indent prefix p2 (isd-turtle-format-component c2 ?\ (1+ level))
+ indent prefix (or idc-str (char-to-string idc))
+ indent prefix p1 (isd-turtle-format-component c1 ?\; (1+ level) prefix)
+ indent prefix p2 (isd-turtle-format-component c2 ?\ (1+ level) prefix)
indent
(if without-head-char
""