;;; isd-turtle.el --- Utility to dump ideographic-structure as Turtle files
-;; Copyright (C) 2017 MORIOKA Tomohiko
+;; Copyright (C) 2017, 2018 MORIOKA Tomohiko
;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
;; Keywords: Ideographic Structures (漢字構造、解字), IDS, CHISE, RDF, Turtle
=ucs-itaiji-004
=ucs-itaiji-005
=ucs-itaiji-006
+ =ucs-itaiji-007
+ =ucs-itaiji-008
=ucs-itaiji-009
+ =ucs-itaiji-010
=ucs-itaiji-084
=ucs-var-001
=ucs-var-002
=ucs-var-003
=ucs-var-004
+ =ucs-var-005
=cns11643-1 =cns11643-2 =cns11643-3
=cns11643-4 =cns11643-5 =cns11643-6 =cns11643-7
=gb2312
=>jis-x0208 =>jis-x0213-1
=>jis-x0208@1997
=>ucs@iwds-1
+ =>ucs@cognate
=>ucs@component
=>iwds-1
=>ucs@iso
=jis-x0208@1983 =jis-x0208@1978
=>ucs-itaiji-001
=>ucs-itaiji-002
+ =>ucs-itaiji-003
+ =>ucs-itaiji-004
=>ucs-itaiji-005
+ =>ucs-itaiji-006
+ =>ucs-itaiji-007
+ =>ucs-itaiji-009
==>ucs@bucs
=big5
=>cbeta
(defun charset-code-point-format-spec (ccs)
(cond ((memq ccs '(=ucs))
"0x%04X")
- ((memq ccs '(=gt
- =gt-k =daikanwa =adobe-japan1
- =cbeta =zinbun-oracle))
- "%05d")
- ((memq ccs '(=hanyo-denshi/ks
- =koseki =mj))
- "%06d")
- ((memq ccs '(=hanyo-denshi/tk ==hanyo-denshi/tk))
- "%08d")
(t
- "0x%X")))
+ (let ((ccs-name (symbol-name ccs)))
+ (cond
+ ((string-match
+ "\\(shinjigen\\|daikanwa/ho\\|=>iwds-1\\)"
+ ccs-name)
+ "%04d")
+ ((string-match
+ "\\(gt\\|daikanwa\\|adobe-japan1\\|cbeta\\|zinbun-oracle\\|hng\\)"
+ ccs-name)
+ "%05d")
+ ((string-match "\\(hanyo-denshi/ks\\|koseki\\|mj\\)" ccs-name)
+ "%06d")
+ ((string-match "hanyo-denshi/tk" ccs-name)
+ "%08d")
+ (t
+ "0x%X"))))))
;; (defun isd-turtle-uri-encode-feature-name (feature-name)
;; (cond
(encode-char object 'system-char-id))
)))))
-(defun isd-turtle-format-component (component separator level)
+(defun isd-turtle-format-component (component separator level prefix)
(cond ((characterp component)
(format "%s %c # %c"
(isd-turtle-encode-char component)
((setq ret (assq 'ideographic-structure component))
(if (eq separator ?\;)
(format "%s ;"
- (isd-turtle-format-char nil nil (cdr ret) (1+ level)))
- (isd-turtle-format-char nil nil (cdr ret) (1+ level)))))))))
+ (isd-turtle-format-char nil nil (cdr ret) (1+ level)
+ prefix))
+ (isd-turtle-format-char nil nil (cdr ret) (1+ level)
+ prefix))))))))
(defun isd-turtle-format-char (ccs code-point &optional ids-list level
prefix without-head-char)
(setq prefix ""))
(let ((indent (make-string (* level 4) ?\ ))
char
- idc
+ idc idc-str
p1 p2 p3
c1 c2 c3
ret)
((eq idc ?⿻)
(setq p1 'underlying
p2 'overlaying)
- ))
+ )
+ ((and idc (eq (encode-char idc '=ucs-itaiji-001) #x2FF6))
+ (setq idc-str "SLR")
+ (setq p1 'surround
+ p2 'filling)
+ )
+ ((and idc (eq (encode-char idc '=ucs-var-001) #x2FF0))
+ (setq idc-str "⿰・SLR")
+ (setq p1 'left
+ p2 'right)
+ )
+ ((and idc (eq (encode-char idc '=>iwds-1) 307))
+ (setq idc-str "⿰・⿺")
+ (setq p1 'left
+ p2 'right)
+ )
+ ((and idc (eq (encode-char idc '=>iwds-1) 305))
+ (setq idc-str "⿱・⿸")
+ (setq p1 'above
+ p2 'below)
+ )
+ ((and idc (eq (encode-char idc '=>ucs@component) #x2FF5))
+ (setq idc-str "⿱・⿵")
+ (setq p1 'above
+ p2 'below)
+ )
+ )
(cond
(p3
(format "%s
-%s %s:structure [ a idc:%c ;
+%s %s:structure [ a idc:%s ;
%s %s:%-8s %s
%s %s:%-8s %s
%s %s:%-8s %s
(chise-turtle-format-ccs-code-point ccs code-point)
char)
"["))
- indent prefix idc
- indent prefix p1 (isd-turtle-format-component c1 ?\; (1+ level))
- indent prefix p2 (isd-turtle-format-component c2 ?\; (1+ level))
- indent prefix p3 (isd-turtle-format-component c3 ?\ (1+ level))
+ indent prefix (or idc-str (char-to-string idc))
+ indent prefix p1 (isd-turtle-format-component c1 ?\; (1+ level) prefix)
+ indent prefix p2 (isd-turtle-format-component c2 ?\; (1+ level) prefix)
+ indent prefix p3 (isd-turtle-format-component c3 ?\ (1+ level) prefix)
indent
(if without-head-char
""
)
(idc
(format "%s
-%s %s:structure [ a idc:%c ;
+%s %s:structure [ a idc:%s ;
%s %s:%-8s %s
%s %s:%-8s %s
%s ]%s"
(chise-turtle-format-ccs-code-point ccs code-point)
char)
"["))
- indent prefix idc
- indent prefix p1 (isd-turtle-format-component c1 ?\; (1+ level))
- indent prefix p2 (isd-turtle-format-component c2 ?\ (1+ level))
+ indent prefix (or idc-str (char-to-string idc))
+ indent prefix p1 (isd-turtle-format-component c1 ?\; (1+ level) prefix)
+ indent prefix p2 (isd-turtle-format-component c2 ?\ (1+ level) prefix)
indent
(if without-head-char
""