update.
[chise/isd.git] / isd-turtle.el
index df749aa..93fea22 100644 (file)
@@ -1,6 +1,6 @@
 ;;; isd-turtle.el --- Utility to dump ideographic-structure as Turtle files
 
-;; Copyright (C) 2017 MORIOKA Tomohiko
+;; Copyright (C) 2017, 2018 MORIOKA Tomohiko
 
 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
 ;; Keywords: Ideographic Structures (漢字構造、解字), IDS, CHISE, RDF, Turtle
 (setq est-coded-charset-priority-list
   '(; =ucs
     =mj
-    =adobe-japan1
+    =adobe-japan1-0
+    =adobe-japan1-1
+    =adobe-japan1-2
+    =adobe-japan1-3
+    =adobe-japan1-4
+    =adobe-japan1-5
+    =adobe-japan1-6
     =ucs@iso
     =jis-x0208 =jis-x0208@1990
     =jis-x0213-1
     =jis-x0213-2
     =jis-x0212
     =gt
+    =hanyo-denshi/ks
     =hanyo-denshi/tk
     =ucs-itaiji-001
     =ucs-itaiji-002
     =ucs-itaiji-003
+    =ucs-itaiji-004
     =ucs-itaiji-005
     =ucs-itaiji-006
+    =ucs-itaiji-007
+    =ucs-itaiji-008
+    =ucs-itaiji-009
+    =ucs-itaiji-010
     =ucs-itaiji-084
     =ucs-var-001
     =ucs-var-002
+    =ucs-var-003
     =ucs-var-004
+    =ucs-var-005
     =cns11643-1 =cns11643-2 =cns11643-3
     =cns11643-4 =cns11643-5 =cns11643-6 =cns11643-7
     =gb2312
     =big5-cdp
+    =ks-x1001
     =gt-k
     =ucs@unicode
     =ucs@JP/hanazono
     =gb12345
+    =ucs@cns
+    =ucs@gb
     =zinbun-oracle =>zinbun-oracle
     =daikanwa
     =ruimoku-v6
     =cbeta =jef-china3
+    =daikanwa/+2p
+    =+>ucs@iso =+>ucs@unicode
+    =+>ucs@jis
+    =+>ucs@cns
+    =+>ucs@ks
+    =+>ucs@jis/1990
+    =>mj
     =>jis-x0208 =>jis-x0213-1
     =>jis-x0208@1997
     =>ucs@iwds-1
+    =>ucs@cognate
     =>ucs@component
     =>iwds-1
     =>ucs@iso
     =>ucs@unicode
-    =+>ucs@iso =+>ucs@unicode
     =>ucs@jis =>ucs@cns =>ucs@ks
+    =>gt
+    =>gt-k
     =>>ucs@iso =>>ucs@unicode
     =>>ucs@jis =>>ucs@cns =>>ucs@ks
+    =>>gt-k
+    =>>hanyo-denshi/ks
     ==mj
+    ==ucs@iso
+    ==ucs@unicode
+    ==adobe-japan1-0
+    ==adobe-japan1-1
+    ==adobe-japan1-2
+    ==adobe-japan1-3
+    ==adobe-japan1-4
+    ==adobe-japan1-5
+    ==adobe-japan1-6
+    ==ks-x1001
+    ==hanyo-denshi/ks
+    ==hanyo-denshi/tk
+    ==ucs@jis
+    ==gt
+    ==cns11643-1 ==cns11643-2 ==cns11643-3
+    ==cns11643-4 ==cns11643-5 ==cns11643-6 ==cns11643-7
+    ==jis-x0212
+    ==ucs@cns
+    ==koseki
+    ==daikanwa
+    ==gt-k
+    ==ucs@gb
+    ==ucs-itaiji-003
+    ==ucs@JP/hanazono
+    ==daikanwa/+2p
     =>>jis-x0208 =>>jis-x0213-1 =>>jis-x0213-2
     =+>jis-x0208 =+>jis-x0213-1 =+>jis-x0213-2
+    =+>hanyo-denshi/jt
     =+>jis-x0208@1978
     =>>gt
     =+>adobe-japan1
     =jis-x0208@1983 =jis-x0208@1978
     =>ucs-itaiji-001
     =>ucs-itaiji-002
+    =>ucs-itaiji-003
+    =>ucs-itaiji-004
     =>ucs-itaiji-005
-    ==ucs@unicode
+    =>ucs-itaiji-006
+    =>ucs-itaiji-007
+    =>ucs-itaiji-009
     ==>ucs@bucs
     =big5
     =>cbeta
+    ===mj
+    ===ucs@iso
+    ===ucs@unicode
+    ===hanyo-denshi/ks
+    ===ks-x1001
+    ===gt
+    ===gt-k
+    ===ucs@ks
+    ===ucs@gb
+    =shinjigen
+    =shinjigen@rev
+    =shinjigen@1ed
+    =shinjigen/+p@rev
+    ==shinjigen
+    ==shinjigen@rev
+    ==daikanwa/+p
+    ==shinjigen@1ed
+    ===daikanwa/+p
+    =>daikanwa/ho
+    ===daikanwa/ho
     ))
 
 ;; (defvar isd-turtle-ccs-list nil)
 (defun charset-code-point-format-spec (ccs)
   (cond ((memq ccs '(=ucs))
         "0x%04X")
-       ((memq ccs '(=gt
-                    =gt-k =daikanwa =adobe-japan1
-                    =cbeta =zinbun-oracle))
-        "%05d")
-       ((memq ccs '(=hanyo-denshi/ks
-                    =koseki =mj))
-        "%06d")
-       ((memq ccs '(=hanyo-denshi/tk ==hanyo-denshi/tk))
-        "%08d")
        (t
-        "0x%X")))
+        (let ((ccs-name (symbol-name ccs)))
+          (cond
+           ((string-match
+             "\\(shinjigen\\|daikanwa/ho\\|=>iwds-1\\)"
+             ccs-name)
+            "%04d")
+           ((string-match
+             "\\(gt\\|daikanwa\\|adobe-japan1\\|cbeta\\|zinbun-oracle\\|hng\\)"
+             ccs-name)
+            "%05d")
+           ((string-match "\\(hanyo-denshi/ks\\|koseki\\|mj\\)" ccs-name)
+            "%06d")
+           ((string-match "hanyo-denshi/tk" ccs-name)
+            "%08d")
+           (t
+            "0x%X"))))))
 
 ;; (defun isd-turtle-uri-encode-feature-name (feature-name)
 ;;   (cond
                     (encode-char object 'system-char-id))
             )))))
 
-(defun isd-turtle-format-component (component separator level)
+(defun isd-turtle-format-component (component separator level prefix)
   (cond ((characterp component)
         (format "%s %c # %c"
                 (isd-turtle-encode-char component)
                 ((setq ret (assq 'ideographic-structure component))
                  (if (eq separator ?\;)
                      (format "%s ;"
-                             (isd-turtle-format-char nil nil (cdr ret) (1+ level)))
-                   (isd-turtle-format-char nil nil (cdr ret) (1+ level)))))))))
+                             (isd-turtle-format-char nil nil (cdr ret) (1+ level)
+                                                     prefix))
+                   (isd-turtle-format-char nil nil (cdr ret) (1+ level)
+                                           prefix))))))))
 
 (defun isd-turtle-format-char (ccs code-point &optional ids-list level
                                   prefix without-head-char)
     (setq prefix ""))
   (let ((indent (make-string (* level 4) ?\ ))
        char
-       idc
+       idc idc-str
        p1 p2 p3
        c1 c2 c3
        ret)
      ((eq idc ?⿻)
       (setq p1 'underlying
            p2 'overlaying)
-      ))
+      )
+     ((and idc (eq (encode-char idc '=ucs-itaiji-001) #x2FF6))
+      (setq idc-str "SLR")
+      (setq p1 'surround
+           p2 'filling)
+      )
+     ((and idc (eq (encode-char idc '=ucs-var-001) #x2FF0))
+      (setq idc-str "⿰・SLR")
+      (setq p1 'left
+           p2 'right)
+      )
+     ((and idc (eq (encode-char idc '=>iwds-1) 307))
+      (setq idc-str "⿰・⿺")
+      (setq p1 'left
+           p2 'right)
+      )
+     ((and idc (eq (encode-char idc '=>iwds-1) 305))
+      (setq idc-str "⿱・⿸")
+      (setq p1 'above
+           p2 'below)
+      )
+     ((and idc (eq (encode-char idc '=>ucs@component) #x2FF5))
+      (setq idc-str "⿱・⿵")
+      (setq p1 'above
+           p2 'below)
+      )
+     )
     (cond
      (p3
       (format "%s
-%s    %s:structure [ a idc:%c ;
+%s    %s:structure [ a idc:%s ;
 %s        %s:%-8s %s
 %s        %s:%-8s %s
 %s        %s:%-8s %s
                            (chise-turtle-format-ccs-code-point ccs code-point)
                            char)
                  "["))
-             indent prefix idc
-             indent prefix p1 (isd-turtle-format-component c1 ?\; (1+ level))
-             indent prefix p2 (isd-turtle-format-component c2 ?\; (1+ level))
-             indent prefix p3 (isd-turtle-format-component c3 ?\  (1+ level))
+             indent prefix (or idc-str (char-to-string idc))
+             indent prefix p1 (isd-turtle-format-component c1 ?\; (1+ level) prefix)
+             indent prefix p2 (isd-turtle-format-component c2 ?\; (1+ level) prefix)
+             indent prefix p3 (isd-turtle-format-component c3 ?\  (1+ level) prefix)
              indent
              (if without-head-char
                  ""
       )
      (idc
       (format "%s
-%s    %s:structure [ a idc:%c ;
+%s    %s:structure [ a idc:%s ;
 %s        %s:%-8s %s
 %s        %s:%-8s %s
 %s    ]%s"
                            (chise-turtle-format-ccs-code-point ccs code-point)
                            char)
                  "["))
-             indent prefix idc
-             indent prefix p1 (isd-turtle-format-component c1 ?\; (1+ level))
-             indent prefix p2 (isd-turtle-format-component c2 ?\  (1+ level))
+             indent prefix (or idc-str (char-to-string idc))
+             indent prefix p1 (isd-turtle-format-component c1 ?\; (1+ level) prefix)
+             indent prefix p2 (isd-turtle-format-component c2 ?\  (1+ level) prefix)
              indent
              (if without-head-char
                  ""