update.
[chise/isd.git] / isd-turtle.el
index bbd95f3..93fea22 100644 (file)
@@ -1,6 +1,6 @@
 ;;; isd-turtle.el --- Utility to dump ideographic-structure as Turtle files
 
-;; Copyright (C) 2017 MORIOKA Tomohiko
+;; Copyright (C) 2017, 2018 MORIOKA Tomohiko
 
 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
 ;; Keywords: Ideographic Structures (漢字構造、解字), IDS, CHISE, RDF, Turtle
 (setq est-coded-charset-priority-list
   '(; =ucs
     =mj
-    =adobe-japan1
+    =adobe-japan1-0
+    =adobe-japan1-1
+    =adobe-japan1-2
+    =adobe-japan1-3
+    =adobe-japan1-4
+    =adobe-japan1-5
+    =adobe-japan1-6
     =ucs@iso
     =jis-x0208 =jis-x0208@1990
     =jis-x0213-1
     =jis-x0213-2
     =jis-x0212
     =gt
+    =hanyo-denshi/ks
     =hanyo-denshi/tk
     =ucs-itaiji-001
     =ucs-itaiji-002
     =ucs-itaiji-003
+    =ucs-itaiji-004
     =ucs-itaiji-005
+    =ucs-itaiji-006
+    =ucs-itaiji-007
+    =ucs-itaiji-008
+    =ucs-itaiji-009
+    =ucs-itaiji-010
     =ucs-itaiji-084
     =ucs-var-001
     =ucs-var-002
+    =ucs-var-003
     =ucs-var-004
+    =ucs-var-005
     =cns11643-1 =cns11643-2 =cns11643-3
     =cns11643-4 =cns11643-5 =cns11643-6 =cns11643-7
     =gb2312
     =big5-cdp
+    =ks-x1001
     =gt-k
     =ucs@unicode
     =ucs@JP/hanazono
     =gb12345
+    =ucs@cns
+    =ucs@gb
     =zinbun-oracle =>zinbun-oracle
     =daikanwa
     =ruimoku-v6
     =cbeta =jef-china3
+    =daikanwa/+2p
+    =+>ucs@iso =+>ucs@unicode
+    =+>ucs@jis
+    =+>ucs@cns
+    =+>ucs@ks
+    =+>ucs@jis/1990
+    =>mj
     =>jis-x0208 =>jis-x0213-1
     =>jis-x0208@1997
     =>ucs@iwds-1
+    =>ucs@cognate
     =>ucs@component
     =>iwds-1
     =>ucs@iso
     =>ucs@unicode
-    =+>ucs@iso =+>ucs@unicode
     =>ucs@jis =>ucs@cns =>ucs@ks
+    =>gt
+    =>gt-k
     =>>ucs@iso =>>ucs@unicode
     =>>ucs@jis =>>ucs@cns =>>ucs@ks
+    =>>gt-k
+    =>>hanyo-denshi/ks
     ==mj
+    ==ucs@iso
+    ==ucs@unicode
+    ==adobe-japan1-0
+    ==adobe-japan1-1
+    ==adobe-japan1-2
+    ==adobe-japan1-3
+    ==adobe-japan1-4
+    ==adobe-japan1-5
+    ==adobe-japan1-6
+    ==ks-x1001
+    ==hanyo-denshi/ks
+    ==hanyo-denshi/tk
+    ==ucs@jis
+    ==gt
+    ==cns11643-1 ==cns11643-2 ==cns11643-3
+    ==cns11643-4 ==cns11643-5 ==cns11643-6 ==cns11643-7
+    ==jis-x0212
+    ==ucs@cns
+    ==koseki
+    ==daikanwa
+    ==gt-k
+    ==ucs@gb
+    ==ucs-itaiji-003
+    ==ucs@JP/hanazono
+    ==daikanwa/+2p
     =>>jis-x0208 =>>jis-x0213-1 =>>jis-x0213-2
     =+>jis-x0208 =+>jis-x0213-1 =+>jis-x0213-2
+    =+>hanyo-denshi/jt
     =+>jis-x0208@1978
     =>>gt
     =+>adobe-japan1
     =>>adobe-japan1
     =jis-x0208@1983 =jis-x0208@1978
     =>ucs-itaiji-001
+    =>ucs-itaiji-002
+    =>ucs-itaiji-003
+    =>ucs-itaiji-004
     =>ucs-itaiji-005
-    ==ucs@unicode
+    =>ucs-itaiji-006
+    =>ucs-itaiji-007
+    =>ucs-itaiji-009
     ==>ucs@bucs
     =big5
     =>cbeta
+    ===mj
+    ===ucs@iso
+    ===ucs@unicode
+    ===hanyo-denshi/ks
+    ===ks-x1001
+    ===gt
+    ===gt-k
+    ===ucs@ks
+    ===ucs@gb
+    =shinjigen
+    =shinjigen@rev
+    =shinjigen@1ed
+    =shinjigen/+p@rev
+    ==shinjigen
+    ==shinjigen@rev
+    ==daikanwa/+p
+    ==shinjigen@1ed
+    ===daikanwa/+p
+    =>daikanwa/ho
+    ===daikanwa/ho
     ))
 
-(defvar isd-turtle-ccs-list nil)
+;; (defvar isd-turtle-ccs-list nil)
+(defvar chise-turtle-ccs-prefix-alist nil)
 
 (defun charset-code-point-format-spec (ccs)
   (cond ((memq ccs '(=ucs))
         "0x%04X")
-       ((memq ccs '(=gt
-                    =gt-k =daikanwa =adobe-japan1
-                    =cbeta =zinbun-oracle))
-        "%05d")
-       ((memq ccs '(=hanyo-denshi/ks
-                    =koseki =mj))
-        "%06d")
-       ((memq ccs '(=hanyo-denshi/tk ==hanyo-denshi/tk))
-        "%08d")
        (t
-        "0x%X")))
-
-(defun isd-turtle-uri-encode-feature-name (feature-name)
+        (let ((ccs-name (symbol-name ccs)))
+          (cond
+           ((string-match
+             "\\(shinjigen\\|daikanwa/ho\\|=>iwds-1\\)"
+             ccs-name)
+            "%04d")
+           ((string-match
+             "\\(gt\\|daikanwa\\|adobe-japan1\\|cbeta\\|zinbun-oracle\\|hng\\)"
+             ccs-name)
+            "%05d")
+           ((string-match "\\(hanyo-denshi/ks\\|koseki\\|mj\\)" ccs-name)
+            "%06d")
+           ((string-match "hanyo-denshi/tk" ccs-name)
+            "%08d")
+           (t
+            "0x%X"))))))
+
+;; (defun isd-turtle-uri-encode-feature-name (feature-name)
+;;   (cond
+;;    ((eq '=ucs feature-name)
+;;     "a.ucs")
+;;    ((eq '==>ucs@bucs feature-name)
+;;     "bucs")
+;;    (t
+;;     (mapconcat (lambda (c)
+;;                  (if (eq c ?@)
+;;                      "_"
+;;                    (char-to-string c)))
+;;                (www-uri-encode-feature-name feature-name)
+;;                ""))))
+(defun chise-turtle-uri-encode-ccs-name (feature-name)
   (cond
    ((eq '=ucs feature-name)
     "a.ucs")
+   ((eq '=big5 feature-name)
+    "a.big5")
    ((eq '==>ucs@bucs feature-name)
     "bucs")
    (t
     (mapconcat (lambda (c)
-                (if (eq c ?@)
-                    "_"
-                  (char-to-string c)))
+                (cond
+                 ((eq c ?@)
+                  "_")
+                 ((eq c ?+)
+                  "._.")
+                 ((eq c ?=)
+                  ".:.")
+                 (t
+                  (char-to-string c))))
               (www-uri-encode-feature-name feature-name)
               ""))))
 
-(defun isd-turtle-format-ccs-code-point (ccs code-point)
-  (unless (memq ccs isd-turtle-ccs-list)
-    (setq isd-turtle-ccs-list (cons ccs isd-turtle-ccs-list)))
-  (format "%s:%s"
-         (isd-turtle-uri-encode-feature-name ccs)
-         (format (charset-code-point-format-spec ccs)
-                 code-point)))
+;; (defun isd-turtle-format-ccs-code-point (ccs code-point)
+;;   (unless (memq ccs isd-turtle-ccs-list)
+;;     (setq isd-turtle-ccs-list (cons ccs isd-turtle-ccs-list)))
+;;   (format "%s:%s"
+;;           (isd-turtle-uri-encode-feature-name ccs)
+;;           (format (charset-code-point-format-spec ccs)
+;;                   code-point)))
+(defun chise-turtle-format-ccs-code-point (ccs code-point)
+  (let ((ccs-uri (chise-turtle-uri-encode-ccs-name ccs)))
+    (unless (assoc ccs-uri chise-turtle-ccs-prefix-alist)
+      (setq chise-turtle-ccs-prefix-alist
+           (cons (cons ccs-uri ccs)
+                 chise-turtle-ccs-prefix-alist)))
+    (format "%s:%s"
+           ccs-uri
+           (format (charset-code-point-format-spec ccs)
+                   code-point))))
 
 (defun isd-turtle-encode-char (object)
   (let ((ccs-list est-coded-charset-priority-list)
        ccs ret)
     (if (setq ret (encode-char object '=ucs))
-       (isd-turtle-format-ccs-code-point '=ucs ret)
+       (chise-turtle-format-ccs-code-point '=ucs ret)
       (while (and ccs-list
                  (setq ccs (pop ccs-list))
                  (not (setq ret (encode-char object ccs 'defined-only)))))
       (cond (ret
-            (isd-turtle-format-ccs-code-point ccs ret)
+            (chise-turtle-format-ccs-code-point ccs ret)
             )
            ((and (setq ccs (car (split-char object)))
                  (setq ret (encode-char object ccs)))
-            (isd-turtle-format-ccs-code-point ccs ret)
+            (chise-turtle-format-ccs-code-point ccs ret)
             )
            (t
             (format (if est-hide-cgi-mode
                     (encode-char object 'system-char-id))
             )))))
 
-(defun isd-turtle-format-component (component separator level)
+(defun isd-turtle-format-component (component separator level prefix)
   (cond ((characterp component)
         (format "%s %c # %c"
                 (isd-turtle-encode-char component)
                 ((setq ret (assq 'ideographic-structure component))
                  (if (eq separator ?\;)
                      (format "%s ;"
-                             (isd-turtle-format-char nil nil (cdr ret) (1+ level)))
-                   (isd-turtle-format-char nil nil (cdr ret) (1+ level)))))))))
+                             (isd-turtle-format-char nil nil (cdr ret) (1+ level)
+                                                     prefix))
+                   (isd-turtle-format-char nil nil (cdr ret) (1+ level)
+                                           prefix))))))))
 
-(defun isd-turtle-format-char (ccs code-point &optional ids-list level)
+(defun isd-turtle-format-char (ccs code-point &optional ids-list level
+                                  prefix without-head-char)
   (unless level
     (setq level 0))
+  (unless prefix
+    (setq prefix ""))
   (let ((indent (make-string (* level 4) ?\ ))
        char
-       idc
+       idc idc-str
        p1 p2 p3
        c1 c2 c3
        ret)
      ((eq idc ?⿻)
       (setq p1 'underlying
            p2 'overlaying)
-      ))
+      )
+     ((and idc (eq (encode-char idc '=ucs-itaiji-001) #x2FF6))
+      (setq idc-str "SLR")
+      (setq p1 'surround
+           p2 'filling)
+      )
+     ((and idc (eq (encode-char idc '=ucs-var-001) #x2FF0))
+      (setq idc-str "⿰・SLR")
+      (setq p1 'left
+           p2 'right)
+      )
+     ((and idc (eq (encode-char idc '=>iwds-1) 307))
+      (setq idc-str "⿰・⿺")
+      (setq p1 'left
+           p2 'right)
+      )
+     ((and idc (eq (encode-char idc '=>iwds-1) 305))
+      (setq idc-str "⿱・⿸")
+      (setq p1 'above
+           p2 'below)
+      )
+     ((and idc (eq (encode-char idc '=>ucs@component) #x2FF5))
+      (setq idc-str "⿱・⿵")
+      (setq p1 'above
+           p2 'below)
+      )
+     )
     (cond
      (p3
       (format "%s
-%s    :structure [ a idc:%c ;
-%s        :%-8s %s
-%s        :%-8s %s
-%s        :%-8s %s
+%s    %s:structure [ a idc:%s ;
+%s        %s:%-8s %s
+%s        %s:%-8s %s
+%s        %s:%-8s %s
 %s    ]%s"
-             (if (and ccs code-point)
-                 (format "%s   # %c"
-                         (isd-turtle-format-ccs-code-point ccs code-point)
-                         char)
-               "[")
-             indent idc
-             indent p1 (isd-turtle-format-component c1 ?\; (1+ level))
-             indent p2 (isd-turtle-format-component c2 ?\; (1+ level))
-             indent p3 (isd-turtle-format-component c3 ?\  (1+ level))
+             (if without-head-char
+                 ""
+               (if (and ccs code-point)
+                   (format "%s   # %c"
+                           (chise-turtle-format-ccs-code-point ccs code-point)
+                           char)
+                 "["))
+             indent prefix (or idc-str (char-to-string idc))
+             indent prefix p1 (isd-turtle-format-component c1 ?\; (1+ level) prefix)
+             indent prefix p2 (isd-turtle-format-component c2 ?\; (1+ level) prefix)
+             indent prefix p3 (isd-turtle-format-component c3 ?\  (1+ level) prefix)
              indent
-             (if (null char)
-                 (format "\n%s]"
-                         indent)
-               ""))
+             (if without-head-char
+                 ""
+               (if (null char)
+                   (format "\n%s]"
+                           indent)
+                 "")))
       )
      (idc
       (format "%s
-%s    :structure [ a idc:%c ;
-%s        :%-8s %s
-%s        :%-8s %s
+%s    %s:structure [ a idc:%s ;
+%s        %s:%-8s %s
+%s        %s:%-8s %s
 %s    ]%s"
-             (if (and ccs code-point)
-                 (format "%s   # %c"
-                         (isd-turtle-format-ccs-code-point ccs code-point)
-                         char)
-               "[")
-             indent idc
-             indent p1 (isd-turtle-format-component c1 ?\; (1+ level))
-             indent p2 (isd-turtle-format-component c2 ?\  (1+ level))
+             (if without-head-char
+                 ""
+               (if (and ccs code-point)
+                   (format "%s   # %c"
+                           (chise-turtle-format-ccs-code-point ccs code-point)
+                           char)
+                 "["))
+             indent prefix (or idc-str (char-to-string idc))
+             indent prefix p1 (isd-turtle-format-component c1 ?\; (1+ level) prefix)
+             indent prefix p2 (isd-turtle-format-component c2 ?\  (1+ level) prefix)
              indent
-             (if (null char)
-                 (format "\n%s]"
-                         indent)
-               ""))))
+             (if without-head-char
+                 ""
+               (if (null char)
+                   (format "\n%s]"
+                           indent)
+                 "")))))
     ))
 
 (defun isd-turtle-insert-char (ccs code-point)
 (defun isd-turtle-dump-range (file path func &rest args)
   (with-temp-buffer
     (let ((coding-system-for-write 'utf-8-mcs-er)
-         isd-turtle-ccs-list)
+          ;; isd-turtle-ccs-list
+         chise-turtle-ccs-prefix-alist)
       (if (file-directory-p path)
          (setq path (expand-file-name file path)))
       (apply func args)
       (goto-char (point-min))
-      (dolist (ccs (sort isd-turtle-ccs-list
-                        #'char-attribute-name<))
-       (insert (format "@prefix %s: <%s%s=> .\n"
-                       (isd-turtle-uri-encode-feature-name ccs)
-                       "http://www.chise.org/est/view/character/"
-                       (www-uri-encode-feature-name ccs))))
+      ;; (dolist (ccs (sort isd-turtle-ccs-list
+      ;;                    #'char-attribute-name<))
+      ;;   (insert (format "@prefix %s: <%s%s=> .\n"
+      ;;                   (isd-turtle-uri-encode-feature-name ccs)
+      ;;                   "http://www.chise.org/est/view/character/"
+      ;;                   (www-uri-encode-feature-name ccs))))
+      (dolist (cell (sort chise-turtle-ccs-prefix-alist
+                         (lambda (a b)
+                           (char-attribute-name< (cdr a)(cdr b)))))
+       (insert (format "@prefix %s: <%s/%s=> .\n"
+                       (car cell)
+                       "http://www.chise.org/est/view/character"
+                       (www-uri-encode-feature-name (cdr cell)))))
       (insert "\n")
       (goto-char (point-min))
       (insert "# -*- coding: utf-8-mcs-er -*-\n")
                         #'isd-turtle-insert-ccs-ranges
                         '=mj '(60000 . 69999)))
 
+;;;###autoload
+(defun isd-turtle-dump-all (directory)
+  (interactive "DISD directory : ")
+  (isd-turtle-dump-ucs-basic directory)
+  (isd-turtle-dump-ucs-ext-a directory)
+  (isd-turtle-dump-ucs-ext-b-1 directory)
+  (isd-turtle-dump-ucs-ext-b-2 directory)
+  (isd-turtle-dump-ucs-ext-b-3 directory)
+  (isd-turtle-dump-ucs-ext-b-4 directory)
+  (isd-turtle-dump-ucs-ext-b-5 directory)
+  (isd-turtle-dump-ucs-ext-b-6 directory)
+  (isd-turtle-dump-ucs-ext-c directory)
+  (isd-turtle-dump-ucs-ext-d directory)
+  (isd-turtle-dump-ucs-ext-e directory)
+  (isd-turtle-dump-mj-0 directory)
+  (isd-turtle-dump-mj-1 directory)
+  (isd-turtle-dump-mj-2 directory)
+  (isd-turtle-dump-mj-3 directory)
+  (isd-turtle-dump-mj-4 directory)
+  (isd-turtle-dump-mj-5 directory)
+  (isd-turtle-dump-mj-6 directory)
+  )
+
 
 ;;; @ End.
 ;;;