Update Copyright header.
[chise/tomoyo-tools.git] / concord-turtle-dump.el
index 1a835b1..6cf3684 100644 (file)
@@ -1,6 +1,6 @@
 ;;; concord-turtle-dump.el --- Character Database utility -*- coding: utf-8-er; -*-
 
-;; Copyright (C) 2017,2018 MORIOKA Tomohiko.
+;; Copyright (C) 2017,2018,2019 MORIOKA Tomohiko.
 
 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
 ;; Keywords: CHISE, Character Database, RDF, Turtle, ISO/IEC 10646, UCS, Unicode, MULE.
@@ -25,7 +25,7 @@
 ;;; Code:
 
 (require 'char-db-util)
-(require 'cwiki-common)
+(require 'chiset-common)
 (require 'isd-turtle)
 (require 'ideograph-util)
 
     ===daikanwa/ho
     ))
 
-(defvar chise-turtle-ccs-prefix-alist nil)
-
+(defvar chise-turtle-feature-domains)
 (setq chise-turtle-feature-domains
       (append char-db-feature-domains
              (let (dest domain)
                      (setq dest (cons domain dest)))))
                (sort dest #'string<))))
 
-(defun charset-code-point-format-spec (ccs)
-  (cond ((memq ccs '(=ucs))
-        "0x%04X")
-       (t
-        (let ((ccs-name (symbol-name ccs)))
-          (cond
-           ((string-match
-             "\\(shinjigen\\|daikanwa/ho\\|=>iwds-1\\)"
-             ccs-name)
-            "%04d")
-           ((string-match
-             "\\(gt\\|daikanwa\\|adobe-japan1\\|cbeta\\|zinbun-oracle\\|hng\\)"
-             ccs-name)
-            "%05d")
-           ((string-match "\\(hanyo-denshi/ks\\|koseki\\|mj\\)" ccs-name)
-            "%06d")
-           ((string-match "hanyo-denshi/tk" ccs-name)
-            "%08d")
-           (t
-            "0x%X"))))))
-
-(defun chise-turtle-uri-decode-feature-name (uri-feature)
-  (cond ((string= "a.ucs" uri-feature)
-        '=ucs)
-       ((string= "a.big5" uri-feature)
-        '=big5)
-       (t
-        (www-uri-decode-feature-name uri-feature))))
-
-(defun chise-turtle-uri-encode-ccs-name (feature-name)
-  (cond
-   ((eq '=ucs feature-name)
-    "a.ucs")
-   ((eq '=big5 feature-name)
-    "a.big5")
-   ((eq '==>ucs@bucs feature-name)
-    "bucs")
-   (t
-    (mapconcat (lambda (c)
-                (cond
-                 ((eq c ?@)
-                  "_")
-                 ((eq c ?+)
-                  "._.")
-                 ((eq c ?=)
-                  ".:.")
-                 ((eq c ?|)
-                  "._cmp_.")
-                 (t
-                  (char-to-string c))))
-              (www-uri-encode-feature-name feature-name)
-              ""))))
-
 (defun chise-turtle-uri-encode-feature-name (feature-name)
   (cond
    ((eq '->subsumptive feature-name)
     ":subsume")
+   ((eq '<-subsumptive feature-name)
+    ":subsumed-in")
+   ((eq '->denotational feature-name)
+    ":unify")
    ((eq '<-denotational feature-name)
-    ":denotation-of")
+    ":unified-by")
+   ((eq '->formed feature-name)
+    ":formed")
    ((eq '<-formed feature-name)
     ":form-of")
+   ((eq '->same feature-name)
+    "ideo:same")
    ((eq '<-same feature-name)
     "ideo:same-as")
+   ((eq '->simplified feature-name)
+    "ideo:simplified-form")
    ((eq '<-simplified feature-name)
     "ideo:simplified-form-of")
+   ((eq '->vulgar feature-name)
+    "ideo:vulgar-form")
    ((eq '<-vulgar feature-name)
     "ideo:vulgar-form-of")
+   ((eq '->wrong feature-name)
+    "ideo:wrong-form")
    ((eq '<-wrong feature-name)
     "ideo:wrong-form-of")
+   ((eq '->original feature-name)
+    "ideo:original-form")
    ((eq '<-original feature-name)
     "ideo:original-form-of")
+   ((eq '->ancient feature-name)
+    "ideo:ancient-form")
    ((eq '<-ancient feature-name)
     "ideo:ancient-form-of")
+   ((eq '->Small-Seal feature-name)
+    "ideo:Small-Seal-form")
    ((eq '<-Small-Seal feature-name)
-    "ideo:Small-Seal-of")
-   ((eq '<-interchangeable feature-name)
-    "ideo:interchangeable-form-of")
+    "ideo:Small-Seal-form-of")
+   ((eq '->Oracle-Bones feature-name)
+    "ideo:Oracle-Bone-character")
+   ((eq '<-Oracle-Bones feature-name)
+    "ideo:Oracle-Bone-character-of")
    ((eq '->interchangeable feature-name)
+    "ideo:interchangeable-form")
+   ((eq '<-interchangeable feature-name)
     "ideo:interchangeable")
    ((eq '->mistakable feature-name)
-    "ideo:mistakable")
+    "ideo:mistakable-character")
+   ((eq '<-mistakable feature-name)
+    "ideo:mistakable-character-of")
    ((eq 'hanyu-dazidian feature-name)
     "ideo:hanyu-dazidian")
    ((eq '*note feature-name)
     "rdfs:comment")
+   ((eq '*references feature-name)
+    ":reference")
+   ((eq '*instance feature-name)
+    ":instance")
+   ((eq '*source-file feature-name)
+    ":source-file")
    (t
     (concat ":" (chise-turtle-uri-encode-ccs-name feature-name)))))
 
-(defun chise-turtle-format-ccs-code-point (ccs code-point)
-  (let ((ccs-uri (chise-turtle-uri-encode-ccs-name ccs)))
-    (unless (assoc ccs-uri chise-turtle-ccs-prefix-alist)
-      (setq chise-turtle-ccs-prefix-alist
-           (cons (cons ccs-uri ccs)
-                 chise-turtle-ccs-prefix-alist)))
-    (format "%s:%s"
-           ccs-uri
-           (format (charset-code-point-format-spec ccs)
-                   code-point))))
-
-(defun chise-turtle-encode-char (object)
-  (let (spec cell dest
-       ccs ret ret2)
-    (if (setq ret (encode-char object '=ucs))
-       (chise-turtle-format-ccs-code-point '=ucs ret)
-      (setq spec (char-attribute-alist object))
-      (while (and spec
-                 (setq cell (pop spec)))
-       (if (and (find-charset (car cell))
-                (setq ret (cdr cell)))
-           (setq dest (cons cell dest))))
-      (setq ret (car (sort dest (lambda (a b)
-                                 (char-attribute-name< (car a)(car b)))))
-           ccs (car ret)
-           ret (cdr ret))
-      (cond (ret
-            (chise-turtle-format-ccs-code-point ccs ret)
-            )
-           ((and (setq ccs (car (split-char object)))
-                 (setq ret (encode-char object ccs)))
-            (chise-turtle-format-ccs-code-point ccs ret)
-            )
-           ((setq ret (get-char-attribute object 'ideographic-combination))
-            (format "ideocomb:%s"
-                    (mapconcat (lambda (cell)
-                                 (cond ((characterp cell)
-                                        (char-to-string cell)
-                                        )
-                                       ((setq ret2 (find-char cell))
-                                        (char-to-string ret2)
-                                        )
-                                       (t
-                                        (format "%S" cell)
-                                        )))
-                               ret ""))
-            )
-           (t
-            (format (if est-hide-cgi-mode
-                        "system-char-id=0x%X"
-                      "system-char-id:0x%X")
-                    (encode-char object 'system-char-id))
-            )))))
-
 (defun concord-turtle-encode-object (obj)
   (cond ((characterp obj)
         (chise-turtle-encode-char obj)
 
 (defun concord-turtle-insert-relation-feature (char name value line-breaking
                                                    ccss readable)
-  (insert (format "    %s%s        "
-                 (chise-turtle-uri-encode-feature-name name)
-                 line-breaking))
+  (if (and (consp value)
+          (cdr value))
+      (insert (format "    %s%s        "
+                     (chise-turtle-uri-encode-feature-name name)
+                     line-breaking))
+    (insert (format "    %-20s "
+                   (chise-turtle-uri-encode-feature-name name))))
   (concord-turtle-insert-relations value readable)
   (insert " ;")
   )
 
 (defun concord-turtle-insert-metadata (name value)
-  (let (col indent ret)
+  (let (col indent)
     (insert (format "%-7s " name))
     (cond
      ((or (eq name :sources)
                        (chise-turtle-uri-encode-ccs-name source))))
       nil)
      ((eq name :references)
-      (setq ret (car value))
-      (setq ret (plist-get (nth 1 ret) :ref))
-      (setq col (current-column))
-      (setq indent (make-string col ?\ ))
-      (insert (format "<%s>" ret))
-      (dolist (refspec (cdr value))
-       (setq ret (plist-get (nth 1 refspec) :ref))
-       (insert (format " ,\n%s<%s>" indent ret)))
+      (concord-turtle-insert-references value)
       nil)
      (t
       (insert (format "%S" value))
                                                 al
                                                 nil 'for-sub-node)
            (setq separator lbs))
-       (setq ret (prin1-to-string cell))
+       (setq ret (if (concord-object-p cell)
+                     (concord-turtle-encode-object cell)
+                   (prin1-to-string cell)))
        (if separator
            (if (< (+ (current-column)
                      (length ret)
     (insert " ;")
     'with-separator))
 
+(defun concord-turtle-insert-references (value &optional readable)
+  (let (ret col indent)
+    (setq ret (car value))
+    (setq ret (plist-get (nth 1 ret) :ref))
+    (setq col (current-column))
+    (setq indent (make-string col ?\ ))
+    (insert (format "<%s>" ret))
+    (dolist (refspec (cdr value))
+      (setq ret (plist-get (nth 1 refspec) :ref))
+      (insert (format " ,\n%s<%s>" indent ret)))))
+
 (defun concord-turtle-insert-object (cell &optional readable)
   (if (integerp cell)
       (setq cell (decode-char '=ucs cell)))
        ((eq feature-name-base '=decomposition)
         (concord-turtle-insert-decomposition value readable)
         )
+       ((eq feature-name-base '*references)
+        (concord-turtle-insert-references value readable)
+        )
        ((or (eq feature-name-base 'ideographic-combination)
+            (eq feature-name-base '*instance)
+            (eq feature-name-base 'abstract-glyph)
             (eq feature-name-base '<-formed)
             (string-match "^\\(->\\|<-\\)[^*]*$" (symbol-name feature-name-base)))
         (concord-turtle-insert-relations value readable)
                   (insert "rdf:value ")
                   (concord-turtle-insert-target-value mdval feature-name-base)
                   )
+                 ((eq mdname :note)
+                  (insert "rdfs:comment ")
+                  (insert (format "%S" mdval))
+                  )
                  (t
                   (concord-turtle-insert-metadata mdname mdval)))))
     (if last-sep
                                            ccss readable)
       (setq obj-spec (delete feature-pair obj-spec))
       )
-    (when (and (setq feature-pair (assq '<-denotational@component obj-spec))
-              (setq value (cdr feature-pair)))
-      (insert line-separator)
-      (concord-turtle-insert-relation-feature
-       object '<-denotational@component value
-       line-breaking
-       ccss readable)
-      (setq obj-spec (delete feature-pair obj-spec))
-      )
+    ;; (when (and (setq feature-pair (assq '<-denotational@component obj-spec))
+    ;;            (setq value (cdr feature-pair)))
+    ;;   (insert line-separator)
+    ;;   (concord-turtle-insert-relation-feature
+    ;;    object '<-denotational@component value
+    ;;    line-breaking
+    ;;    ccss readable)
+    ;;   (setq obj-spec (delete feature-pair obj-spec))
+    ;;   )
     (when (and (setq feature-pair (assq 'name obj-spec))
               (setq value (cdr feature-pair)))
       (insert (format "%s    " line-separator))
          (insert (format "%s    ideo:total-strokes     [ "
                          line-separator))
          (setq col (current-column))
+         (setq indent (make-string col ?\ ))
          (insert (format ":context domain:%-7s ;\n%srdf:value %S"
                          (chise-turtle-uri-encode-ccs-name domain)
-                         (make-string col ?\ )
+                         indent
                          value))
          (setq obj-spec (delete feature-pair obj-spec))
          (setq skey (intern (format "%s*sources" key)))
          (when (and (setq feature-pair (assq skey obj-spec))
                     (setq value (cdr feature-pair)))
-            (insert (format " ;\n%s" indent))
+           (insert (format " ;\n%s" indent))
             (concord-turtle-insert-metadata :sources value)
             ;; (insert (format " ;\n%s:sources (" indent))
-            ;; (setq col (current-column))
-            ;; (setq indent (make-string col ?\ ))
             ;; (insert (format " chisebib:%s" (car value)))
             ;; (dolist (cell (cdr value))
             ;;   (insert (format "\n%s chisebib:%s" indent cell)))
                                (intern (format "=%s" name)))))
                   (setq child-ccs-list (cons ret child-ccs-list)))
               )
-             ((and
-               (not readable)
-               (not (eq name '->subsumptive))
-               (not (eq name '->uppercase))
-               (not (eq name '->lowercase))
-               (not (eq name '->titlecase))
-               (not (eq name '->canonical))
-               (not (eq name '->Bopomofo))
-               (not (eq name '->mistakable))
-               (not (eq name '->ideographic-variants))
-               (or (eq name '<-identical)
-                   (eq name '<-uppercase)
-                   (eq name '<-lowercase)
-                   (eq name '<-titlecase)
-                   (eq name '<-canonical)
-                   (eq name '<-ideographic-variants)
-                   ;; (eq name '<-synonyms)
-                   (string-match "^<-synonyms" (symbol-name name))
-                   (eq name '<-mistakable)
-                   (when (string-match "^->" (symbol-name name))
-                     (cond
-                      ((string-match "^->fullwidth" (symbol-name name))
-                       (not (and (consp value)
-                                 (characterp (car value))
-                                 (encode-char
-                                  (car value) '=ucs 'defined-only)))
-                       )
-                      (t)))
-                   ))
-              )
+              ;; ((and
+              ;;   (not readable)
+              ;;   (not (eq name '->subsumptive))
+              ;;   (not (eq name '->uppercase))
+              ;;   (not (eq name '->lowercase))
+              ;;   (not (eq name '->titlecase))
+              ;;   (not (eq name '->canonical))
+              ;;   (not (eq name '->Bopomofo))
+              ;;   (not (eq name '->mistakable))
+              ;;   (not (eq name '->ideographic-variants))
+              ;;   (or (eq name '<-identical)
+              ;;       (eq name '<-uppercase)
+              ;;       (eq name '<-lowercase)
+              ;;       (eq name '<-titlecase)
+              ;;       (eq name '<-canonical)
+              ;;       (eq name '<-ideographic-variants)
+              ;;       ;; (eq name '<-synonyms)
+              ;;       (string-match "^<-synonyms" (symbol-name name))
+              ;;       (eq name '<-mistakable)
+              ;;       (when (string-match "^->" (symbol-name name))
+              ;;         (cond
+              ;;          ((string-match "^->fullwidth" (symbol-name name))
+              ;;           (not (and (consp value)
+              ;;                     (characterp (car value))
+              ;;                     (encode-char
+              ;;                      (car value) '=ucs 'defined-only)))
+              ;;           )
+              ;;          (t)))
+              ;;       ))
+              ;;  )
              ((eq name 'ideographic-structure)
               (insert (isd-turtle-format-char nil nil value (/ column 4)
                                               'isd 'without-head-char))
        (concord-turtle-insert-char-data char nil attributes)))
     ))
 
+;;;###autoload
 (defun char-db-turtle-write-ideograph-radical-char-data (radical file)
   (if (file-directory-p file)
       (let ((name (char-feature (decode-char 'ucs (+ #x2EFF radical))