X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=concord-turtle-dump.el;h=6cf3684376ca479988802f7dbf32853a205acbc1;hb=07165901ce5828675d066bd4f107c936a76aa91c;hp=1a835b19374073f15a465b322ad8ba35cc8ab000;hpb=c785fac99d3dc834dfbcc990b6c41edb6397aa94;p=chise%2Ftomoyo-tools.git diff --git a/concord-turtle-dump.el b/concord-turtle-dump.el index 1a835b1..6cf3684 100644 --- a/concord-turtle-dump.el +++ b/concord-turtle-dump.el @@ -1,6 +1,6 @@ ;;; concord-turtle-dump.el --- Character Database utility -*- coding: utf-8-er; -*- -;; Copyright (C) 2017,2018 MORIOKA Tomohiko. +;; Copyright (C) 2017,2018,2019 MORIOKA Tomohiko. ;; Author: MORIOKA Tomohiko ;; Keywords: CHISE, Character Database, RDF, Turtle, ISO/IEC 10646, UCS, Unicode, MULE. @@ -25,7 +25,7 @@ ;;; Code: (require 'char-db-util) -(require 'cwiki-common) +(require 'chiset-common) (require 'isd-turtle) (require 'ideograph-util) @@ -175,8 +175,7 @@ ===daikanwa/ho )) -(defvar chise-turtle-ccs-prefix-alist nil) - +(defvar chise-turtle-feature-domains) (setq chise-turtle-feature-domains (append char-db-feature-domains (let (dest domain) @@ -190,148 +189,73 @@ (setq dest (cons domain dest))))) (sort dest #'string<)))) -(defun charset-code-point-format-spec (ccs) - (cond ((memq ccs '(=ucs)) - "0x%04X") - (t - (let ((ccs-name (symbol-name ccs))) - (cond - ((string-match - "\\(shinjigen\\|daikanwa/ho\\|=>iwds-1\\)" - ccs-name) - "%04d") - ((string-match - "\\(gt\\|daikanwa\\|adobe-japan1\\|cbeta\\|zinbun-oracle\\|hng\\)" - ccs-name) - "%05d") - ((string-match "\\(hanyo-denshi/ks\\|koseki\\|mj\\)" ccs-name) - "%06d") - ((string-match "hanyo-denshi/tk" ccs-name) - "%08d") - (t - "0x%X")))))) - -(defun chise-turtle-uri-decode-feature-name (uri-feature) - (cond ((string= "a.ucs" uri-feature) - '=ucs) - ((string= "a.big5" uri-feature) - '=big5) - (t - (www-uri-decode-feature-name uri-feature)))) - -(defun chise-turtle-uri-encode-ccs-name (feature-name) - (cond - ((eq '=ucs feature-name) - "a.ucs") - ((eq '=big5 feature-name) - "a.big5") - ((eq '==>ucs@bucs feature-name) - "bucs") - (t - (mapconcat (lambda (c) - (cond - ((eq c ?@) - "_") - ((eq c ?+) - "._.") - ((eq c ?=) - ".:.") - ((eq c ?|) - "._cmp_.") - (t - (char-to-string c)))) - (www-uri-encode-feature-name feature-name) - "")))) - (defun chise-turtle-uri-encode-feature-name (feature-name) (cond ((eq '->subsumptive feature-name) ":subsume") + ((eq '<-subsumptive feature-name) + ":subsumed-in") + ((eq '->denotational feature-name) + ":unify") ((eq '<-denotational feature-name) - ":denotation-of") + ":unified-by") + ((eq '->formed feature-name) + ":formed") ((eq '<-formed feature-name) ":form-of") + ((eq '->same feature-name) + "ideo:same") ((eq '<-same feature-name) "ideo:same-as") + ((eq '->simplified feature-name) + "ideo:simplified-form") ((eq '<-simplified feature-name) "ideo:simplified-form-of") + ((eq '->vulgar feature-name) + "ideo:vulgar-form") ((eq '<-vulgar feature-name) "ideo:vulgar-form-of") + ((eq '->wrong feature-name) + "ideo:wrong-form") ((eq '<-wrong feature-name) "ideo:wrong-form-of") + ((eq '->original feature-name) + "ideo:original-form") ((eq '<-original feature-name) "ideo:original-form-of") + ((eq '->ancient feature-name) + "ideo:ancient-form") ((eq '<-ancient feature-name) "ideo:ancient-form-of") + ((eq '->Small-Seal feature-name) + "ideo:Small-Seal-form") ((eq '<-Small-Seal feature-name) - "ideo:Small-Seal-of") - ((eq '<-interchangeable feature-name) - "ideo:interchangeable-form-of") + "ideo:Small-Seal-form-of") + ((eq '->Oracle-Bones feature-name) + "ideo:Oracle-Bone-character") + ((eq '<-Oracle-Bones feature-name) + "ideo:Oracle-Bone-character-of") ((eq '->interchangeable feature-name) + "ideo:interchangeable-form") + ((eq '<-interchangeable feature-name) "ideo:interchangeable") ((eq '->mistakable feature-name) - "ideo:mistakable") + "ideo:mistakable-character") + ((eq '<-mistakable feature-name) + "ideo:mistakable-character-of") ((eq 'hanyu-dazidian feature-name) "ideo:hanyu-dazidian") ((eq '*note feature-name) "rdfs:comment") + ((eq '*references feature-name) + ":reference") + ((eq '*instance feature-name) + ":instance") + ((eq '*source-file feature-name) + ":source-file") (t (concat ":" (chise-turtle-uri-encode-ccs-name feature-name))))) -(defun chise-turtle-format-ccs-code-point (ccs code-point) - (let ((ccs-uri (chise-turtle-uri-encode-ccs-name ccs))) - (unless (assoc ccs-uri chise-turtle-ccs-prefix-alist) - (setq chise-turtle-ccs-prefix-alist - (cons (cons ccs-uri ccs) - chise-turtle-ccs-prefix-alist))) - (format "%s:%s" - ccs-uri - (format (charset-code-point-format-spec ccs) - code-point)))) - -(defun chise-turtle-encode-char (object) - (let (spec cell dest - ccs ret ret2) - (if (setq ret (encode-char object '=ucs)) - (chise-turtle-format-ccs-code-point '=ucs ret) - (setq spec (char-attribute-alist object)) - (while (and spec - (setq cell (pop spec))) - (if (and (find-charset (car cell)) - (setq ret (cdr cell))) - (setq dest (cons cell dest)))) - (setq ret (car (sort dest (lambda (a b) - (char-attribute-name< (car a)(car b))))) - ccs (car ret) - ret (cdr ret)) - (cond (ret - (chise-turtle-format-ccs-code-point ccs ret) - ) - ((and (setq ccs (car (split-char object))) - (setq ret (encode-char object ccs))) - (chise-turtle-format-ccs-code-point ccs ret) - ) - ((setq ret (get-char-attribute object 'ideographic-combination)) - (format "ideocomb:%s" - (mapconcat (lambda (cell) - (cond ((characterp cell) - (char-to-string cell) - ) - ((setq ret2 (find-char cell)) - (char-to-string ret2) - ) - (t - (format "%S" cell) - ))) - ret "")) - ) - (t - (format (if est-hide-cgi-mode - "system-char-id=0x%X" - "system-char-id:0x%X") - (encode-char object 'system-char-id)) - ))))) - (defun concord-turtle-encode-object (obj) (cond ((characterp obj) (chise-turtle-encode-char obj) @@ -518,15 +442,19 @@ (defun concord-turtle-insert-relation-feature (char name value line-breaking ccss readable) - (insert (format " %s%s " - (chise-turtle-uri-encode-feature-name name) - line-breaking)) + (if (and (consp value) + (cdr value)) + (insert (format " %s%s " + (chise-turtle-uri-encode-feature-name name) + line-breaking)) + (insert (format " %-20s " + (chise-turtle-uri-encode-feature-name name)))) (concord-turtle-insert-relations value readable) (insert " ;") ) (defun concord-turtle-insert-metadata (name value) - (let (col indent ret) + (let (col indent) (insert (format "%-7s " name)) (cond ((or (eq name :sources) @@ -540,14 +468,7 @@ (chise-turtle-uri-encode-ccs-name source)))) nil) ((eq name :references) - (setq ret (car value)) - (setq ret (plist-get (nth 1 ret) :ref)) - (setq col (current-column)) - (setq indent (make-string col ?\ )) - (insert (format "<%s>" ret)) - (dolist (refspec (cdr value)) - (setq ret (plist-get (nth 1 refspec) :ref)) - (insert (format " ,\n%s<%s>" indent ret))) + (concord-turtle-insert-references value) nil) (t (insert (format "%S" value)) @@ -588,7 +509,9 @@ al nil 'for-sub-node) (setq separator lbs)) - (setq ret (prin1-to-string cell)) + (setq ret (if (concord-object-p cell) + (concord-turtle-encode-object cell) + (prin1-to-string cell))) (if separator (if (< (+ (current-column) (length ret) @@ -648,6 +571,17 @@ (insert " ;") 'with-separator)) +(defun concord-turtle-insert-references (value &optional readable) + (let (ret col indent) + (setq ret (car value)) + (setq ret (plist-get (nth 1 ret) :ref)) + (setq col (current-column)) + (setq indent (make-string col ?\ )) + (insert (format "<%s>" ret)) + (dolist (refspec (cdr value)) + (setq ret (plist-get (nth 1 refspec) :ref)) + (insert (format " ,\n%s<%s>" indent ret))))) + (defun concord-turtle-insert-object (cell &optional readable) (if (integerp cell) (setq cell (decode-char '=ucs cell))) @@ -715,7 +649,12 @@ ((eq feature-name-base '=decomposition) (concord-turtle-insert-decomposition value readable) ) + ((eq feature-name-base '*references) + (concord-turtle-insert-references value readable) + ) ((or (eq feature-name-base 'ideographic-combination) + (eq feature-name-base '*instance) + (eq feature-name-base 'abstract-glyph) (eq feature-name-base '<-formed) (string-match "^\\(->\\|<-\\)[^*]*$" (symbol-name feature-name-base))) (concord-turtle-insert-relations value readable) @@ -787,6 +726,10 @@ (insert "rdf:value ") (concord-turtle-insert-target-value mdval feature-name-base) ) + ((eq mdname :note) + (insert "rdfs:comment ") + (insert (format "%S" mdval)) + ) (t (concord-turtle-insert-metadata mdname mdval))))) (if last-sep @@ -864,15 +807,15 @@ ccss readable) (setq obj-spec (delete feature-pair obj-spec)) ) - (when (and (setq feature-pair (assq '<-denotational@component obj-spec)) - (setq value (cdr feature-pair))) - (insert line-separator) - (concord-turtle-insert-relation-feature - object '<-denotational@component value - line-breaking - ccss readable) - (setq obj-spec (delete feature-pair obj-spec)) - ) + ;; (when (and (setq feature-pair (assq '<-denotational@component obj-spec)) + ;; (setq value (cdr feature-pair))) + ;; (insert line-separator) + ;; (concord-turtle-insert-relation-feature + ;; object '<-denotational@component value + ;; line-breaking + ;; ccss readable) + ;; (setq obj-spec (delete feature-pair obj-spec)) + ;; ) (when (and (setq feature-pair (assq 'name obj-spec)) (setq value (cdr feature-pair))) (insert (format "%s " line-separator)) @@ -1090,19 +1033,18 @@ (insert (format "%s ideo:total-strokes [ " line-separator)) (setq col (current-column)) + (setq indent (make-string col ?\ )) (insert (format ":context domain:%-7s ;\n%srdf:value %S" (chise-turtle-uri-encode-ccs-name domain) - (make-string col ?\ ) + indent value)) (setq obj-spec (delete feature-pair obj-spec)) (setq skey (intern (format "%s*sources" key))) (when (and (setq feature-pair (assq skey obj-spec)) (setq value (cdr feature-pair))) - (insert (format " ;\n%s" indent)) + (insert (format " ;\n%s" indent)) (concord-turtle-insert-metadata :sources value) ;; (insert (format " ;\n%s:sources (" indent)) - ;; (setq col (current-column)) - ;; (setq indent (make-string col ?\ )) ;; (insert (format " chisebib:%s" (car value))) ;; (dolist (cell (cdr value)) ;; (insert (format "\n%s chisebib:%s" indent cell))) @@ -1174,36 +1116,36 @@ (intern (format "=%s" name))))) (setq child-ccs-list (cons ret child-ccs-list))) ) - ((and - (not readable) - (not (eq name '->subsumptive)) - (not (eq name '->uppercase)) - (not (eq name '->lowercase)) - (not (eq name '->titlecase)) - (not (eq name '->canonical)) - (not (eq name '->Bopomofo)) - (not (eq name '->mistakable)) - (not (eq name '->ideographic-variants)) - (or (eq name '<-identical) - (eq name '<-uppercase) - (eq name '<-lowercase) - (eq name '<-titlecase) - (eq name '<-canonical) - (eq name '<-ideographic-variants) - ;; (eq name '<-synonyms) - (string-match "^<-synonyms" (symbol-name name)) - (eq name '<-mistakable) - (when (string-match "^->" (symbol-name name)) - (cond - ((string-match "^->fullwidth" (symbol-name name)) - (not (and (consp value) - (characterp (car value)) - (encode-char - (car value) '=ucs 'defined-only))) - ) - (t))) - )) - ) + ;; ((and + ;; (not readable) + ;; (not (eq name '->subsumptive)) + ;; (not (eq name '->uppercase)) + ;; (not (eq name '->lowercase)) + ;; (not (eq name '->titlecase)) + ;; (not (eq name '->canonical)) + ;; (not (eq name '->Bopomofo)) + ;; (not (eq name '->mistakable)) + ;; (not (eq name '->ideographic-variants)) + ;; (or (eq name '<-identical) + ;; (eq name '<-uppercase) + ;; (eq name '<-lowercase) + ;; (eq name '<-titlecase) + ;; (eq name '<-canonical) + ;; (eq name '<-ideographic-variants) + ;; ;; (eq name '<-synonyms) + ;; (string-match "^<-synonyms" (symbol-name name)) + ;; (eq name '<-mistakable) + ;; (when (string-match "^->" (symbol-name name)) + ;; (cond + ;; ((string-match "^->fullwidth" (symbol-name name)) + ;; (not (and (consp value) + ;; (characterp (car value)) + ;; (encode-char + ;; (car value) '=ucs 'defined-only))) + ;; ) + ;; (t))) + ;; )) + ;; ) ((eq name 'ideographic-structure) (insert (isd-turtle-format-char nil nil value (/ column 4) 'isd 'without-head-char)) @@ -1320,6 +1262,7 @@ (concord-turtle-insert-char-data char nil attributes))) )) +;;;###autoload (defun char-db-turtle-write-ideograph-radical-char-data (radical file) (if (file-directory-p file) (let ((name (char-feature (decode-char 'ucs (+ #x2EFF radical))