;;; concord-turtle-dump.el --- Character Database utility -*- coding: utf-8-er; -*-
-;; Copyright (C) 2017,2018 MORIOKA Tomohiko.
+;; Copyright (C) 2017,2018,2019 MORIOKA Tomohiko.
;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
;; Keywords: CHISE, Character Database, RDF, Turtle, ISO/IEC 10646, UCS, Unicode, MULE.
;;; Code:
(require 'char-db-util)
-(require 'cwiki-common)
+(require 'chiset-common)
(require 'isd-turtle)
(require 'ideograph-util)
===daikanwa/ho
))
-(defvar chise-turtle-ccs-prefix-alist nil)
-
+(defvar chise-turtle-feature-domains)
(setq chise-turtle-feature-domains
(append char-db-feature-domains
(let (dest domain)
(setq dest (cons domain dest)))))
(sort dest #'string<))))
-(defun charset-code-point-format-spec (ccs)
- (cond ((memq ccs '(=ucs))
- "0x%04X")
- (t
- (let ((ccs-name (symbol-name ccs)))
- (cond
- ((string-match
- "\\(shinjigen\\|daikanwa/ho\\|=>iwds-1\\)"
- ccs-name)
- "%04d")
- ((string-match
- "\\(gt\\|daikanwa\\|adobe-japan1\\|cbeta\\|zinbun-oracle\\|hng\\)"
- ccs-name)
- "%05d")
- ((string-match "\\(hanyo-denshi/ks\\|koseki\\|mj\\)" ccs-name)
- "%06d")
- ((string-match "hanyo-denshi/tk" ccs-name)
- "%08d")
- (t
- "0x%X"))))))
-
-(defun chise-turtle-uri-decode-feature-name (uri-feature)
- (cond ((string= "a.ucs" uri-feature)
- '=ucs)
- ((string= "a.big5" uri-feature)
- '=big5)
- (t
- (www-uri-decode-feature-name uri-feature))))
-
-(defun chise-turtle-uri-encode-ccs-name (feature-name)
- (cond
- ((eq '=ucs feature-name)
- "a.ucs")
- ((eq '=big5 feature-name)
- "a.big5")
- ((eq '==>ucs@bucs feature-name)
- "bucs")
- (t
- (mapconcat (lambda (c)
- (cond
- ((eq c ?@)
- "_")
- ((eq c ?+)
- "._.")
- ((eq c ?=)
- ".:.")
- ((eq c ?|)
- "._cmp_.")
- (t
- (char-to-string c))))
- (www-uri-encode-feature-name feature-name)
- ""))))
-
(defun chise-turtle-uri-encode-feature-name (feature-name)
(cond
((eq '->subsumptive feature-name)
":subsume")
+ ((eq '<-subsumptive feature-name)
+ ":subsumed-in")
+ ((eq '->denotational feature-name)
+ ":unify")
((eq '<-denotational feature-name)
- ":denotation-of")
+ ":unified-by")
+ ((eq '->formed feature-name)
+ ":formed")
((eq '<-formed feature-name)
":form-of")
+ ((eq '->same feature-name)
+ "ideo:same")
((eq '<-same feature-name)
"ideo:same-as")
+ ((eq '->simplified feature-name)
+ "ideo:simplified-form")
((eq '<-simplified feature-name)
"ideo:simplified-form-of")
+ ((eq '->vulgar feature-name)
+ "ideo:vulgar-form")
((eq '<-vulgar feature-name)
"ideo:vulgar-form-of")
+ ((eq '->wrong feature-name)
+ "ideo:wrong-form")
((eq '<-wrong feature-name)
"ideo:wrong-form-of")
+ ((eq '->original feature-name)
+ "ideo:original-form")
((eq '<-original feature-name)
"ideo:original-form-of")
+ ((eq '->ancient feature-name)
+ "ideo:ancient-form")
((eq '<-ancient feature-name)
"ideo:ancient-form-of")
+ ((eq '->Small-Seal feature-name)
+ "ideo:Small-Seal-form")
((eq '<-Small-Seal feature-name)
- "ideo:Small-Seal-of")
- ((eq '<-interchangeable feature-name)
- "ideo:interchangeable-form-of")
+ "ideo:Small-Seal-form-of")
+ ((eq '->Oracle-Bones feature-name)
+ "ideo:Oracle-Bone-character")
+ ((eq '<-Oracle-Bones feature-name)
+ "ideo:Oracle-Bone-character-of")
((eq '->interchangeable feature-name)
+ "ideo:interchangeable-form")
+ ((eq '<-interchangeable feature-name)
"ideo:interchangeable")
((eq '->mistakable feature-name)
- "ideo:mistakable")
+ "ideo:mistakable-character")
+ ((eq '<-mistakable feature-name)
+ "ideo:mistakable-character-of")
((eq 'hanyu-dazidian feature-name)
"ideo:hanyu-dazidian")
((eq '*note feature-name)
"rdfs:comment")
+ ((eq '*references feature-name)
+ ":reference")
+ ((eq '*instance feature-name)
+ ":instance")
+ ((eq '*source-file feature-name)
+ ":source-file")
(t
(concat ":" (chise-turtle-uri-encode-ccs-name feature-name)))))
-(defun chise-turtle-format-ccs-code-point (ccs code-point)
- (let ((ccs-uri (chise-turtle-uri-encode-ccs-name ccs)))
- (unless (assoc ccs-uri chise-turtle-ccs-prefix-alist)
- (setq chise-turtle-ccs-prefix-alist
- (cons (cons ccs-uri ccs)
- chise-turtle-ccs-prefix-alist)))
- (format "%s:%s"
- ccs-uri
- (format (charset-code-point-format-spec ccs)
- code-point))))
-
-(defun chise-turtle-encode-char (object)
- (let (spec cell dest
- ccs ret ret2)
- (if (setq ret (encode-char object '=ucs))
- (chise-turtle-format-ccs-code-point '=ucs ret)
- (setq spec (char-attribute-alist object))
- (while (and spec
- (setq cell (pop spec)))
- (if (and (find-charset (car cell))
- (setq ret (cdr cell)))
- (setq dest (cons cell dest))))
- (setq ret (car (sort dest (lambda (a b)
- (char-attribute-name< (car a)(car b)))))
- ccs (car ret)
- ret (cdr ret))
- (cond (ret
- (chise-turtle-format-ccs-code-point ccs ret)
- )
- ((and (setq ccs (car (split-char object)))
- (setq ret (encode-char object ccs)))
- (chise-turtle-format-ccs-code-point ccs ret)
- )
- ((setq ret (get-char-attribute object 'ideographic-combination))
- (format "ideocomb:%s"
- (mapconcat (lambda (cell)
- (cond ((characterp cell)
- (char-to-string cell)
- )
- ((setq ret2 (find-char cell))
- (char-to-string ret2)
- )
- (t
- (format "%S" cell)
- )))
- ret ""))
- )
- (t
- (format (if est-hide-cgi-mode
- "system-char-id=0x%X"
- "system-char-id:0x%X")
- (encode-char object 'system-char-id))
- )))))
-
(defun concord-turtle-encode-object (obj)
(cond ((characterp obj)
(chise-turtle-encode-char obj)
(defun concord-turtle-insert-relation-feature (char name value line-breaking
ccss readable)
- (insert (format " %s%s "
- (chise-turtle-uri-encode-feature-name name)
- line-breaking))
+ (if (and (consp value)
+ (cdr value))
+ (insert (format " %s%s "
+ (chise-turtle-uri-encode-feature-name name)
+ line-breaking))
+ (insert (format " %-20s "
+ (chise-turtle-uri-encode-feature-name name))))
(concord-turtle-insert-relations value readable)
(insert " ;")
)
(defun concord-turtle-insert-metadata (name value)
- (let (col indent ret)
+ (let (col indent)
(insert (format "%-7s " name))
(cond
((or (eq name :sources)
(chise-turtle-uri-encode-ccs-name source))))
nil)
((eq name :references)
- (setq ret (car value))
- (setq ret (plist-get (nth 1 ret) :ref))
- (setq col (current-column))
- (setq indent (make-string col ?\ ))
- (insert (format "<%s>" ret))
- (dolist (refspec (cdr value))
- (setq ret (plist-get (nth 1 refspec) :ref))
- (insert (format " ,\n%s<%s>" indent ret)))
+ (concord-turtle-insert-references value)
nil)
(t
(insert (format "%S" value))
al
nil 'for-sub-node)
(setq separator lbs))
- (setq ret (prin1-to-string cell))
+ (setq ret (if (concord-object-p cell)
+ (concord-turtle-encode-object cell)
+ (prin1-to-string cell)))
(if separator
(if (< (+ (current-column)
(length ret)
(insert " ;")
'with-separator))
+(defun concord-turtle-insert-references (value &optional readable)
+ (let (ret col indent)
+ (setq ret (car value))
+ (setq ret (plist-get (nth 1 ret) :ref))
+ (setq col (current-column))
+ (setq indent (make-string col ?\ ))
+ (insert (format "<%s>" ret))
+ (dolist (refspec (cdr value))
+ (setq ret (plist-get (nth 1 refspec) :ref))
+ (insert (format " ,\n%s<%s>" indent ret)))))
+
(defun concord-turtle-insert-object (cell &optional readable)
(if (integerp cell)
(setq cell (decode-char '=ucs cell)))
((eq feature-name-base '=decomposition)
(concord-turtle-insert-decomposition value readable)
)
+ ((eq feature-name-base '*references)
+ (concord-turtle-insert-references value readable)
+ )
((or (eq feature-name-base 'ideographic-combination)
+ (eq feature-name-base '*instance)
+ (eq feature-name-base 'abstract-glyph)
(eq feature-name-base '<-formed)
(string-match "^\\(->\\|<-\\)[^*]*$" (symbol-name feature-name-base)))
(concord-turtle-insert-relations value readable)
(insert "rdf:value ")
(concord-turtle-insert-target-value mdval feature-name-base)
)
+ ((eq mdname :note)
+ (insert "rdfs:comment ")
+ (insert (format "%S" mdval))
+ )
(t
(concord-turtle-insert-metadata mdname mdval)))))
(if last-sep
ccss readable)
(setq obj-spec (delete feature-pair obj-spec))
)
- (when (and (setq feature-pair (assq '<-denotational@component obj-spec))
- (setq value (cdr feature-pair)))
- (insert line-separator)
- (concord-turtle-insert-relation-feature
- object '<-denotational@component value
- line-breaking
- ccss readable)
- (setq obj-spec (delete feature-pair obj-spec))
- )
+ ;; (when (and (setq feature-pair (assq '<-denotational@component obj-spec))
+ ;; (setq value (cdr feature-pair)))
+ ;; (insert line-separator)
+ ;; (concord-turtle-insert-relation-feature
+ ;; object '<-denotational@component value
+ ;; line-breaking
+ ;; ccss readable)
+ ;; (setq obj-spec (delete feature-pair obj-spec))
+ ;; )
(when (and (setq feature-pair (assq 'name obj-spec))
(setq value (cdr feature-pair)))
(insert (format "%s " line-separator))
(insert (format "%s ideo:total-strokes [ "
line-separator))
(setq col (current-column))
+ (setq indent (make-string col ?\ ))
(insert (format ":context domain:%-7s ;\n%srdf:value %S"
(chise-turtle-uri-encode-ccs-name domain)
- (make-string col ?\ )
+ indent
value))
(setq obj-spec (delete feature-pair obj-spec))
(setq skey (intern (format "%s*sources" key)))
(when (and (setq feature-pair (assq skey obj-spec))
(setq value (cdr feature-pair)))
- (insert (format " ;\n%s" indent))
+ (insert (format " ;\n%s" indent))
(concord-turtle-insert-metadata :sources value)
;; (insert (format " ;\n%s:sources (" indent))
- ;; (setq col (current-column))
- ;; (setq indent (make-string col ?\ ))
;; (insert (format " chisebib:%s" (car value)))
;; (dolist (cell (cdr value))
;; (insert (format "\n%s chisebib:%s" indent cell)))
(intern (format "=%s" name)))))
(setq child-ccs-list (cons ret child-ccs-list)))
)
- ((and
- (not readable)
- (not (eq name '->subsumptive))
- (not (eq name '->uppercase))
- (not (eq name '->lowercase))
- (not (eq name '->titlecase))
- (not (eq name '->canonical))
- (not (eq name '->Bopomofo))
- (not (eq name '->mistakable))
- (not (eq name '->ideographic-variants))
- (or (eq name '<-identical)
- (eq name '<-uppercase)
- (eq name '<-lowercase)
- (eq name '<-titlecase)
- (eq name '<-canonical)
- (eq name '<-ideographic-variants)
- ;; (eq name '<-synonyms)
- (string-match "^<-synonyms" (symbol-name name))
- (eq name '<-mistakable)
- (when (string-match "^->" (symbol-name name))
- (cond
- ((string-match "^->fullwidth" (symbol-name name))
- (not (and (consp value)
- (characterp (car value))
- (encode-char
- (car value) '=ucs 'defined-only)))
- )
- (t)))
- ))
- )
+ ;; ((and
+ ;; (not readable)
+ ;; (not (eq name '->subsumptive))
+ ;; (not (eq name '->uppercase))
+ ;; (not (eq name '->lowercase))
+ ;; (not (eq name '->titlecase))
+ ;; (not (eq name '->canonical))
+ ;; (not (eq name '->Bopomofo))
+ ;; (not (eq name '->mistakable))
+ ;; (not (eq name '->ideographic-variants))
+ ;; (or (eq name '<-identical)
+ ;; (eq name '<-uppercase)
+ ;; (eq name '<-lowercase)
+ ;; (eq name '<-titlecase)
+ ;; (eq name '<-canonical)
+ ;; (eq name '<-ideographic-variants)
+ ;; ;; (eq name '<-synonyms)
+ ;; (string-match "^<-synonyms" (symbol-name name))
+ ;; (eq name '<-mistakable)
+ ;; (when (string-match "^->" (symbol-name name))
+ ;; (cond
+ ;; ((string-match "^->fullwidth" (symbol-name name))
+ ;; (not (and (consp value)
+ ;; (characterp (car value))
+ ;; (encode-char
+ ;; (car value) '=ucs 'defined-only)))
+ ;; )
+ ;; (t)))
+ ;; ))
+ ;; )
((eq name 'ideographic-structure)
(insert (isd-turtle-format-char nil nil value (/ column 4)
'isd 'without-head-char))
(concord-turtle-insert-char-data char nil attributes)))
))
+;;;###autoload
(defun char-db-turtle-write-ideograph-radical-char-data (radical file)
(if (file-directory-p file)
(let ((name (char-feature (decode-char 'ucs (+ #x2EFF radical))