From c785fac99d3dc834dfbcc990b6c41edb6397aa94 Mon Sep 17 00:00:00 2001 From: MORIOKA Tomohiko Date: Sat, 15 Dec 2018 11:24:42 +0900 Subject: [PATCH] New files. --- concord-turtle-dump.el | 1355 ++++++++++++++++++++++++++++++++++++++++++++++++ isd-turtle.el | 635 +++++++++++++++++++++++ 2 files changed, 1990 insertions(+) create mode 100644 concord-turtle-dump.el create mode 100644 isd-turtle.el diff --git a/concord-turtle-dump.el b/concord-turtle-dump.el new file mode 100644 index 0000000..1a835b1 --- /dev/null +++ b/concord-turtle-dump.el @@ -0,0 +1,1355 @@ +;;; concord-turtle-dump.el --- Character Database utility -*- coding: utf-8-er; -*- + +;; Copyright (C) 2017,2018 MORIOKA Tomohiko. + +;; Author: MORIOKA Tomohiko +;; Keywords: CHISE, Character Database, RDF, Turtle, ISO/IEC 10646, UCS, Unicode, MULE. + +;; This file is part of CHISET (CHISE/Turtle). + +;; XEmacs CHISE is free software; you can redistribute it and/or +;; modify it under the terms of the GNU General Public License as +;; published by the Free Software Foundation; either version 2, or (at +;; your option) any later version. + +;; XEmacs CHISE is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with XEmacs CHISE; see the file COPYING. If not, write to +;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Code: + +(require 'char-db-util) +(require 'cwiki-common) +(require 'isd-turtle) +(require 'ideograph-util) + +(setq est-coded-charset-priority-list + '(; =ucs + =mj + =adobe-japan1-0 + =adobe-japan1-1 + =adobe-japan1-2 + =adobe-japan1-3 + =adobe-japan1-4 + =adobe-japan1-5 + =adobe-japan1-6 + =ucs@iso + =jis-x0208 =jis-x0208@1990 + =jis-x0213-1 + =jis-x0213-1@2000 =jis-x0213-1@2004 + =jis-x0213-2 + =jis-x0212 + =gt + =hanyo-denshi/ks + =hanyo-denshi/tk + =ucs-itaiji-001 + =ucs-itaiji-002 + =ucs-itaiji-003 + =ucs-itaiji-004 + =ucs-itaiji-005 + =ucs-itaiji-006 + =ucs-itaiji-007 + =ucs-itaiji-008 + =ucs-itaiji-009 + =ucs-itaiji-010 + =ucs-itaiji-011 + =ucs-itaiji-084 + =ucs-var-001 + =ucs-var-002 + =ucs-var-003 + =ucs-var-004 + =ucs-var-005 + =ucs-var-006 + =ucs-var-008 + =ucs-var-010 + =cns11643-1 =cns11643-2 =cns11643-3 + =cns11643-4 =cns11643-5 =cns11643-6 =cns11643-7 + =gb2312 + =big5-cdp + =ks-x1001 + =gt-k + =ucs@unicode + =ucs@JP/hanazono + =gb12345 + =ucs@cns + =ucs@gb + =zinbun-oracle =>zinbun-oracle + =daikanwa + =ruimoku-v6 + =cbeta =jef-china3 + =daikanwa/+2p + =+>ucs@iso =+>ucs@unicode + =+>ucs@jis + =+>ucs@cns + =+>ucs@ks + =+>ucs@jis/1990 + =>mj + =>jis-x0208 =>jis-x0213-1 + =>jis-x0208@1997 + =>ucs@iwds-1 + =>ucs@cognate + =>ucs@component + =>iwds-1 + =>ucs@iso + =>ucs@unicode + =>ucs@jis =>ucs@cns =>ucs@ks + =>gt + =>gt-k + =>>ucs@iso =>>ucs@unicode + =>>ucs@jis =>>ucs@cns =>>ucs@ks + =>>gt-k + =>>hanyo-denshi/ks + ==mj + ==ucs@iso + ==ucs@unicode + ==adobe-japan1-0 + ==adobe-japan1-1 + ==adobe-japan1-2 + ==adobe-japan1-3 + ==adobe-japan1-4 + ==adobe-japan1-5 + ==adobe-japan1-6 + ==ks-x1001 + ==hanyo-denshi/ks + ==hanyo-denshi/tk + ==ucs@jis + ==gt + ==cns11643-1 ==cns11643-2 ==cns11643-3 + ==cns11643-4 ==cns11643-5 ==cns11643-6 ==cns11643-7 + ==jis-x0212 + ==ucs@cns + ==koseki + ==daikanwa + ==gt-k + ==ucs@gb + ==ucs-itaiji-001 + ==ucs-itaiji-002 + ==ucs-itaiji-003 + ==ucs-itaiji-005 + ==ucs-var-002 + ==ucs@JP/hanazono + ==daikanwa/+2p + =>>jis-x0208 =>>jis-x0213-1 =>>jis-x0213-2 + =+>jis-x0208 =+>jis-x0213-1 =+>jis-x0213-2 + =+>hanyo-denshi/jt + =+>jis-x0208@1978 + =>>gt + =+>adobe-japan1 + =>>adobe-japan1 + =jis-x0208@1983 =jis-x0208@1978 + =>ucs-itaiji-001 + =>ucs-itaiji-002 + =>ucs-itaiji-003 + =>ucs-itaiji-004 + =>ucs-itaiji-005 + =>ucs-itaiji-006 + =>ucs-itaiji-007 + ==>ucs@bucs + =big5 + =>cbeta + ===mj + ===ucs@iso + ===ucs@unicode + ===hanyo-denshi/ks + ===ks-x1001 + ===gt + ===gt-k + ===ucs@ks + ===ucs@gb + =shinjigen + =shinjigen@rev + =shinjigen@1ed + =shinjigen/+p@rev + ==shinjigen + ==shinjigen@rev + ==daikanwa/+p + ==shinjigen@1ed + ===daikanwa/+p + =>daikanwa/ho + ===daikanwa/ho + )) + +(defvar chise-turtle-ccs-prefix-alist nil) + +(setq chise-turtle-feature-domains + (append char-db-feature-domains + (let (dest domain) + (dolist (feature (char-attribute-list)) + (setq feature (symbol-name feature)) + (when (string-match + "\\(radical\\|strokes\\)@\\([^@*]+\\)\\(\\*\\|$\\)" + feature) + (setq domain (intern (match-string 2 feature))) + (unless (memq domain dest) + (setq dest (cons domain dest))))) + (sort dest #'string<)))) + +(defun charset-code-point-format-spec (ccs) + (cond ((memq ccs '(=ucs)) + "0x%04X") + (t + (let ((ccs-name (symbol-name ccs))) + (cond + ((string-match + "\\(shinjigen\\|daikanwa/ho\\|=>iwds-1\\)" + ccs-name) + "%04d") + ((string-match + "\\(gt\\|daikanwa\\|adobe-japan1\\|cbeta\\|zinbun-oracle\\|hng\\)" + ccs-name) + "%05d") + ((string-match "\\(hanyo-denshi/ks\\|koseki\\|mj\\)" ccs-name) + "%06d") + ((string-match "hanyo-denshi/tk" ccs-name) + "%08d") + (t + "0x%X")))))) + +(defun chise-turtle-uri-decode-feature-name (uri-feature) + (cond ((string= "a.ucs" uri-feature) + '=ucs) + ((string= "a.big5" uri-feature) + '=big5) + (t + (www-uri-decode-feature-name uri-feature)))) + +(defun chise-turtle-uri-encode-ccs-name (feature-name) + (cond + ((eq '=ucs feature-name) + "a.ucs") + ((eq '=big5 feature-name) + "a.big5") + ((eq '==>ucs@bucs feature-name) + "bucs") + (t + (mapconcat (lambda (c) + (cond + ((eq c ?@) + "_") + ((eq c ?+) + "._.") + ((eq c ?=) + ".:.") + ((eq c ?|) + "._cmp_.") + (t + (char-to-string c)))) + (www-uri-encode-feature-name feature-name) + "")))) + +(defun chise-turtle-uri-encode-feature-name (feature-name) + (cond + ((eq '->subsumptive feature-name) + ":subsume") + ((eq '<-denotational feature-name) + ":denotation-of") + ((eq '<-formed feature-name) + ":form-of") + ((eq '<-same feature-name) + "ideo:same-as") + ((eq '<-simplified feature-name) + "ideo:simplified-form-of") + ((eq '<-vulgar feature-name) + "ideo:vulgar-form-of") + ((eq '<-wrong feature-name) + "ideo:wrong-form-of") + ((eq '<-original feature-name) + "ideo:original-form-of") + ((eq '<-ancient feature-name) + "ideo:ancient-form-of") + ((eq '<-Small-Seal feature-name) + "ideo:Small-Seal-of") + ((eq '<-interchangeable feature-name) + "ideo:interchangeable-form-of") + ((eq '->interchangeable feature-name) + "ideo:interchangeable") + ((eq '->mistakable feature-name) + "ideo:mistakable") + ((eq 'hanyu-dazidian feature-name) + "ideo:hanyu-dazidian") + ((eq '*note feature-name) + "rdfs:comment") + (t + (concat ":" (chise-turtle-uri-encode-ccs-name feature-name))))) + +(defun chise-turtle-format-ccs-code-point (ccs code-point) + (let ((ccs-uri (chise-turtle-uri-encode-ccs-name ccs))) + (unless (assoc ccs-uri chise-turtle-ccs-prefix-alist) + (setq chise-turtle-ccs-prefix-alist + (cons (cons ccs-uri ccs) + chise-turtle-ccs-prefix-alist))) + (format "%s:%s" + ccs-uri + (format (charset-code-point-format-spec ccs) + code-point)))) + +(defun chise-turtle-encode-char (object) + (let (spec cell dest + ccs ret ret2) + (if (setq ret (encode-char object '=ucs)) + (chise-turtle-format-ccs-code-point '=ucs ret) + (setq spec (char-attribute-alist object)) + (while (and spec + (setq cell (pop spec))) + (if (and (find-charset (car cell)) + (setq ret (cdr cell))) + (setq dest (cons cell dest)))) + (setq ret (car (sort dest (lambda (a b) + (char-attribute-name< (car a)(car b))))) + ccs (car ret) + ret (cdr ret)) + (cond (ret + (chise-turtle-format-ccs-code-point ccs ret) + ) + ((and (setq ccs (car (split-char object))) + (setq ret (encode-char object ccs))) + (chise-turtle-format-ccs-code-point ccs ret) + ) + ((setq ret (get-char-attribute object 'ideographic-combination)) + (format "ideocomb:%s" + (mapconcat (lambda (cell) + (cond ((characterp cell) + (char-to-string cell) + ) + ((setq ret2 (find-char cell)) + (char-to-string ret2) + ) + (t + (format "%S" cell) + ))) + ret "")) + ) + (t + (format (if est-hide-cgi-mode + "system-char-id=0x%X" + "system-char-id:0x%X") + (encode-char object 'system-char-id)) + ))))) + +(defun concord-turtle-encode-object (obj) + (cond ((characterp obj) + (chise-turtle-encode-char obj) + ) + ((concord-object-p obj) + (let ((genre (est-object-genre obj)) + (url-object (www-uri-encode-object obj))) + (format "<%s/%s/%s>" + "http://www.chise.org/est/view" + genre + url-object))))) + +(defun chise-split-feature-name (feature-name) + (let (base domain number meta) + (setq feature-name (symbol-name feature-name)) + (if (string-match ".\\*." feature-name) + (setq meta (intern + (format ":%s" (substring feature-name (1- (match-end 0))))) + feature-name (substring feature-name 0 (1+ (match-beginning 0))))) + (if (string-match "\\$_\\([0-9]+\\)$" feature-name) + (setq number (car (read-from-string (match-string 1 feature-name))) + feature-name (substring feature-name 0 (match-beginning 0)))) + (if (string-match "@" feature-name) + (setq domain (car (read-from-string (substring feature-name (match-end 0)))) + base (intern (substring feature-name 0 (match-beginning 0)))) + (setq base (intern feature-name))) + (list base domain number meta))) + +(defun chise-compose-feature-name (base domain number meta) + (let ((name (if domain + (format "%s@%s" base domain) + (symbol-name base)))) + (if number + (setq name (format "%s$_%d" name number))) + (if meta + (setq name (format "%s*%s" name + (substring (symbol-name meta) 1)))) + (intern name))) + +(defvar chise-feature-name-base-metadata-alist nil) + +(defun chise-update-feature-name-base-metadata-alist () + (interactive) + (let (base domain number metadata + bcell dcell ncell ret) + (setq chise-feature-name-base-metadata-alist nil) + (dolist (fname (sort (char-attribute-list) + #'char-attribute-name<)) + (setq ret (chise-split-feature-name fname) + base (car ret) + domain (nth 1 ret) + number (nth 2 ret) + metadata (nth 3 ret)) + (when metadata + (if (setq bcell (assq base chise-feature-name-base-metadata-alist)) + (if (setq dcell (assq domain (cdr bcell))) + (if (setq ncell (assq number (cdr dcell))) + (unless (memq metadata (cdr ncell)) + (setcdr ncell (nconc (cdr ncell) + (list metadata)))) + (setcdr dcell (cons (list number metadata) + (cdr dcell)))) + (setcdr bcell (cons (list domain (list number metadata)) + (cdr bcell)))) + (setq chise-feature-name-base-metadata-alist + (cons (list base (list domain (list number metadata))) + chise-feature-name-base-metadata-alist)) + ))))) + +(chise-update-feature-name-base-metadata-alist) + +(defun chise-get-char-attribute-with-metadata (obj-spec feature-name-base domain) + (let ((feature-pair (assq (chise-compose-feature-name + feature-name-base domain nil nil) + obj-spec)) + value + dcell + base-metadata metadata + ret m i rest dest) + (when feature-pair + (setq value (cdr feature-pair)) + (cond + ((and (setq ret (assq feature-name-base + chise-feature-name-base-metadata-alist)) + (setq dcell (assq domain (cdr ret)))) + (if (setq ret (assq nil (cdr dcell))) + (dolist (bmn (cdr ret)) + (when (setq m (assq (chise-compose-feature-name + feature-name-base domain nil bmn) + obj-spec)) + (setq m (cdr m)) + (setq base-metadata + (list* bmn m base-metadata))))) + (setq i 1 + rest value) + (while (consp rest) + (setq dest + (cons (cond + ((setq ret (assq i (cdr dcell))) + (setq metadata nil) + (dolist (mn (cdr ret)) + (when (setq m (assq (chise-compose-feature-name + feature-name-base domain i mn) + obj-spec)) + (setq m (cdr m)) + (setq metadata (list* mn m metadata)))) + (if metadata + (list* :value (car rest) metadata) + (car rest)) + ) + (t (car rest))) + dest)) + (setq i (1+ i) + rest (cdr rest))) + (list (nconc (nreverse dest) rest) + base-metadata) + ) + (t (list value nil))) + ))) + +(defun chise-split-ccs-name (ccs) + (cond ((eq ccs '=ucs) + '(ucs abstract-character nil) + ) + ((eq ccs '=big5) + '(big5 abstract-character nil) + ) + (t + (setq ccs (symbol-name ccs)) + (let (ret) + (if (string-match "^\\(=[=+>]*\\)\\([^=>@*]+\\)@?" ccs) + (list (intern (match-string 2 ccs)) + (chise-decode-ccs-prefix (match-string 1 ccs)) + (if (string= (setq ret (substring ccs (match-end 0))) "") + nil + (intern ret)))) + )))) + +(defun chise-decode-ccs-prefix (ccs) + (or (cdr (assoc ccs '(("==>" . super-abstract-character) + ("=>" . abstract-character) + ("=+>" . unified-glyph) + ("=" . abstract-glyph) + ("=>>" . detailed-glyph) + ("==" . abstract-glyph-form) + ("===" . glyph-image)))) + 'character)) + +(defun chise-turtle-uri-split-ccs (uri-ccs) + (cond + ((string-match "^a2\\." uri-ccs) + (cons ":super-abstract-character" + (substring uri-ccs (match-end 0))) + ) + ((string-match "^a\\." uri-ccs) + (cons ":abstract-character" + (substring uri-ccs (match-end 0))) + ) + ((string-match "^o\\." uri-ccs) + (cons ":unified-glyph" + (substring uri-ccs (match-end 0))) + ) + ((string-match "^rep\\." uri-ccs) + (cons ":abstract-glyph" + (substring uri-ccs (match-end 0))) + ) + ((string-match "^g\\." uri-ccs) + (cons ":detailed-glyph" + (substring uri-ccs (match-end 0))) + ) + ((string-match "^g2\\." uri-ccs) + (cons ":abstract-glyph-form" + (substring uri-ccs (match-end 0))) + ) + ((string-match "^gi\\." uri-ccs) + (cons ":abstract-glyph-form" + (substring uri-ccs (match-end 0))) + ) + ((string-match "^repi\\." uri-ccs) + (cons ":glyph-image" + (substring uri-ccs (match-end 0))) + ) + (t (cons ":character" uri-ccs)))) + +(defun concord-turtle-insert-relation-feature (char name value line-breaking + ccss readable) + (insert (format " %s%s " + (chise-turtle-uri-encode-feature-name name) + line-breaking)) + (concord-turtle-insert-relations value readable) + (insert " ;") + ) + +(defun concord-turtle-insert-metadata (name value) + (let (col indent ret) + (insert (format "%-7s " name)) + (cond + ((or (eq name :sources) + (eq name :denied)) + (setq col (current-column)) + (setq indent (make-string col ?\ )) + (insert (format "chisebib:%s" + (chise-turtle-uri-encode-ccs-name (car value)))) + (dolist (source (cdr value)) + (insert (format " ,\n%schisebib:%s" indent + (chise-turtle-uri-encode-ccs-name source)))) + nil) + ((eq name :references) + (setq ret (car value)) + (setq ret (plist-get (nth 1 ret) :ref)) + (setq col (current-column)) + (setq indent (make-string col ?\ )) + (insert (format "<%s>" ret)) + (dolist (refspec (cdr value)) + (setq ret (plist-get (nth 1 refspec) :ref)) + (insert (format " ,\n%s<%s>" indent ret))) + nil) + (t + (insert (format "%S" value)) + nil)))) + +(defun concord-turtle-insert-radical (radical-number) + (insert (format " %3d ; # %c" + radical-number + (ideographic-radical radical-number))) + 'with-separator) + +(defun concord-turtle-insert-list (value &optional readable) + (let (lbs separator rest cell al cal key ret) + (insert "( ") + (setq lbs (concat "\n" (make-string (current-column) ?\ )) + separator nil) + (while (consp value) + (setq cell (car value)) + (if (and (consp cell) + (consp (car cell)) + (setq ret (condition-case nil + (find-char cell) + (error nil)))) + (progn + (setq rest cell + al nil + cal nil) + (while rest + (setq key (car (car rest))) + (if (find-charset key) + (setq cal (cons key cal)) + (setq al (cons key al))) + (setq rest (cdr rest))) + (if separator + (insert lbs)) + (concord-turtle-insert-object-features ret + readable + al + nil 'for-sub-node) + (setq separator lbs)) + (setq ret (prin1-to-string cell)) + (if separator + (if (< (+ (current-column) + (length ret) + (length separator)) + 76) + (insert separator) + (insert lbs))) + (insert ret) + (setq separator " ")) + (setq value (cdr value))) + (insert " ) ;") + 'with-separator)) + +(defun concord-turtle-insert-source-list (value &optional readable) + (let (lbs separator rest cell al cal key ret) + (setq lbs (concat " ,\n" (make-string (current-column) ?\ )) + separator nil) + (while (consp value) + (setq cell (car value)) + (if (and (consp cell) + (consp (car cell)) + (setq ret (condition-case nil + (find-char cell) + (error nil)))) + (progn + (setq rest cell + al nil + cal nil) + (while rest + (setq key (car (car rest))) + (if (find-charset key) + (setq cal (cons key cal)) + (setq al (cons key al))) + (setq rest (cdr rest))) + (if separator + (insert lbs)) + (concord-turtle-insert-object-features ret + readable + al + nil 'for-sub-node) + (setq separator lbs)) + (setq ret (prin1-to-string cell)) + (if separator + (if (< (+ (current-column) + (length ret) + (length separator)) + 76) + (insert separator) + (insert lbs))) + (if (string-match "=" ret) + (insert (format "%s:%s" + (substring ret 0 (match-beginning 0)) + (substring ret (match-end 0)))) + (insert (format "chisebib:%s" ret))) + (setq separator " , ")) + (setq value (cdr value))) + (insert " ;") + 'with-separator)) + +(defun concord-turtle-insert-object (cell &optional readable) + (if (integerp cell) + (setq cell (decode-char '=ucs cell))) + (cond + ((characterp cell) + (insert (format "%-20s" (chise-turtle-encode-char cell))) + nil) + ((concord-object-p cell) + (insert (format "%-20s" (concord-turtle-encode-object cell))) + nil) + (t + (concord-turtle-insert-char-ref cell '<-formed) + ))) + +(defun concord-turtle-insert-decomposition (value &optional readable) + (let ((lbs (concat "\n" (make-string (current-column) ?\ ))) + base vs lb) + (if (characterp value) + (setq value (list value))) + (if (setq base (pop value)) + (cond ((setq vs (pop value)) + (insert "[ :base ") + (setq lb (concord-turtle-insert-object base readable)) + (insert " ;") + (insert lbs) + (insert " :vs ") + (setq lb (concord-turtle-insert-object vs readable)) + (insert lbs) + (insert "]") + nil) + (t + (setq lb (concord-turtle-insert-object base readable)) + )) + ))) + +(defun concord-turtle-insert-relations (value &optional readable) + (let ((lbs (concat "\n" (make-string (current-column) ?\ ))) + separator cell) + (if (characterp value) + (setq value (list value))) + (while (consp value) + (setq cell (car value)) + ;; (if (integerp cell) + ;; (setq cell (decode-char '=ucs cell))) + (if separator + (insert separator) + (setq separator (format " ,%s" lbs))) + ;; (cond + ;; ((characterp cell) + ;; (insert (format "%-20s" (chise-turtle-encode-char cell))) + ;; ) + ;; ((concord-object-p cell) + ;; (insert (format "%-20s" (concord-turtle-encode-object cell))) + ;; ) + ;; (t + ;; (concord-turtle-insert-char-ref cell '<-formed))) + (concord-turtle-insert-object cell readable) + (setq value (cdr value))) + nil)) + +(defun concord-turtle-insert-target-value (value feature-name-base &optional readable) + (cond ((eq feature-name-base 'ideographic-radical) + (concord-turtle-insert-radical value) + ) + ((eq feature-name-base '=decomposition) + (concord-turtle-insert-decomposition value readable) + ) + ((or (eq feature-name-base 'ideographic-combination) + (eq feature-name-base '<-formed) + (string-match "^\\(->\\|<-\\)[^*]*$" (symbol-name feature-name-base))) + (concord-turtle-insert-relations value readable) + ) + ((eq feature-name-base 'comment) + (insert (format "%S" value)) + nil) + ((eq feature-name-base 'sources) + (concord-turtle-insert-source-list value readable) + ) + ((consp value) + (concord-turtle-insert-list value readable) + ) + ((or (symbolp value) + (char-or-string-p value)) + (insert (format " %-14s" (format "\"%s\"" value))) + nil) + (t + (insert (format " %-14s" value)) + nil))) + +(defun concord-turtle-insert-feature-value (value metadata domain feature-name-base) + (let (indent0 indent rest mdname mdval lb) + (cond + ((or metadata domain) + (setq indent0 (make-string (current-column) ?\ )) + (insert "[ ") + (setq indent (make-string (current-column) ?\ )) + (when domain + (insert (format ":context domain:%-7s ;" + (chise-turtle-uri-encode-ccs-name domain))) + (setq lb t)) + (if lb + (insert (format "\n%s" indent))) + (insert "rdf:value ") + (setq lb (concord-turtle-insert-target-value value feature-name-base)) + (setq rest metadata) + (while rest + (setq mdname (pop rest) + mdval (pop rest)) + (insert (format " ;\n%s" indent)) + (setq lb (concord-turtle-insert-metadata mdname mdval))) + (if lb + (insert (format "\n%s] ;" indent0)) + (insert " ] ;")) + 'with-separator) + (t + (concord-turtle-insert-target-value value feature-name-base) + )))) + +(defun concord-turtle-insert-char-ref (char-ref feature-name-base) + (let (indent0 indent rest mdname mdval lb last-sep) + (setq indent0 (make-string (current-column) ?\ )) + (insert "[ ") + (setq indent (make-string (current-column) ?\ )) + (setq rest char-ref) + (while rest + (setq mdname (pop rest) + mdval (pop rest)) + (if lb + (insert (format "%s\n%s" + (if last-sep + "" + " ;") + indent)) + (setq lb t)) + (setq last-sep + (cond ((eq mdname :value) + (insert "rdf:value ") + (concord-turtle-insert-target-value mdval feature-name-base) + ) + (t + (concord-turtle-insert-metadata mdname mdval))))) + (if last-sep + (insert (format "\n%s]" indent0)) + (insert " ]")) + nil)) + +(defun concord-turtle-insert-object-features (object + &optional readable attributes column + for-sub-node) + (unless column + (setq column (current-column))) + (let ((est-coded-charset-priority-list est-coded-charset-priority-list) + (est-view-url-prefix "http://chise.org/est/view") + (obj-spec (sort (del-alist 'composition + (if (characterp object) + (char-attribute-alist object) + (concord-object-spec object))) + (lambda (a b) + (char-attribute-name< (car a)(car b))))) + feature-pair + id obj-id type domain + name value metadata + name-base name-domain + radical strokes + (line-breaking (concat "\n" (make-string column ?\ ))) + line-separator + ret + skey + dest-ccss ; sources required-features + ccss eq-cpos-list + uri-ccs uri-cpos ccs-base children child-ccs-list col indent lb) + (setq line-separator line-breaking) + (setq id (concord-turtle-encode-object object)) + (insert (format "%s" id)) + (cond + ((characterp object) + (setq obj-id (file-name-nondirectory id)) + (string-match ":" obj-id) + (setq uri-ccs (substring obj-id 0 (match-beginning 0)) + uri-cpos (substring obj-id (match-end 0))) + (setq ret (assoc uri-ccs chise-turtle-ccs-prefix-alist)) + (setq dest-ccss (list (cdr ret))) + (setq ret (chise-split-ccs-name (cdr ret))) + (setq ccs-base (car ret) + type (nth 1 ret) + domain (nth 2 ret)) + (insert (format "%s a chisegg:%s ;" line-separator type)) + (insert (format "%s :%s-of" line-breaking type)) + (if (null domain) + (insert (format " %s:%s ;" + (chise-turtle-uri-encode-ccs-name ccs-base) uri-cpos)) + (insert " [ ") + (setq col (current-column)) + (insert (format ":context domain:%-7s ;\n%srdf:value %5s:%-7s ] ;" + (chise-turtle-uri-encode-ccs-name domain) + (make-string col ?\ ) + (chise-turtle-uri-encode-ccs-name ccs-base) uri-cpos))) + )) + (when (setq feature-pair (assq '<-subsumptive obj-spec)) + (when (or readable (not for-sub-node)) + (when (setq value (cdr feature-pair)) + (insert line-separator) + (concord-turtle-insert-relation-feature object '<-subsumptive value + line-breaking + ccss readable) + )) + (setq obj-spec (delete feature-pair obj-spec)) + ) + (when (and (setq feature-pair (assq '<-denotational obj-spec)) + (setq value (cdr feature-pair))) + (insert line-separator) + (concord-turtle-insert-relation-feature object '<-denotational value + line-breaking + ccss readable) + (setq obj-spec (delete feature-pair obj-spec)) + ) + (when (and (setq feature-pair (assq '<-denotational@component obj-spec)) + (setq value (cdr feature-pair))) + (insert line-separator) + (concord-turtle-insert-relation-feature + object '<-denotational@component value + line-breaking + ccss readable) + (setq obj-spec (delete feature-pair obj-spec)) + ) + (when (and (setq feature-pair (assq 'name obj-spec)) + (setq value (cdr feature-pair))) + (insert (format "%s " line-separator)) + (insert (format + (if (> (+ (current-column) (length value)) 48) + ":name %S ;" + ":name %S ;") + value)) + (setq obj-spec (delete feature-pair obj-spec)) + ) + (when (and (setq feature-pair (assq 'name* obj-spec)) + (setq value (cdr feature-pair))) + (insert (format "%s " line-separator)) + (insert (format + (if (> (+ (current-column) (length value)) 48) + "rdfs:label %S ;" + "rdfs:label %S ;") + value)) + (setq obj-spec (delete feature-pair obj-spec)) + ) + (when (and (setq feature-pair (assq 'script obj-spec)) + (setq value (cdr feature-pair))) + (insert (format "%s :script\t\t ( %s ) ;" + line-separator + (mapconcat (lambda (cell) + (format "script:%s" cell)) + value " "))) + (setq obj-spec (delete feature-pair obj-spec)) + ) + (when (and (setq feature-pair (assq '=>ucs obj-spec)) + (setq value (cdr feature-pair))) + (insert (format "%s :to.ucs\t\t a.ucs:0x%04X ; # %c" + line-separator value (decode-char '=ucs value))) + (setq obj-spec (delete feature-pair obj-spec)) + ) + (when (and (setq feature-pair (assq '=>ucs* obj-spec)) + (setq value (cdr feature-pair))) + (insert (format "%s :to.canonical-ucs\ta.ucs:0x%04X ; # %c" + line-separator value (decode-char '=ucs value))) + (setq obj-spec (delete feature-pair obj-spec)) + ) + (dolist (name '(=>ucs@gb =>ucs@big5)) + (when (and (setq feature-pair (assq name obj-spec)) + (setq value (cdr feature-pair))) + (insert line-separator) + (insert (format " \"%-20s\": #x%04X,\t\"_comment\": \"%c\"%s" + name value + (decode-char (intern + (concat "=" + (substring + (symbol-name name) 2))) + value) + line-breaking)) + (setq obj-spec (delete feature-pair obj-spec)) + )) + (when (and (setq feature-pair (assq 'general-category obj-spec)) + (setq value (cdr feature-pair))) + (insert (format "%s :general-category \"%s\" ; # %s" + line-separator value + (cond ((rassoc value unidata-normative-category-alist) + "Normative Category") + ((rassoc value unidata-informative-category-alist) + "Informative Category") + (t + "Unknown Category")))) + (setq obj-spec (delete feature-pair obj-spec)) + ) + (when (and (setq feature-pair (assq 'bidi-category obj-spec)) + (setq value (cdr feature-pair))) + (insert (format "%s :bidi-category %S ;" + line-separator + value)) + (setq obj-spec (delete feature-pair obj-spec)) + ) + (when (and (setq feature-pair (assq 'mirrored obj-spec)) + (setq value (cdr feature-pair))) + (insert (format "%s :mirrored \"%s\" ;" + line-separator + value)) + (setq obj-spec (delete feature-pair obj-spec)) + ) + (cond + ((and (and (setq feature-pair (assq 'decimal-digit-value obj-spec)) + (setq value (cdr feature-pair)))) + (insert (format "%s :decimal-digit-value %2d ;" + line-separator value)) + (setq obj-spec (delete feature-pair obj-spec)) + (when (and (setq feature-pair (assq 'digit-value obj-spec)) + (setq value (cdr feature-pair))) + (insert (format "%s :digit-value\t %2d ;" + line-separator value)) + (setq obj-spec (delete feature-pair obj-spec)) + ) + (when (and (setq feature-pair (assq 'numeric-value obj-spec)) + (setq value (cdr feature-pair))) + (insert (format "%s :numeric-value\t %2d ;" + line-separator value)) + (setq obj-spec (delete feature-pair obj-spec)) + ) + ) + (t + (when (and (setq feature-pair (assq 'digit-value obj-spec)) + (setq value (cdr feature-pair))) + (insert line-separator) + (insert (format "%s :digit-value\t %2d ;" + line-separator value)) + (setq obj-spec (delete feature-pair obj-spec)) + ) + (when (and (setq feature-pair (assq 'numeric-value obj-spec)) + (setq value (cdr feature-pair))) + (insert line-separator) + (insert (format "%s :numeric-value\t %2d ;" + line-separator value)) + (setq obj-spec (delete feature-pair obj-spec)) + ))) + (when (and (setq feature-pair (assq 'iso-10646-comment obj-spec)) + (setq value (cdr feature-pair))) + (insert line-separator) + (insert (format "{\"iso-10646-comment\":\t %S}%s" + value + line-breaking)) + (setq obj-spec (delete feature-pair obj-spec)) + ) + (when (and (setq feature-pair (assq 'morohashi-daikanwa obj-spec)) + (setq value (cdr feature-pair))) + (insert line-separator) + (insert (format "%s :morohashi-daikanwa\t %S ;" + line-separator value)) + (setq obj-spec (delete feature-pair obj-spec)) + ) + (setq radical nil + strokes nil) + (when (and (setq feature-pair (assq 'ideographic-radical obj-spec)) + (setq value (cdr feature-pair))) + (setq radical value) + (insert (format "%s ideo:radical %3d ; # %c " + line-separator + radical + (ideographic-radical radical) + )) + (setq obj-spec (delete feature-pair obj-spec)) + ) + (when (and (setq feature-pair (assq 'shuowen-radical obj-spec)) + (setq value (cdr feature-pair))) + (insert line-separator) + (insert (format " \"shuowen-radical\":\t %S,\t\"_comment\": \"%c\"" + value + (shuowen-radical value))) + (setq obj-spec (delete feature-pair obj-spec)) + ) + (let (key) + (dolist (domain chise-turtle-feature-domains) + (setq key (intern (format "%s@%s" 'ideographic-radical domain))) + (when (and (setq feature-pair (assq key obj-spec)) + (setq value (cdr feature-pair))) + (setq radical value) + (insert (format "%s ideo:radical [ " + line-separator)) + (setq col (current-column)) + (setq indent (make-string col ?\ )) + (insert (format ":context domain:%-7s ;\n%srdf:value " + (chise-turtle-uri-encode-ccs-name domain) + indent)) + (setq lb (concord-turtle-insert-radical radical)) + (setq obj-spec (delete feature-pair obj-spec)) + (setq skey (intern (format "%s*sources" key))) + (when (and (setq feature-pair (assq skey obj-spec)) + (setq value (cdr feature-pair))) + (insert (format "\n%s" indent)) + (setq lb (concord-turtle-insert-metadata :sources value)) + ;; (insert (format " ;\n%s:sources (" indent)) + ;; (setq col (current-column)) + ;; (setq indent (make-string col ?\ )) + ;; (insert (format " chisebib:%s" (car value))) + ;; (dolist (cell (cdr value)) + ;; (insert (format "\n%s chisebib:%s" indent cell))) + ;; (insert " )") + ) + (setq obj-spec (delete feature-pair obj-spec)) + (if lb + (insert (format "\n%s] ;" (make-string (- col 2) ?\ ))) + (insert " ] ;")) + ) + (setq key (intern (format "%s@%s" 'ideographic-strokes domain))) + (when (and (setq feature-pair (assq key obj-spec)) + (setq value (cdr feature-pair))) + (setq strokes value) + (insert (format "%s ideo:strokes [ " + line-separator)) + (setq col (current-column)) + (setq indent (make-string col ?\ )) + (insert (format ":context domain:%-7s ;\n%srdf:value %S" + (chise-turtle-uri-encode-ccs-name domain) + indent strokes)) + (setq obj-spec (delete feature-pair obj-spec)) + (setq skey (intern (format "%s*sources" key))) + (when (and (setq feature-pair (assq skey obj-spec)) + (setq value (cdr feature-pair))) + (insert (format " ;\n%s" indent)) + (concord-turtle-insert-metadata :sources value) + ;; (insert (format " ;\n%s:sources (" indent)) + ;; (setq col (current-column)) + ;; (setq indent (make-string col ?\ )) + ;; (insert (format " chisebib:%s" (car value))) + ;; (dolist (cell (cdr value)) + ;; (insert (format "\n%s chisebib:%s" indent cell))) + ;; (insert " )") + ) + (setq obj-spec (delete feature-pair obj-spec)) + (insert " ] ;") + ) + (setq key (intern (format "%s@%s" 'total-strokes domain))) + (when (and (setq feature-pair (assq key obj-spec)) + (setq value (cdr feature-pair))) + (insert (format "%s ideo:total-strokes [ " + line-separator)) + (setq col (current-column)) + (insert (format ":context domain:%-7s ;\n%srdf:value %S" + (chise-turtle-uri-encode-ccs-name domain) + (make-string col ?\ ) + value)) + (setq obj-spec (delete feature-pair obj-spec)) + (setq skey (intern (format "%s*sources" key))) + (when (and (setq feature-pair (assq skey obj-spec)) + (setq value (cdr feature-pair))) + (insert (format " ;\n%s" indent)) + (concord-turtle-insert-metadata :sources value) + ;; (insert (format " ;\n%s:sources (" indent)) + ;; (setq col (current-column)) + ;; (setq indent (make-string col ?\ )) + ;; (insert (format " chisebib:%s" (car value))) + ;; (dolist (cell (cdr value)) + ;; (insert (format "\n%s chisebib:%s" indent cell))) + ;; (insert " )") + ) + (setq obj-spec (delete feature-pair obj-spec)) + (insert " ] ;") + ) + ;; (dolist (feature '(ideographic-radical + ;; ideographic-strokes + ;; total-strokes)) + ;; (setq key (intern (format "%s@%s*sources" feature domain))) + ;; (when (and (setq feature-pair (assq key obj-spec)) + ;; (setq value (cdr feature-pair))) + ;; (insert line-separator) + ;; (insert (format " \"%s\":%s" key line-breaking)) + ;; (dolist (cell value) + ;; (insert (format " %s" cell))) + ;; (setq obj-spec (delete feature-pair obj-spec)) + ;; )) + )) + (when (and (setq feature-pair (assq 'ideographic-strokes obj-spec)) + (setq value (cdr feature-pair))) + (setq strokes value) + (insert (format "%s ideo:strokes %2d ;" + line-separator strokes)) + (setq obj-spec (delete feature-pair obj-spec)) + ) + (when (and (setq feature-pair (assq 'total-strokes obj-spec)) + (setq value (cdr feature-pair))) + (insert (format "%s ideo:total-strokes %2d ;" + line-separator value)) + (setq obj-spec (delete feature-pair obj-spec)) + ) + ;; (if (equal (get-char-attribute char '->titlecase) + ;; (get-char-attribute char '->uppercase)) + ;; (setq attributes (delq '->titlecase attributes))) + ;; (unless readable + ;; (dolist (ignored '(composition + ;; ->denotational <-subsumptive ->ucs-unified + ;; ->ideographic-component-forms)) + ;; (setq attributes (delq ignored attributes)))) + (while obj-spec + (setq name (car (car obj-spec))) + (setq ret (chise-split-feature-name name)) + (setq name-base (car ret) + name-domain (nth 1 ret)) + (when (setq value (chise-get-char-attribute-with-metadata + obj-spec name-base name-domain)) + (setq metadata (nth 1 value) + value (car value)) + (cond ((setq ret (find-charset name)) + (setq name (charset-name ret)) + (when (not (memq name dest-ccss)) + (setq dest-ccss (cons name dest-ccss)) + (if (null value) + (insert (format "%s :%-25s rdf:nil ;" line-separator + (chise-turtle-uri-encode-ccs-name name))) + (setq ret (chise-turtle-format-ccs-code-point name value)) + (insert (format "%s :eq %-25s ; # %c" line-separator + ret + (char-db-decode-isolated-char name value))) + (setq eq-cpos-list (cons (list ret name value) eq-cpos-list)))) + (if (find-charset + (setq ret (if (eq name '=ucs) + (if (< value #x10000) + '==ucs@unicode + '==ucs@iso) + (intern (format "=%s" name))))) + (setq child-ccs-list (cons ret child-ccs-list))) + ) + ((and + (not readable) + (not (eq name '->subsumptive)) + (not (eq name '->uppercase)) + (not (eq name '->lowercase)) + (not (eq name '->titlecase)) + (not (eq name '->canonical)) + (not (eq name '->Bopomofo)) + (not (eq name '->mistakable)) + (not (eq name '->ideographic-variants)) + (or (eq name '<-identical) + (eq name '<-uppercase) + (eq name '<-lowercase) + (eq name '<-titlecase) + (eq name '<-canonical) + (eq name '<-ideographic-variants) + ;; (eq name '<-synonyms) + (string-match "^<-synonyms" (symbol-name name)) + (eq name '<-mistakable) + (when (string-match "^->" (symbol-name name)) + (cond + ((string-match "^->fullwidth" (symbol-name name)) + (not (and (consp value) + (characterp (car value)) + (encode-char + (car value) '=ucs 'defined-only))) + ) + (t))) + )) + ) + ((eq name 'ideographic-structure) + (insert (isd-turtle-format-char nil nil value (/ column 4) + 'isd 'without-head-char)) + (insert " ;") + ) + ((eq name '->subsumptive) + (insert line-separator) + (concord-turtle-insert-relation-feature object name value + line-breaking + ccss readable) + (setq children value) + ) + ((eq name 'character) + (insert line-separator) + (concord-turtle-insert-relation-feature object name value + line-breaking + ccss readable) + ;; (setq children value) + ) + (t + (insert (format "%s %-20s " + line-separator + (chise-turtle-uri-encode-feature-name name-base))) + (unless (concord-turtle-insert-feature-value + value metadata name-domain name-base) + (insert " ;")) + ) + )) + (setq obj-spec (cdr obj-spec))) + (insert (format "%s ." line-breaking)) + (dolist (eq-cpos (nreverse eq-cpos-list)) + (setq ret (chise-split-ccs-name (nth 1 eq-cpos))) + (insert (format "%s %s" line-breaking + (car eq-cpos))) + (insert (format "%s %s" line-breaking + (format ":%s-of" (nth 1 ret)))) + (if (null (nth 2 ret)) + (insert (format " %14s:%-7s ." + (chise-turtle-uri-encode-ccs-name (car ret)) + (nth 1 (split-string (car eq-cpos) ":")))) + (insert " [ ") + (setq col (current-column)) + (insert (format ":context domain:%-7s ;\n%srdf:value %5s:%-7s ] ." + (chise-turtle-uri-encode-ccs-name (nth 2 ret)) + (make-string col ?\ ) + (chise-turtle-uri-encode-ccs-name (car ret)) + (nth 1 (split-string (car eq-cpos) ":")))))) + (setq est-coded-charset-priority-list + (append est-coded-charset-priority-list + (nreverse child-ccs-list))) + (when children + (dolist (child children) + (insert (format "%s " line-breaking)) + (concord-turtle-insert-object-features child nil nil nil 'for-sub-node))) + )) + +(defun concord-turtle-insert-char-data (char &optional readable attributes) + (save-restriction + (narrow-to-region (point)(point)) + (concord-turtle-insert-object-features char readable attributes) + (insert "\n\n") + )) + +(defun concord-turtle-insert-prefix () + (let (base-ccs-list ret) + (insert "@prefix rdf: . +@prefix rdfs: . +@prefix : . +@prefix ideo: . +@prefix isd: . +@prefix idc: . +@prefix chisegg: . +@prefix domain: . +@prefix script: . +@prefix ideocomb: . +@prefix chisebib: . +@prefix ruimoku: . +@prefix zob1959: . + +") + (dolist (cell (sort chise-turtle-ccs-prefix-alist + (lambda (a b) + (char-attribute-name< (cdr a)(cdr b))))) + (insert (format "@prefix %s: <%s/%s=> .\n" + (car cell) + "http://www.chise.org/est/view/character" + (www-uri-encode-feature-name (cdr cell)))) + (setq ret (chise-split-ccs-name (cdr cell))) + (unless (memq (car ret) base-ccs-list) + (setq base-ccs-list (cons (car ret) base-ccs-list)))) + (insert "\n") + (dolist (base-ccs (nreverse base-ccs-list)) + (insert (format "@prefix %s: <%s/%s/code-point/> .\n" + (chise-turtle-uri-encode-ccs-name base-ccs) + "http://rdf.chise.org/data/ccs" + (www-uri-encode-feature-name base-ccs)))))) + +(defun concord-turtle-insert-ideograph-radical-char-data (radical) + (let ((chars + (sort (copy-list (aref ideograph-radical-chars-vector radical)) + (lambda (a b) + (ideograph-char< a b radical)))) + attributes) + (dolist (name (char-attribute-list)) + (unless (memq name char-db-ignored-attributes) + (push name attributes) + )) + (setq attributes (sort attributes #'char-attribute-name<)) + (aset ideograph-radical-chars-vector radical chars) + (dolist (char chars) + (when (not (some (lambda (atr) + (get-char-attribute char atr)) + char-db-ignored-attributes)) + (concord-turtle-insert-char-data char nil attributes))) + )) + +(defun char-db-turtle-write-ideograph-radical-char-data (radical file) + (if (file-directory-p file) + (let ((name (char-feature (decode-char 'ucs (+ #x2EFF radical)) + 'name))) + (if (string-match "KANGXI RADICAL " name) + (setq name (capitalize (substring name (match-end 0))))) + (setq name (mapconcat (lambda (char) + (if (eq char ? ) + "-" + (char-to-string char))) name "")) + (setq file + (expand-file-name + (format "Ideograph-R%03d-%s.ttl" radical name) + file)))) + (let (chise-turtle-ccs-prefix-alist) + (with-temp-buffer + (concord-turtle-insert-ideograph-radical-char-data radical) + (goto-char (point-min)) + (concord-turtle-insert-prefix) + (insert "\n") + (goto-char (point-min)) + (insert (format "# -*- coding: %s -*-\n" + char-db-file-coding-system)) + (let ((coding-system-for-write char-db-file-coding-system)) + (write-region (point-min)(point-max) file))))) + + +;;; @ end +;;; + +(provide 'concord-turtle-dump) + +;;; concord-turtle-dump.el ends here diff --git a/isd-turtle.el b/isd-turtle.el new file mode 100644 index 0000000..93fea22 --- /dev/null +++ b/isd-turtle.el @@ -0,0 +1,635 @@ +;;; isd-turtle.el --- Utility to dump ideographic-structure as Turtle files + +;; Copyright (C) 2017, 2018 MORIOKA Tomohiko + +;; Author: MORIOKA Tomohiko +;; Keywords: Ideographic Structures (漢字構造、解字), IDS, CHISE, RDF, Turtle + +;; This file is a part of CHISE-ISD (Ideographic Structure Database). + +;; This program is free software; you can redistribute it and/or +;; modify it under the terms of the GNU General Public License as +;; published by the Free Software Foundation; either version 2, or (at +;; your option) any later version. + +;; This program is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program; see the file COPYING. If not, write to +;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Code: + +(require 'cwiki-common) + +(defvar isd-url-prefix "http://rdf.chise.org/data/") + +(setq est-coded-charset-priority-list + '(; =ucs + =mj + =adobe-japan1-0 + =adobe-japan1-1 + =adobe-japan1-2 + =adobe-japan1-3 + =adobe-japan1-4 + =adobe-japan1-5 + =adobe-japan1-6 + =ucs@iso + =jis-x0208 =jis-x0208@1990 + =jis-x0213-1 + =jis-x0213-1@2000 =jis-x0213-1@2004 + =jis-x0213-2 + =jis-x0212 + =gt + =hanyo-denshi/ks + =hanyo-denshi/tk + =ucs-itaiji-001 + =ucs-itaiji-002 + =ucs-itaiji-003 + =ucs-itaiji-004 + =ucs-itaiji-005 + =ucs-itaiji-006 + =ucs-itaiji-007 + =ucs-itaiji-008 + =ucs-itaiji-009 + =ucs-itaiji-010 + =ucs-itaiji-084 + =ucs-var-001 + =ucs-var-002 + =ucs-var-003 + =ucs-var-004 + =ucs-var-005 + =cns11643-1 =cns11643-2 =cns11643-3 + =cns11643-4 =cns11643-5 =cns11643-6 =cns11643-7 + =gb2312 + =big5-cdp + =ks-x1001 + =gt-k + =ucs@unicode + =ucs@JP/hanazono + =gb12345 + =ucs@cns + =ucs@gb + =zinbun-oracle =>zinbun-oracle + =daikanwa + =ruimoku-v6 + =cbeta =jef-china3 + =daikanwa/+2p + =+>ucs@iso =+>ucs@unicode + =+>ucs@jis + =+>ucs@cns + =+>ucs@ks + =+>ucs@jis/1990 + =>mj + =>jis-x0208 =>jis-x0213-1 + =>jis-x0208@1997 + =>ucs@iwds-1 + =>ucs@cognate + =>ucs@component + =>iwds-1 + =>ucs@iso + =>ucs@unicode + =>ucs@jis =>ucs@cns =>ucs@ks + =>gt + =>gt-k + =>>ucs@iso =>>ucs@unicode + =>>ucs@jis =>>ucs@cns =>>ucs@ks + =>>gt-k + =>>hanyo-denshi/ks + ==mj + ==ucs@iso + ==ucs@unicode + ==adobe-japan1-0 + ==adobe-japan1-1 + ==adobe-japan1-2 + ==adobe-japan1-3 + ==adobe-japan1-4 + ==adobe-japan1-5 + ==adobe-japan1-6 + ==ks-x1001 + ==hanyo-denshi/ks + ==hanyo-denshi/tk + ==ucs@jis + ==gt + ==cns11643-1 ==cns11643-2 ==cns11643-3 + ==cns11643-4 ==cns11643-5 ==cns11643-6 ==cns11643-7 + ==jis-x0212 + ==ucs@cns + ==koseki + ==daikanwa + ==gt-k + ==ucs@gb + ==ucs-itaiji-003 + ==ucs@JP/hanazono + ==daikanwa/+2p + =>>jis-x0208 =>>jis-x0213-1 =>>jis-x0213-2 + =+>jis-x0208 =+>jis-x0213-1 =+>jis-x0213-2 + =+>hanyo-denshi/jt + =+>jis-x0208@1978 + =>>gt + =+>adobe-japan1 + =>>adobe-japan1 + =jis-x0208@1983 =jis-x0208@1978 + =>ucs-itaiji-001 + =>ucs-itaiji-002 + =>ucs-itaiji-003 + =>ucs-itaiji-004 + =>ucs-itaiji-005 + =>ucs-itaiji-006 + =>ucs-itaiji-007 + =>ucs-itaiji-009 + ==>ucs@bucs + =big5 + =>cbeta + ===mj + ===ucs@iso + ===ucs@unicode + ===hanyo-denshi/ks + ===ks-x1001 + ===gt + ===gt-k + ===ucs@ks + ===ucs@gb + =shinjigen + =shinjigen@rev + =shinjigen@1ed + =shinjigen/+p@rev + ==shinjigen + ==shinjigen@rev + ==daikanwa/+p + ==shinjigen@1ed + ===daikanwa/+p + =>daikanwa/ho + ===daikanwa/ho + )) + +;; (defvar isd-turtle-ccs-list nil) +(defvar chise-turtle-ccs-prefix-alist nil) + +(defun charset-code-point-format-spec (ccs) + (cond ((memq ccs '(=ucs)) + "0x%04X") + (t + (let ((ccs-name (symbol-name ccs))) + (cond + ((string-match + "\\(shinjigen\\|daikanwa/ho\\|=>iwds-1\\)" + ccs-name) + "%04d") + ((string-match + "\\(gt\\|daikanwa\\|adobe-japan1\\|cbeta\\|zinbun-oracle\\|hng\\)" + ccs-name) + "%05d") + ((string-match "\\(hanyo-denshi/ks\\|koseki\\|mj\\)" ccs-name) + "%06d") + ((string-match "hanyo-denshi/tk" ccs-name) + "%08d") + (t + "0x%X")))))) + +;; (defun isd-turtle-uri-encode-feature-name (feature-name) +;; (cond +;; ((eq '=ucs feature-name) +;; "a.ucs") +;; ((eq '==>ucs@bucs feature-name) +;; "bucs") +;; (t +;; (mapconcat (lambda (c) +;; (if (eq c ?@) +;; "_" +;; (char-to-string c))) +;; (www-uri-encode-feature-name feature-name) +;; "")))) +(defun chise-turtle-uri-encode-ccs-name (feature-name) + (cond + ((eq '=ucs feature-name) + "a.ucs") + ((eq '=big5 feature-name) + "a.big5") + ((eq '==>ucs@bucs feature-name) + "bucs") + (t + (mapconcat (lambda (c) + (cond + ((eq c ?@) + "_") + ((eq c ?+) + "._.") + ((eq c ?=) + ".:.") + (t + (char-to-string c)))) + (www-uri-encode-feature-name feature-name) + "")))) + +;; (defun isd-turtle-format-ccs-code-point (ccs code-point) +;; (unless (memq ccs isd-turtle-ccs-list) +;; (setq isd-turtle-ccs-list (cons ccs isd-turtle-ccs-list))) +;; (format "%s:%s" +;; (isd-turtle-uri-encode-feature-name ccs) +;; (format (charset-code-point-format-spec ccs) +;; code-point))) +(defun chise-turtle-format-ccs-code-point (ccs code-point) + (let ((ccs-uri (chise-turtle-uri-encode-ccs-name ccs))) + (unless (assoc ccs-uri chise-turtle-ccs-prefix-alist) + (setq chise-turtle-ccs-prefix-alist + (cons (cons ccs-uri ccs) + chise-turtle-ccs-prefix-alist))) + (format "%s:%s" + ccs-uri + (format (charset-code-point-format-spec ccs) + code-point)))) + +(defun isd-turtle-encode-char (object) + (let ((ccs-list est-coded-charset-priority-list) + ccs ret) + (if (setq ret (encode-char object '=ucs)) + (chise-turtle-format-ccs-code-point '=ucs ret) + (while (and ccs-list + (setq ccs (pop ccs-list)) + (not (setq ret (encode-char object ccs 'defined-only))))) + (cond (ret + (chise-turtle-format-ccs-code-point ccs ret) + ) + ((and (setq ccs (car (split-char object))) + (setq ret (encode-char object ccs))) + (chise-turtle-format-ccs-code-point ccs ret) + ) + (t + (format (if est-hide-cgi-mode + "system-char-id=0x%X" + "system-char-id:0x%X") + (encode-char object 'system-char-id)) + ))))) + +(defun isd-turtle-format-component (component separator level prefix) + (cond ((characterp component) + (format "%s %c # %c" + (isd-turtle-encode-char component) + separator + component) + ) + ((consp component) + (let ((ret (find-char component))) + (cond (ret + (format "%s %c # %c" + (isd-turtle-encode-char ret) separator ret)) + ((setq ret (assq 'ideographic-structure component)) + (if (eq separator ?\;) + (format "%s ;" + (isd-turtle-format-char nil nil (cdr ret) (1+ level) + prefix)) + (isd-turtle-format-char nil nil (cdr ret) (1+ level) + prefix)))))))) + +(defun isd-turtle-format-char (ccs code-point &optional ids-list level + prefix without-head-char) + (unless level + (setq level 0)) + (unless prefix + (setq prefix "")) + (let ((indent (make-string (* level 4) ?\ )) + char + idc idc-str + p1 p2 p3 + c1 c2 c3 + ret) + (unless ids-list + (if (and ccs code-point + (setq char (decode-char ccs code-point))) + (setq ids-list (get-char-attribute char 'ideographic-structure)))) + (setq idc (car ids-list)) + (setq c1 (nth 1 ids-list) + c2 (nth 2 ids-list) + c3 (nth 3 ids-list)) + (if (char-ref-p idc) + (setq idc (plist-get idc :char))) + (if (and (consp idc) + (setq ret (find-char idc))) + (setq idc ret)) + (if (and (consp c1) + (setq ret (find-char c1))) + (setq c1 ret)) + (if (and (consp c2) + (setq ret (find-char c2))) + (setq c2 ret)) + (if (and (consp c3) + (setq ret (find-char c3))) + (setq c3 ret)) + (cond + ((eq idc ?\u2FF0) ; ⿰ + (setq p1 'left + p2 'right) + ) + ((eq idc ?⿱) + (setq p1 'above + p2 'below) + ) + ((eq idc ?⿲) + (setq p1 'left + p2 'middle + p3 'right) + ) + ((eq idc ?⿳) + (setq p1 'above + p2 'middle + p3 'below) + ) + ((memq idc '(?⿴ ?⿵ ?⿶ ?⿷ ?⿸ ?⿹ ?⿺)) + (setq p1 'surround + p2 'filling) + ) + ((eq idc ?⿻) + (setq p1 'underlying + p2 'overlaying) + ) + ((and idc (eq (encode-char idc '=ucs-itaiji-001) #x2FF6)) + (setq idc-str "SLR") + (setq p1 'surround + p2 'filling) + ) + ((and idc (eq (encode-char idc '=ucs-var-001) #x2FF0)) + (setq idc-str "⿰・SLR") + (setq p1 'left + p2 'right) + ) + ((and idc (eq (encode-char idc '=>iwds-1) 307)) + (setq idc-str "⿰・⿺") + (setq p1 'left + p2 'right) + ) + ((and idc (eq (encode-char idc '=>iwds-1) 305)) + (setq idc-str "⿱・⿸") + (setq p1 'above + p2 'below) + ) + ((and idc (eq (encode-char idc '=>ucs@component) #x2FF5)) + (setq idc-str "⿱・⿵") + (setq p1 'above + p2 'below) + ) + ) + (cond + (p3 + (format "%s +%s %s:structure [ a idc:%s ; +%s %s:%-8s %s +%s %s:%-8s %s +%s %s:%-8s %s +%s ]%s" + (if without-head-char + "" + (if (and ccs code-point) + (format "%s # %c" + (chise-turtle-format-ccs-code-point ccs code-point) + char) + "[")) + indent prefix (or idc-str (char-to-string idc)) + indent prefix p1 (isd-turtle-format-component c1 ?\; (1+ level) prefix) + indent prefix p2 (isd-turtle-format-component c2 ?\; (1+ level) prefix) + indent prefix p3 (isd-turtle-format-component c3 ?\ (1+ level) prefix) + indent + (if without-head-char + "" + (if (null char) + (format "\n%s]" + indent) + ""))) + ) + (idc + (format "%s +%s %s:structure [ a idc:%s ; +%s %s:%-8s %s +%s %s:%-8s %s +%s ]%s" + (if without-head-char + "" + (if (and ccs code-point) + (format "%s # %c" + (chise-turtle-format-ccs-code-point ccs code-point) + char) + "[")) + indent prefix (or idc-str (char-to-string idc)) + indent prefix p1 (isd-turtle-format-component c1 ?\; (1+ level) prefix) + indent prefix p2 (isd-turtle-format-component c2 ?\ (1+ level) prefix) + indent + (if without-head-char + "" + (if (null char) + (format "\n%s]" + indent) + ""))))) + )) + +(defun isd-turtle-insert-char (ccs code-point) + (let ((ret (isd-turtle-format-char ccs code-point))) + (when ret + (insert ret) + (insert " .\n")))) + +(defun isd-turtle-insert-ccs-ranges (ccs &rest ranges) + (let (range code max-code) + (while ranges + (setq range (car ranges)) + (cond ((consp range) + (setq code (car range) + max-code (cdr range)) + (while (<= code max-code) + (isd-turtle-insert-char ccs code) + (setq code (1+ code))) + ) + ((integerp range) + (isd-turtle-insert-char ccs range) + ) + (t (error 'wrong-type-argument range))) + (setq ranges (cdr ranges))))) + +(defun isd-turtle-dump-range (file path func &rest args) + (with-temp-buffer + (let ((coding-system-for-write 'utf-8-mcs-er) + ;; isd-turtle-ccs-list + chise-turtle-ccs-prefix-alist) + (if (file-directory-p path) + (setq path (expand-file-name file path))) + (apply func args) + (goto-char (point-min)) + ;; (dolist (ccs (sort isd-turtle-ccs-list + ;; #'char-attribute-name<)) + ;; (insert (format "@prefix %s: <%s%s=> .\n" + ;; (isd-turtle-uri-encode-feature-name ccs) + ;; "http://www.chise.org/est/view/character/" + ;; (www-uri-encode-feature-name ccs)))) + (dolist (cell (sort chise-turtle-ccs-prefix-alist + (lambda (a b) + (char-attribute-name< (cdr a)(cdr b))))) + (insert (format "@prefix %s: <%s/%s=> .\n" + (car cell) + "http://www.chise.org/est/view/character" + (www-uri-encode-feature-name (cdr cell))))) + (insert "\n") + (goto-char (point-min)) + (insert "# -*- coding: utf-8-mcs-er -*-\n") + (insert "@prefix : . +@prefix idc: .\n") + (write-region (point-min)(point-max) path)))) + +;;;###autoload +(defun isd-turtle-dump-ucs-basic (filename) + (interactive "Fdump ISD-UCS-Basic : ") + (isd-turtle-dump-range "ISD-UCS-Basic.ttl" filename + #'isd-turtle-insert-ccs-ranges + '=ucs '(#x4E00 . #x9FA5))) + +;;;###autoload +(defun isd-turtle-dump-ucs-ext-a (filename) + (interactive "Fdump ISD-UCS-Ext-A : ") + (isd-turtle-dump-range "ISD-UCS-Ext-A.ttl" filename + #'isd-turtle-insert-ccs-ranges + '=ucs '(#x3400 . #x4DB5) #xFA1F #xFA23)) + +;;;###autoload +(defun isd-turtle-dump-ucs-ext-b-1 (filename) + (interactive "Fdump IDS-UCS-Ext-B-1 : ") + (isd-turtle-dump-range "ISD-UCS-Ext-B-1.ttl" filename + #'isd-turtle-insert-ccs-ranges + 'ucs '(#x20000 . #x21FFF))) + +;;;###autoload +(defun isd-turtle-dump-ucs-ext-b-2 (filename) + (interactive "Fdump IDS-UCS-Ext-B-2 : ") + (isd-turtle-dump-range "ISD-UCS-Ext-B-2.ttl" filename + #'isd-turtle-insert-ccs-ranges + 'ucs '(#x22000 . #x23FFF))) + +;;;###autoload +(defun isd-turtle-dump-ucs-ext-b-3 (filename) + (interactive "Fdump IDS-UCS-Ext-B-3 : ") + (isd-turtle-dump-range "ISD-UCS-Ext-B-3.ttl" filename + #'isd-turtle-insert-ccs-ranges + 'ucs '(#x24000 . #x25FFF))) + +;;;###autoload +(defun isd-turtle-dump-ucs-ext-b-4 (filename) + (interactive "Fdump IDS-UCS-Ext-B-4 : ") + (isd-turtle-dump-range "ISD-UCS-Ext-B-4.ttl" filename + #'isd-turtle-insert-ccs-ranges + 'ucs '(#x26000 . #x27FFF))) + +;;;###autoload +(defun isd-turtle-dump-ucs-ext-b-5 (filename) + (interactive "Fdump IDS-UCS-Ext-B-5 : ") + (isd-turtle-dump-range "ISD-UCS-Ext-B-5.ttl" filename + #'isd-turtle-insert-ccs-ranges + 'ucs '(#x28000 . #x29FFF))) + +;;;###autoload +(defun isd-turtle-dump-ucs-ext-b-6 (filename) + (interactive "Fdump IDS-UCS-Ext-B-6 : ") + (isd-turtle-dump-range "ISD-UCS-Ext-B-6.ttl" filename + #'isd-turtle-insert-ccs-ranges + 'ucs '(#x2A000 . #x2A6D6))) + +;;;###autoload +(defun isd-turtle-dump-ucs-ext-c (filename) + (interactive "Fdump IDS-UCS-Ext-C : ") + (isd-turtle-dump-range "ISD-UCS-Ext-C.ttl" filename + #'isd-turtle-insert-ccs-ranges + 'ucs '(#x2A700 . #x2B734))) + +;;;###autoload +(defun isd-turtle-dump-ucs-ext-d (filename) + (interactive "Fdump IDS-UCS-Ext-D : ") + (isd-turtle-dump-range "ISD-UCS-Ext-D.ttl" filename + #'isd-turtle-insert-ccs-ranges + 'ucs '(#x2B740 . #x2B81D))) + +;;;###autoload +(defun isd-turtle-dump-ucs-ext-e (filename) + (interactive "Fdump IDS-UCS-Ext-E : ") + (isd-turtle-dump-range "ISD-UCS-Ext-E.ttl" filename + #'isd-turtle-insert-ccs-ranges + 'ucs '(#x2B820 . #x2CEA1))) + +;;;###autoload +(defun isd-turtle-dump-mj-0 (filename) + (interactive "Fdump ISD-MJ-0 : ") + (isd-turtle-dump-range "ISD-MJ-0.ttl" filename + #'isd-turtle-insert-ccs-ranges + '=mj '(1 . 9999))) + +;;;###autoload +(defun isd-turtle-dump-mj-1 (filename) + (interactive "Fdump ISD-MJ-1 : ") + (isd-turtle-dump-range "ISD-MJ-1.ttl" filename + #'isd-turtle-insert-ccs-ranges + '=mj '(10000 . 19999))) + +;;;###autoload +(defun isd-turtle-dump-mj-2 (filename) + (interactive "Fdump ISD-MJ-2 : ") + (isd-turtle-dump-range "ISD-MJ-2.ttl" filename + #'isd-turtle-insert-ccs-ranges + '=mj '(20000 . 29999))) + +;;;###autoload +(defun isd-turtle-dump-mj-3 (filename) + (interactive "Fdump ISD-MJ-3 : ") + (isd-turtle-dump-range "ISD-MJ-3.ttl" filename + #'isd-turtle-insert-ccs-ranges + '=mj '(30000 . 39999))) + +;;;###autoload +(defun isd-turtle-dump-mj-4 (filename) + (interactive "Fdump ISD-MJ-4 : ") + (isd-turtle-dump-range "ISD-MJ-4.ttl" filename + #'isd-turtle-insert-ccs-ranges + '=mj '(40000 . 49999))) + +;;;###autoload +(defun isd-turtle-dump-mj-5 (filename) + (interactive "Fdump ISD-MJ-5 : ") + (isd-turtle-dump-range "ISD-MJ-5.ttl" filename + #'isd-turtle-insert-ccs-ranges + '=mj '(50000 . 59999))) + +;;;###autoload +(defun isd-turtle-dump-mj-6 (filename) + (interactive "Fdump ISD-MJ-6 : ") + (isd-turtle-dump-range "ISD-MJ-6.ttl" filename + #'isd-turtle-insert-ccs-ranges + '=mj '(60000 . 69999))) + +;;;###autoload +(defun isd-turtle-dump-all (directory) + (interactive "DISD directory : ") + (isd-turtle-dump-ucs-basic directory) + (isd-turtle-dump-ucs-ext-a directory) + (isd-turtle-dump-ucs-ext-b-1 directory) + (isd-turtle-dump-ucs-ext-b-2 directory) + (isd-turtle-dump-ucs-ext-b-3 directory) + (isd-turtle-dump-ucs-ext-b-4 directory) + (isd-turtle-dump-ucs-ext-b-5 directory) + (isd-turtle-dump-ucs-ext-b-6 directory) + (isd-turtle-dump-ucs-ext-c directory) + (isd-turtle-dump-ucs-ext-d directory) + (isd-turtle-dump-ucs-ext-e directory) + (isd-turtle-dump-mj-0 directory) + (isd-turtle-dump-mj-1 directory) + (isd-turtle-dump-mj-2 directory) + (isd-turtle-dump-mj-3 directory) + (isd-turtle-dump-mj-4 directory) + (isd-turtle-dump-mj-5 directory) + (isd-turtle-dump-mj-6 directory) + ) + + +;;; @ End. +;;; + +(provide 'isd-turtle) + +;;; isd-turtle.el ends here -- 1.7.10.4