1 ;;; concord-turtle-dump.el --- Character Database utility -*- coding: utf-8-er; -*-
3 ;; Copyright (C) 2017,2018 MORIOKA Tomohiko.
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: CHISE, Character Database, RDF, Turtle, ISO/IEC 10646, UCS, Unicode, MULE.
8 ;; This file is part of CHISET (CHISE/Turtle).
10 ;; XEmacs CHISE is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
15 ;; XEmacs CHISE is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with XEmacs CHISE; see the file COPYING. If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
27 (require 'char-db-util)
28 (require 'cwiki-common)
30 (require 'ideograph-util)
32 (setq est-coded-charset-priority-list
43 =jis-x0208 =jis-x0208@1990
45 =jis-x0213-1@2000 =jis-x0213-1@2004
71 =cns11643-1 =cns11643-2 =cns11643-3
72 =cns11643-4 =cns11643-5 =cns11643-6 =cns11643-7
82 =zinbun-oracle =>zinbun-oracle
87 =+>ucs@iso =+>ucs@unicode
93 =>jis-x0208 =>jis-x0213-1
101 =>ucs@jis =>ucs@cns =>ucs@ks
104 =>>ucs@iso =>>ucs@unicode
105 =>>ucs@jis =>>ucs@cns =>>ucs@ks
123 ==cns11643-1 ==cns11643-2 ==cns11643-3
124 ==cns11643-4 ==cns11643-5 ==cns11643-6 ==cns11643-7
138 =>>jis-x0208 =>>jis-x0213-1 =>>jis-x0213-2
139 =+>jis-x0208 =+>jis-x0213-1 =+>jis-x0213-2
145 =jis-x0208@1983 =jis-x0208@1978
178 (defvar chise-turtle-ccs-prefix-alist nil)
180 (setq chise-turtle-feature-domains
181 (append char-db-feature-domains
183 (dolist (feature (char-attribute-list))
184 (setq feature (symbol-name feature))
186 "\\(radical\\|strokes\\)@\\([^@*]+\\)\\(\\*\\|$\\)"
188 (setq domain (intern (match-string 2 feature)))
189 (unless (memq domain dest)
190 (setq dest (cons domain dest)))))
191 (sort dest #'string<))))
193 (defun charset-code-point-format-spec (ccs)
194 (cond ((memq ccs '(=ucs))
197 (let ((ccs-name (symbol-name ccs)))
200 "\\(shinjigen\\|daikanwa/ho\\|=>iwds-1\\)"
204 "\\(gt\\|daikanwa\\|adobe-japan1\\|cbeta\\|zinbun-oracle\\|hng\\)"
207 ((string-match "\\(hanyo-denshi/ks\\|koseki\\|mj\\)" ccs-name)
209 ((string-match "hanyo-denshi/tk" ccs-name)
214 (defun chise-turtle-uri-decode-feature-name (uri-feature)
215 (cond ((string= "a.ucs" uri-feature)
217 ((string= "a.big5" uri-feature)
220 (www-uri-decode-feature-name uri-feature))))
222 (defun chise-turtle-uri-encode-ccs-name (feature-name)
224 ((eq '=ucs feature-name)
226 ((eq '=big5 feature-name)
228 ((eq '==>ucs@bucs feature-name)
231 (mapconcat (lambda (c)
242 (char-to-string c))))
243 (www-uri-encode-feature-name feature-name)
246 (defun chise-turtle-uri-encode-feature-name (feature-name)
248 ((eq '->subsumptive feature-name)
250 ((eq '<-denotational feature-name)
252 ((eq '<-formed feature-name)
254 ((eq '<-same feature-name)
256 ((eq '<-simplified feature-name)
257 "ideo:simplified-form-of")
258 ((eq '<-vulgar feature-name)
259 "ideo:vulgar-form-of")
260 ((eq '<-wrong feature-name)
261 "ideo:wrong-form-of")
262 ((eq '<-original feature-name)
263 "ideo:original-form-of")
264 ((eq '<-ancient feature-name)
265 "ideo:ancient-form-of")
266 ((eq '<-Small-Seal feature-name)
267 "ideo:Small-Seal-of")
268 ((eq '<-interchangeable feature-name)
269 "ideo:interchangeable-form-of")
270 ((eq '->interchangeable feature-name)
271 "ideo:interchangeable")
272 ((eq '->mistakable feature-name)
274 ((eq 'hanyu-dazidian feature-name)
275 "ideo:hanyu-dazidian")
276 ((eq '*note feature-name)
279 (concat ":" (chise-turtle-uri-encode-ccs-name feature-name)))))
281 (defun chise-turtle-format-ccs-code-point (ccs code-point)
282 (let ((ccs-uri (chise-turtle-uri-encode-ccs-name ccs)))
283 (unless (assoc ccs-uri chise-turtle-ccs-prefix-alist)
284 (setq chise-turtle-ccs-prefix-alist
285 (cons (cons ccs-uri ccs)
286 chise-turtle-ccs-prefix-alist)))
289 (format (charset-code-point-format-spec ccs)
292 (defun chise-turtle-encode-char (object)
295 (if (setq ret (encode-char object '=ucs))
296 (chise-turtle-format-ccs-code-point '=ucs ret)
297 (setq spec (char-attribute-alist object))
299 (setq cell (pop spec)))
300 (if (and (find-charset (car cell))
301 (setq ret (cdr cell)))
302 (setq dest (cons cell dest))))
303 (setq ret (car (sort dest (lambda (a b)
304 (char-attribute-name< (car a)(car b)))))
308 (chise-turtle-format-ccs-code-point ccs ret)
310 ((and (setq ccs (car (split-char object)))
311 (setq ret (encode-char object ccs)))
312 (chise-turtle-format-ccs-code-point ccs ret)
314 ((setq ret (get-char-attribute object 'ideographic-combination))
315 (format "ideocomb:%s"
316 (mapconcat (lambda (cell)
317 (cond ((characterp cell)
318 (char-to-string cell)
320 ((setq ret2 (find-char cell))
321 (char-to-string ret2)
329 (format (if est-hide-cgi-mode
330 "system-char-id=0x%X"
331 "system-char-id:0x%X")
332 (encode-char object 'system-char-id))
335 (defun concord-turtle-encode-object (obj)
336 (cond ((characterp obj)
337 (chise-turtle-encode-char obj)
339 ((concord-object-p obj)
340 (let ((genre (est-object-genre obj))
341 (url-object (www-uri-encode-object obj)))
343 "http://www.chise.org/est/view"
347 (defun chise-split-feature-name (feature-name)
348 (let (base domain number meta)
349 (setq feature-name (symbol-name feature-name))
350 (if (string-match ".\\*." feature-name)
352 (format ":%s" (substring feature-name (1- (match-end 0)))))
353 feature-name (substring feature-name 0 (1+ (match-beginning 0)))))
354 (if (string-match "\\$_\\([0-9]+\\)$" feature-name)
355 (setq number (car (read-from-string (match-string 1 feature-name)))
356 feature-name (substring feature-name 0 (match-beginning 0))))
357 (if (string-match "@" feature-name)
358 (setq domain (car (read-from-string (substring feature-name (match-end 0))))
359 base (intern (substring feature-name 0 (match-beginning 0))))
360 (setq base (intern feature-name)))
361 (list base domain number meta)))
363 (defun chise-compose-feature-name (base domain number meta)
364 (let ((name (if domain
365 (format "%s@%s" base domain)
366 (symbol-name base))))
368 (setq name (format "%s$_%d" name number)))
370 (setq name (format "%s*%s" name
371 (substring (symbol-name meta) 1))))
374 (defvar chise-feature-name-base-metadata-alist nil)
376 (defun chise-update-feature-name-base-metadata-alist ()
378 (let (base domain number metadata
379 bcell dcell ncell ret)
380 (setq chise-feature-name-base-metadata-alist nil)
381 (dolist (fname (sort (char-attribute-list)
382 #'char-attribute-name<))
383 (setq ret (chise-split-feature-name fname)
387 metadata (nth 3 ret))
389 (if (setq bcell (assq base chise-feature-name-base-metadata-alist))
390 (if (setq dcell (assq domain (cdr bcell)))
391 (if (setq ncell (assq number (cdr dcell)))
392 (unless (memq metadata (cdr ncell))
393 (setcdr ncell (nconc (cdr ncell)
395 (setcdr dcell (cons (list number metadata)
397 (setcdr bcell (cons (list domain (list number metadata))
399 (setq chise-feature-name-base-metadata-alist
400 (cons (list base (list domain (list number metadata)))
401 chise-feature-name-base-metadata-alist))
404 (chise-update-feature-name-base-metadata-alist)
406 (defun chise-get-char-attribute-with-metadata (obj-spec feature-name-base domain)
407 (let ((feature-pair (assq (chise-compose-feature-name
408 feature-name-base domain nil nil)
412 base-metadata metadata
415 (setq value (cdr feature-pair))
417 ((and (setq ret (assq feature-name-base
418 chise-feature-name-base-metadata-alist))
419 (setq dcell (assq domain (cdr ret))))
420 (if (setq ret (assq nil (cdr dcell)))
421 (dolist (bmn (cdr ret))
422 (when (setq m (assq (chise-compose-feature-name
423 feature-name-base domain nil bmn)
427 (list* bmn m base-metadata)))))
433 ((setq ret (assq i (cdr dcell)))
435 (dolist (mn (cdr ret))
436 (when (setq m (assq (chise-compose-feature-name
437 feature-name-base domain i mn)
440 (setq metadata (list* mn m metadata))))
442 (list* :value (car rest) metadata)
449 (list (nconc (nreverse dest) rest)
452 (t (list value nil)))
455 (defun chise-split-ccs-name (ccs)
456 (cond ((eq ccs '=ucs)
457 '(ucs abstract-character nil)
460 '(big5 abstract-character nil)
463 (setq ccs (symbol-name ccs))
465 (if (string-match "^\\(=[=+>]*\\)\\([^=>@*]+\\)@?" ccs)
466 (list (intern (match-string 2 ccs))
467 (chise-decode-ccs-prefix (match-string 1 ccs))
468 (if (string= (setq ret (substring ccs (match-end 0))) "")
473 (defun chise-decode-ccs-prefix (ccs)
474 (or (cdr (assoc ccs '(("==>" . super-abstract-character)
475 ("=>" . abstract-character)
476 ("=+>" . unified-glyph)
477 ("=" . abstract-glyph)
478 ("=>>" . detailed-glyph)
479 ("==" . abstract-glyph-form)
480 ("===" . glyph-image))))
483 (defun chise-turtle-uri-split-ccs (uri-ccs)
485 ((string-match "^a2\\." uri-ccs)
486 (cons ":super-abstract-character"
487 (substring uri-ccs (match-end 0)))
489 ((string-match "^a\\." uri-ccs)
490 (cons ":abstract-character"
491 (substring uri-ccs (match-end 0)))
493 ((string-match "^o\\." uri-ccs)
494 (cons ":unified-glyph"
495 (substring uri-ccs (match-end 0)))
497 ((string-match "^rep\\." uri-ccs)
498 (cons ":abstract-glyph"
499 (substring uri-ccs (match-end 0)))
501 ((string-match "^g\\." uri-ccs)
502 (cons ":detailed-glyph"
503 (substring uri-ccs (match-end 0)))
505 ((string-match "^g2\\." uri-ccs)
506 (cons ":abstract-glyph-form"
507 (substring uri-ccs (match-end 0)))
509 ((string-match "^gi\\." uri-ccs)
510 (cons ":abstract-glyph-form"
511 (substring uri-ccs (match-end 0)))
513 ((string-match "^repi\\." uri-ccs)
515 (substring uri-ccs (match-end 0)))
517 (t (cons ":character" uri-ccs))))
519 (defun concord-turtle-insert-relation-feature (char name value line-breaking
521 (insert (format " %s%s "
522 (chise-turtle-uri-encode-feature-name name)
524 (concord-turtle-insert-relations value readable)
528 (defun concord-turtle-insert-metadata (name value)
529 (let (col indent ret)
530 (insert (format "%-7s " name))
532 ((or (eq name :sources)
534 (setq col (current-column))
535 (setq indent (make-string col ?\ ))
536 (insert (format "chisebib:%s"
537 (chise-turtle-uri-encode-ccs-name (car value))))
538 (dolist (source (cdr value))
539 (insert (format " ,\n%schisebib:%s" indent
540 (chise-turtle-uri-encode-ccs-name source))))
542 ((eq name :references)
543 (setq ret (car value))
544 (setq ret (plist-get (nth 1 ret) :ref))
545 (setq col (current-column))
546 (setq indent (make-string col ?\ ))
547 (insert (format "<%s>" ret))
548 (dolist (refspec (cdr value))
549 (setq ret (plist-get (nth 1 refspec) :ref))
550 (insert (format " ,\n%s<%s>" indent ret)))
553 (insert (format "%S" value))
556 (defun concord-turtle-insert-radical (radical-number)
557 (insert (format " %3d ; # %c"
559 (ideographic-radical radical-number)))
562 (defun concord-turtle-insert-list (value &optional readable)
563 (let (lbs separator rest cell al cal key ret)
565 (setq lbs (concat "\n" (make-string (current-column) ?\ ))
568 (setq cell (car value))
569 (if (and (consp cell)
571 (setq ret (condition-case nil
579 (setq key (car (car rest)))
580 (if (find-charset key)
581 (setq cal (cons key cal))
582 (setq al (cons key al)))
583 (setq rest (cdr rest)))
586 (concord-turtle-insert-object-features ret
590 (setq separator lbs))
591 (setq ret (prin1-to-string cell))
593 (if (< (+ (current-column)
600 (setq separator " "))
601 (setq value (cdr value)))
605 (defun concord-turtle-insert-source-list (value &optional readable)
606 (let (lbs separator rest cell al cal key ret)
607 (setq lbs (concat " ,\n" (make-string (current-column) ?\ ))
610 (setq cell (car value))
611 (if (and (consp cell)
613 (setq ret (condition-case nil
621 (setq key (car (car rest)))
622 (if (find-charset key)
623 (setq cal (cons key cal))
624 (setq al (cons key al)))
625 (setq rest (cdr rest)))
628 (concord-turtle-insert-object-features ret
632 (setq separator lbs))
633 (setq ret (prin1-to-string cell))
635 (if (< (+ (current-column)
641 (if (string-match "=" ret)
642 (insert (format "%s:%s"
643 (substring ret 0 (match-beginning 0))
644 (substring ret (match-end 0))))
645 (insert (format "chisebib:%s" ret)))
646 (setq separator " , "))
647 (setq value (cdr value)))
651 (defun concord-turtle-insert-object (cell &optional readable)
653 (setq cell (decode-char '=ucs cell)))
656 (insert (format "%-20s" (chise-turtle-encode-char cell)))
658 ((concord-object-p cell)
659 (insert (format "%-20s" (concord-turtle-encode-object cell)))
662 (concord-turtle-insert-char-ref cell '<-formed)
665 (defun concord-turtle-insert-decomposition (value &optional readable)
666 (let ((lbs (concat "\n" (make-string (current-column) ?\ )))
668 (if (characterp value)
669 (setq value (list value)))
670 (if (setq base (pop value))
671 (cond ((setq vs (pop value))
673 (setq lb (concord-turtle-insert-object base readable))
677 (setq lb (concord-turtle-insert-object vs readable))
682 (setq lb (concord-turtle-insert-object base readable))
686 (defun concord-turtle-insert-relations (value &optional readable)
687 (let ((lbs (concat "\n" (make-string (current-column) ?\ )))
689 (if (characterp value)
690 (setq value (list value)))
692 (setq cell (car value))
693 ;; (if (integerp cell)
694 ;; (setq cell (decode-char '=ucs cell)))
697 (setq separator (format " ,%s" lbs)))
699 ;; ((characterp cell)
700 ;; (insert (format "%-20s" (chise-turtle-encode-char cell)))
702 ;; ((concord-object-p cell)
703 ;; (insert (format "%-20s" (concord-turtle-encode-object cell)))
706 ;; (concord-turtle-insert-char-ref cell '<-formed)))
707 (concord-turtle-insert-object cell readable)
708 (setq value (cdr value)))
711 (defun concord-turtle-insert-target-value (value feature-name-base &optional readable)
712 (cond ((eq feature-name-base 'ideographic-radical)
713 (concord-turtle-insert-radical value)
715 ((eq feature-name-base '=decomposition)
716 (concord-turtle-insert-decomposition value readable)
718 ((or (eq feature-name-base 'ideographic-combination)
719 (eq feature-name-base '<-formed)
720 (string-match "^\\(->\\|<-\\)[^*]*$" (symbol-name feature-name-base)))
721 (concord-turtle-insert-relations value readable)
723 ((eq feature-name-base 'comment)
724 (insert (format "%S" value))
726 ((eq feature-name-base 'sources)
727 (concord-turtle-insert-source-list value readable)
730 (concord-turtle-insert-list value readable)
733 (char-or-string-p value))
734 (insert (format " %-14s" (format "\"%s\"" value)))
737 (insert (format " %-14s" value))
740 (defun concord-turtle-insert-feature-value (value metadata domain feature-name-base)
741 (let (indent0 indent rest mdname mdval lb)
743 ((or metadata domain)
744 (setq indent0 (make-string (current-column) ?\ ))
746 (setq indent (make-string (current-column) ?\ ))
748 (insert (format ":context domain:%-7s ;"
749 (chise-turtle-uri-encode-ccs-name domain)))
752 (insert (format "\n%s" indent)))
753 (insert "rdf:value ")
754 (setq lb (concord-turtle-insert-target-value value feature-name-base))
757 (setq mdname (pop rest)
759 (insert (format " ;\n%s" indent))
760 (setq lb (concord-turtle-insert-metadata mdname mdval)))
762 (insert (format "\n%s] ;" indent0))
766 (concord-turtle-insert-target-value value feature-name-base)
769 (defun concord-turtle-insert-char-ref (char-ref feature-name-base)
770 (let (indent0 indent rest mdname mdval lb last-sep)
771 (setq indent0 (make-string (current-column) ?\ ))
773 (setq indent (make-string (current-column) ?\ ))
776 (setq mdname (pop rest)
779 (insert (format "%s\n%s"
786 (cond ((eq mdname :value)
787 (insert "rdf:value ")
788 (concord-turtle-insert-target-value mdval feature-name-base)
791 (concord-turtle-insert-metadata mdname mdval)))))
793 (insert (format "\n%s]" indent0))
797 (defun concord-turtle-insert-object-features (object
798 &optional readable attributes column
801 (setq column (current-column)))
802 (let ((est-coded-charset-priority-list est-coded-charset-priority-list)
803 (est-view-url-prefix "http://chise.org/est/view")
804 (obj-spec (sort (del-alist 'composition
805 (if (characterp object)
806 (char-attribute-alist object)
807 (concord-object-spec object)))
809 (char-attribute-name< (car a)(car b)))))
811 id obj-id type domain
813 name-base name-domain
815 (line-breaking (concat "\n" (make-string column ?\ )))
819 dest-ccss ; sources required-features
821 uri-ccs uri-cpos ccs-base children child-ccs-list col indent lb)
822 (setq line-separator line-breaking)
823 (setq id (concord-turtle-encode-object object))
824 (insert (format "%s" id))
827 (setq obj-id (file-name-nondirectory id))
828 (string-match ":" obj-id)
829 (setq uri-ccs (substring obj-id 0 (match-beginning 0))
830 uri-cpos (substring obj-id (match-end 0)))
831 (setq ret (assoc uri-ccs chise-turtle-ccs-prefix-alist))
832 (setq dest-ccss (list (cdr ret)))
833 (setq ret (chise-split-ccs-name (cdr ret)))
834 (setq ccs-base (car ret)
837 (insert (format "%s a chisegg:%s ;" line-separator type))
838 (insert (format "%s :%s-of" line-breaking type))
840 (insert (format " %s:%s ;"
841 (chise-turtle-uri-encode-ccs-name ccs-base) uri-cpos))
843 (setq col (current-column))
844 (insert (format ":context domain:%-7s ;\n%srdf:value %5s:%-7s ] ;"
845 (chise-turtle-uri-encode-ccs-name domain)
846 (make-string col ?\ )
847 (chise-turtle-uri-encode-ccs-name ccs-base) uri-cpos)))
849 (when (setq feature-pair (assq '<-subsumptive obj-spec))
850 (when (or readable (not for-sub-node))
851 (when (setq value (cdr feature-pair))
852 (insert line-separator)
853 (concord-turtle-insert-relation-feature object '<-subsumptive value
857 (setq obj-spec (delete feature-pair obj-spec))
859 (when (and (setq feature-pair (assq '<-denotational obj-spec))
860 (setq value (cdr feature-pair)))
861 (insert line-separator)
862 (concord-turtle-insert-relation-feature object '<-denotational value
865 (setq obj-spec (delete feature-pair obj-spec))
867 (when (and (setq feature-pair (assq '<-denotational@component obj-spec))
868 (setq value (cdr feature-pair)))
869 (insert line-separator)
870 (concord-turtle-insert-relation-feature
871 object '<-denotational@component value
874 (setq obj-spec (delete feature-pair obj-spec))
876 (when (and (setq feature-pair (assq 'name obj-spec))
877 (setq value (cdr feature-pair)))
878 (insert (format "%s " line-separator))
880 (if (> (+ (current-column) (length value)) 48)
884 (setq obj-spec (delete feature-pair obj-spec))
886 (when (and (setq feature-pair (assq 'name* obj-spec))
887 (setq value (cdr feature-pair)))
888 (insert (format "%s " line-separator))
890 (if (> (+ (current-column) (length value)) 48)
894 (setq obj-spec (delete feature-pair obj-spec))
896 (when (and (setq feature-pair (assq 'script obj-spec))
897 (setq value (cdr feature-pair)))
898 (insert (format "%s :script\t\t ( %s ) ;"
900 (mapconcat (lambda (cell)
901 (format "script:%s" cell))
903 (setq obj-spec (delete feature-pair obj-spec))
905 (when (and (setq feature-pair (assq '=>ucs obj-spec))
906 (setq value (cdr feature-pair)))
907 (insert (format "%s :to.ucs\t\t a.ucs:0x%04X ; # %c"
908 line-separator value (decode-char '=ucs value)))
909 (setq obj-spec (delete feature-pair obj-spec))
911 (when (and (setq feature-pair (assq '=>ucs* obj-spec))
912 (setq value (cdr feature-pair)))
913 (insert (format "%s :to.canonical-ucs\ta.ucs:0x%04X ; # %c"
914 line-separator value (decode-char '=ucs value)))
915 (setq obj-spec (delete feature-pair obj-spec))
917 (dolist (name '(=>ucs@gb =>ucs@big5))
918 (when (and (setq feature-pair (assq name obj-spec))
919 (setq value (cdr feature-pair)))
920 (insert line-separator)
921 (insert (format " \"%-20s\": #x%04X,\t\"_comment\": \"%c\"%s"
926 (symbol-name name) 2)))
929 (setq obj-spec (delete feature-pair obj-spec))
931 (when (and (setq feature-pair (assq 'general-category obj-spec))
932 (setq value (cdr feature-pair)))
933 (insert (format "%s :general-category \"%s\" ; # %s"
935 (cond ((rassoc value unidata-normative-category-alist)
936 "Normative Category")
937 ((rassoc value unidata-informative-category-alist)
938 "Informative Category")
940 "Unknown Category"))))
941 (setq obj-spec (delete feature-pair obj-spec))
943 (when (and (setq feature-pair (assq 'bidi-category obj-spec))
944 (setq value (cdr feature-pair)))
945 (insert (format "%s :bidi-category %S ;"
948 (setq obj-spec (delete feature-pair obj-spec))
950 (when (and (setq feature-pair (assq 'mirrored obj-spec))
951 (setq value (cdr feature-pair)))
952 (insert (format "%s :mirrored \"%s\" ;"
955 (setq obj-spec (delete feature-pair obj-spec))
958 ((and (and (setq feature-pair (assq 'decimal-digit-value obj-spec))
959 (setq value (cdr feature-pair))))
960 (insert (format "%s :decimal-digit-value %2d ;"
961 line-separator value))
962 (setq obj-spec (delete feature-pair obj-spec))
963 (when (and (setq feature-pair (assq 'digit-value obj-spec))
964 (setq value (cdr feature-pair)))
965 (insert (format "%s :digit-value\t %2d ;"
966 line-separator value))
967 (setq obj-spec (delete feature-pair obj-spec))
969 (when (and (setq feature-pair (assq 'numeric-value obj-spec))
970 (setq value (cdr feature-pair)))
971 (insert (format "%s :numeric-value\t %2d ;"
972 line-separator value))
973 (setq obj-spec (delete feature-pair obj-spec))
977 (when (and (setq feature-pair (assq 'digit-value obj-spec))
978 (setq value (cdr feature-pair)))
979 (insert line-separator)
980 (insert (format "%s :digit-value\t %2d ;"
981 line-separator value))
982 (setq obj-spec (delete feature-pair obj-spec))
984 (when (and (setq feature-pair (assq 'numeric-value obj-spec))
985 (setq value (cdr feature-pair)))
986 (insert line-separator)
987 (insert (format "%s :numeric-value\t %2d ;"
988 line-separator value))
989 (setq obj-spec (delete feature-pair obj-spec))
991 (when (and (setq feature-pair (assq 'iso-10646-comment obj-spec))
992 (setq value (cdr feature-pair)))
993 (insert line-separator)
994 (insert (format "{\"iso-10646-comment\":\t %S}%s"
997 (setq obj-spec (delete feature-pair obj-spec))
999 (when (and (setq feature-pair (assq 'morohashi-daikanwa obj-spec))
1000 (setq value (cdr feature-pair)))
1001 (insert line-separator)
1002 (insert (format "%s :morohashi-daikanwa\t %S ;"
1003 line-separator value))
1004 (setq obj-spec (delete feature-pair obj-spec))
1008 (when (and (setq feature-pair (assq 'ideographic-radical obj-spec))
1009 (setq value (cdr feature-pair)))
1010 (setq radical value)
1011 (insert (format "%s ideo:radical %3d ; # %c "
1014 (ideographic-radical radical)
1016 (setq obj-spec (delete feature-pair obj-spec))
1018 (when (and (setq feature-pair (assq 'shuowen-radical obj-spec))
1019 (setq value (cdr feature-pair)))
1020 (insert line-separator)
1021 (insert (format " \"shuowen-radical\":\t %S,\t\"_comment\": \"%c\""
1023 (shuowen-radical value)))
1024 (setq obj-spec (delete feature-pair obj-spec))
1027 (dolist (domain chise-turtle-feature-domains)
1028 (setq key (intern (format "%s@%s" 'ideographic-radical domain)))
1029 (when (and (setq feature-pair (assq key obj-spec))
1030 (setq value (cdr feature-pair)))
1031 (setq radical value)
1032 (insert (format "%s ideo:radical [ "
1034 (setq col (current-column))
1035 (setq indent (make-string col ?\ ))
1036 (insert (format ":context domain:%-7s ;\n%srdf:value "
1037 (chise-turtle-uri-encode-ccs-name domain)
1039 (setq lb (concord-turtle-insert-radical radical))
1040 (setq obj-spec (delete feature-pair obj-spec))
1041 (setq skey (intern (format "%s*sources" key)))
1042 (when (and (setq feature-pair (assq skey obj-spec))
1043 (setq value (cdr feature-pair)))
1044 (insert (format "\n%s" indent))
1045 (setq lb (concord-turtle-insert-metadata :sources value))
1046 ;; (insert (format " ;\n%s:sources (" indent))
1047 ;; (setq col (current-column))
1048 ;; (setq indent (make-string col ?\ ))
1049 ;; (insert (format " chisebib:%s" (car value)))
1050 ;; (dolist (cell (cdr value))
1051 ;; (insert (format "\n%s chisebib:%s" indent cell)))
1054 (setq obj-spec (delete feature-pair obj-spec))
1056 (insert (format "\n%s] ;" (make-string (- col 2) ?\ )))
1059 (setq key (intern (format "%s@%s" 'ideographic-strokes domain)))
1060 (when (and (setq feature-pair (assq key obj-spec))
1061 (setq value (cdr feature-pair)))
1062 (setq strokes value)
1063 (insert (format "%s ideo:strokes [ "
1065 (setq col (current-column))
1066 (setq indent (make-string col ?\ ))
1067 (insert (format ":context domain:%-7s ;\n%srdf:value %S"
1068 (chise-turtle-uri-encode-ccs-name domain)
1070 (setq obj-spec (delete feature-pair obj-spec))
1071 (setq skey (intern (format "%s*sources" key)))
1072 (when (and (setq feature-pair (assq skey obj-spec))
1073 (setq value (cdr feature-pair)))
1074 (insert (format " ;\n%s" indent))
1075 (concord-turtle-insert-metadata :sources value)
1076 ;; (insert (format " ;\n%s:sources (" indent))
1077 ;; (setq col (current-column))
1078 ;; (setq indent (make-string col ?\ ))
1079 ;; (insert (format " chisebib:%s" (car value)))
1080 ;; (dolist (cell (cdr value))
1081 ;; (insert (format "\n%s chisebib:%s" indent cell)))
1084 (setq obj-spec (delete feature-pair obj-spec))
1087 (setq key (intern (format "%s@%s" 'total-strokes domain)))
1088 (when (and (setq feature-pair (assq key obj-spec))
1089 (setq value (cdr feature-pair)))
1090 (insert (format "%s ideo:total-strokes [ "
1092 (setq col (current-column))
1093 (insert (format ":context domain:%-7s ;\n%srdf:value %S"
1094 (chise-turtle-uri-encode-ccs-name domain)
1095 (make-string col ?\ )
1097 (setq obj-spec (delete feature-pair obj-spec))
1098 (setq skey (intern (format "%s*sources" key)))
1099 (when (and (setq feature-pair (assq skey obj-spec))
1100 (setq value (cdr feature-pair)))
1101 (insert (format " ;\n%s" indent))
1102 (concord-turtle-insert-metadata :sources value)
1103 ;; (insert (format " ;\n%s:sources (" indent))
1104 ;; (setq col (current-column))
1105 ;; (setq indent (make-string col ?\ ))
1106 ;; (insert (format " chisebib:%s" (car value)))
1107 ;; (dolist (cell (cdr value))
1108 ;; (insert (format "\n%s chisebib:%s" indent cell)))
1111 (setq obj-spec (delete feature-pair obj-spec))
1114 ;; (dolist (feature '(ideographic-radical
1115 ;; ideographic-strokes
1117 ;; (setq key (intern (format "%s@%s*sources" feature domain)))
1118 ;; (when (and (setq feature-pair (assq key obj-spec))
1119 ;; (setq value (cdr feature-pair)))
1120 ;; (insert line-separator)
1121 ;; (insert (format " \"%s\":%s" key line-breaking))
1122 ;; (dolist (cell value)
1123 ;; (insert (format " %s" cell)))
1124 ;; (setq obj-spec (delete feature-pair obj-spec))
1127 (when (and (setq feature-pair (assq 'ideographic-strokes obj-spec))
1128 (setq value (cdr feature-pair)))
1129 (setq strokes value)
1130 (insert (format "%s ideo:strokes %2d ;"
1131 line-separator strokes))
1132 (setq obj-spec (delete feature-pair obj-spec))
1134 (when (and (setq feature-pair (assq 'total-strokes obj-spec))
1135 (setq value (cdr feature-pair)))
1136 (insert (format "%s ideo:total-strokes %2d ;"
1137 line-separator value))
1138 (setq obj-spec (delete feature-pair obj-spec))
1140 ;; (if (equal (get-char-attribute char '->titlecase)
1141 ;; (get-char-attribute char '->uppercase))
1142 ;; (setq attributes (delq '->titlecase attributes)))
1144 ;; (dolist (ignored '(composition
1145 ;; ->denotational <-subsumptive ->ucs-unified
1146 ;; ->ideographic-component-forms))
1147 ;; (setq attributes (delq ignored attributes))))
1149 (setq name (car (car obj-spec)))
1150 (setq ret (chise-split-feature-name name))
1151 (setq name-base (car ret)
1152 name-domain (nth 1 ret))
1153 (when (setq value (chise-get-char-attribute-with-metadata
1154 obj-spec name-base name-domain))
1155 (setq metadata (nth 1 value)
1157 (cond ((setq ret (find-charset name))
1158 (setq name (charset-name ret))
1159 (when (not (memq name dest-ccss))
1160 (setq dest-ccss (cons name dest-ccss))
1162 (insert (format "%s :%-25s rdf:nil ;" line-separator
1163 (chise-turtle-uri-encode-ccs-name name)))
1164 (setq ret (chise-turtle-format-ccs-code-point name value))
1165 (insert (format "%s :eq %-25s ; # %c" line-separator
1167 (char-db-decode-isolated-char name value)))
1168 (setq eq-cpos-list (cons (list ret name value) eq-cpos-list))))
1170 (setq ret (if (eq name '=ucs)
1171 (if (< value #x10000)
1174 (intern (format "=%s" name)))))
1175 (setq child-ccs-list (cons ret child-ccs-list)))
1179 (not (eq name '->subsumptive))
1180 (not (eq name '->uppercase))
1181 (not (eq name '->lowercase))
1182 (not (eq name '->titlecase))
1183 (not (eq name '->canonical))
1184 (not (eq name '->Bopomofo))
1185 (not (eq name '->mistakable))
1186 (not (eq name '->ideographic-variants))
1187 (or (eq name '<-identical)
1188 (eq name '<-uppercase)
1189 (eq name '<-lowercase)
1190 (eq name '<-titlecase)
1191 (eq name '<-canonical)
1192 (eq name '<-ideographic-variants)
1193 ;; (eq name '<-synonyms)
1194 (string-match "^<-synonyms" (symbol-name name))
1195 (eq name '<-mistakable)
1196 (when (string-match "^->" (symbol-name name))
1198 ((string-match "^->fullwidth" (symbol-name name))
1199 (not (and (consp value)
1200 (characterp (car value))
1202 (car value) '=ucs 'defined-only)))
1207 ((eq name 'ideographic-structure)
1208 (insert (isd-turtle-format-char nil nil value (/ column 4)
1209 'isd 'without-head-char))
1212 ((eq name '->subsumptive)
1213 (insert line-separator)
1214 (concord-turtle-insert-relation-feature object name value
1217 (setq children value)
1219 ((eq name 'character)
1220 (insert line-separator)
1221 (concord-turtle-insert-relation-feature object name value
1224 ;; (setq children value)
1227 (insert (format "%s %-20s "
1229 (chise-turtle-uri-encode-feature-name name-base)))
1230 (unless (concord-turtle-insert-feature-value
1231 value metadata name-domain name-base)
1235 (setq obj-spec (cdr obj-spec)))
1236 (insert (format "%s ." line-breaking))
1237 (dolist (eq-cpos (nreverse eq-cpos-list))
1238 (setq ret (chise-split-ccs-name (nth 1 eq-cpos)))
1239 (insert (format "%s %s" line-breaking
1241 (insert (format "%s %s" line-breaking
1242 (format ":%s-of" (nth 1 ret))))
1243 (if (null (nth 2 ret))
1244 (insert (format " %14s:%-7s ."
1245 (chise-turtle-uri-encode-ccs-name (car ret))
1246 (nth 1 (split-string (car eq-cpos) ":"))))
1248 (setq col (current-column))
1249 (insert (format ":context domain:%-7s ;\n%srdf:value %5s:%-7s ] ."
1250 (chise-turtle-uri-encode-ccs-name (nth 2 ret))
1251 (make-string col ?\ )
1252 (chise-turtle-uri-encode-ccs-name (car ret))
1253 (nth 1 (split-string (car eq-cpos) ":"))))))
1254 (setq est-coded-charset-priority-list
1255 (append est-coded-charset-priority-list
1256 (nreverse child-ccs-list)))
1258 (dolist (child children)
1259 (insert (format "%s " line-breaking))
1260 (concord-turtle-insert-object-features child nil nil nil 'for-sub-node)))
1263 (defun concord-turtle-insert-char-data (char &optional readable attributes)
1265 (narrow-to-region (point)(point))
1266 (concord-turtle-insert-object-features char readable attributes)
1270 (defun concord-turtle-insert-prefix ()
1271 (let (base-ccs-list ret)
1272 (insert "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
1273 @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
1274 @prefix : <http://rdf.chise.org/rdf/property/character/main/> .
1275 @prefix ideo: <http://rdf.chise.org/rdf/property/character/ideo/> .
1276 @prefix isd: <http://rdf.chise.org/rdf/property/character/isd/> .
1277 @prefix idc: <http://rdf.chise.org/rdf/type/character/idc/> .
1278 @prefix chisegg: <http://rdf.chise.org/rdf/type/character/ggg/> .
1279 @prefix domain: <http://rdf.chise.org/data/domain/> .
1280 @prefix script: <http://rdf.chise.org/data/script/> .
1281 @prefix ideocomb: <http://rdf.chise.org/data/character/ideo/combination/> .
1282 @prefix chisebib: <http://rdf.chise.org/data/bibliography/> .
1283 @prefix ruimoku: <http://www.chise.org/est/view/article@ruimoku/rep.id=/> .
1284 @prefix zob1959: <http://chise.zinbun.kyoto-u.ac.jp/koukotsu/rubbings/> .
1287 (dolist (cell (sort chise-turtle-ccs-prefix-alist
1289 (char-attribute-name< (cdr a)(cdr b)))))
1290 (insert (format "@prefix %s: <%s/%s=> .\n"
1292 "http://www.chise.org/est/view/character"
1293 (www-uri-encode-feature-name (cdr cell))))
1294 (setq ret (chise-split-ccs-name (cdr cell)))
1295 (unless (memq (car ret) base-ccs-list)
1296 (setq base-ccs-list (cons (car ret) base-ccs-list))))
1298 (dolist (base-ccs (nreverse base-ccs-list))
1299 (insert (format "@prefix %s: <%s/%s/code-point/> .\n"
1300 (chise-turtle-uri-encode-ccs-name base-ccs)
1301 "http://rdf.chise.org/data/ccs"
1302 (www-uri-encode-feature-name base-ccs))))))
1304 (defun concord-turtle-insert-ideograph-radical-char-data (radical)
1306 (sort (copy-list (aref ideograph-radical-chars-vector radical))
1308 (ideograph-char< a b radical))))
1310 (dolist (name (char-attribute-list))
1311 (unless (memq name char-db-ignored-attributes)
1312 (push name attributes)
1314 (setq attributes (sort attributes #'char-attribute-name<))
1315 (aset ideograph-radical-chars-vector radical chars)
1316 (dolist (char chars)
1317 (when (not (some (lambda (atr)
1318 (get-char-attribute char atr))
1319 char-db-ignored-attributes))
1320 (concord-turtle-insert-char-data char nil attributes)))
1323 (defun char-db-turtle-write-ideograph-radical-char-data (radical file)
1324 (if (file-directory-p file)
1325 (let ((name (char-feature (decode-char 'ucs (+ #x2EFF radical))
1327 (if (string-match "KANGXI RADICAL " name)
1328 (setq name (capitalize (substring name (match-end 0)))))
1329 (setq name (mapconcat (lambda (char)
1332 (char-to-string char))) name ""))
1335 (format "Ideograph-R%03d-%s.ttl" radical name)
1337 (let (chise-turtle-ccs-prefix-alist)
1339 (concord-turtle-insert-ideograph-radical-char-data radical)
1340 (goto-char (point-min))
1341 (concord-turtle-insert-prefix)
1343 (goto-char (point-min))
1344 (insert (format "# -*- coding: %s -*-\n"
1345 char-db-file-coding-system))
1346 (let ((coding-system-for-write char-db-file-coding-system))
1347 (write-region (point-min)(point-max) file)))))
1353 (provide 'concord-turtle-dump)
1355 ;;; concord-turtle-dump.el ends here