1 ;;; char-db-turtle.el --- Character Database utility -*- coding: utf-8-er; -*-
3 ;; Copyright (C) 2017, 2018 MORIOKA Tomohiko.
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: CHISE, Character Database, RDF, Turtle, ISO/IEC 10646, UCS, Unicode, MULE.
8 ;; This file is part of CHISET (CHISE/Turtle).
10 ;; XEmacs CHISE is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
15 ;; XEmacs CHISE is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with XEmacs CHISE; see the file COPYING. If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
27 (require 'char-db-util)
28 (require 'cwiki-common)
30 (require 'ideograph-util)
32 (setq est-coded-charset-priority-list
43 =jis-x0208 =jis-x0208@1990
45 =jis-x0213-1@2000 =jis-x0213-1@2004
71 =cns11643-1 =cns11643-2 =cns11643-3
72 =cns11643-4 =cns11643-5 =cns11643-6 =cns11643-7
82 =zinbun-oracle =>zinbun-oracle
87 =+>ucs@iso =+>ucs@unicode
93 =>jis-x0208 =>jis-x0213-1
101 =>ucs@jis =>ucs@cns =>ucs@ks
104 =>>ucs@iso =>>ucs@unicode
105 =>>ucs@jis =>>ucs@cns =>>ucs@ks
124 ==cns11643-1 ==cns11643-2 ==cns11643-3
125 ==cns11643-4 ==cns11643-5 ==cns11643-6 ==cns11643-7
140 =>>jis-x0208 =>>jis-x0213-1 =>>jis-x0213-2
141 =+>jis-x0208 =+>jis-x0213-1 =+>jis-x0213-2
147 =jis-x0208@1983 =jis-x0208@1978
155 =>ucs-itaiji-001@iwds-1
181 (defvar chise-turtle-ccs-prefix-alist nil)
183 (defun charset-code-point-format-spec (ccs)
184 (cond ((memq ccs '(=ucs))
187 (let ((ccs-name (symbol-name ccs)))
190 "\\(shinjigen\\|daikanwa/ho\\|=>iwds-1\\)"
194 "\\(gt\\|daikanwa\\|adobe-japan1\\|cbeta\\|zinbun-oracle\\|hng\\)"
197 ((string-match "\\(hanyo-denshi/ks\\|koseki\\|mj\\)" ccs-name)
199 ((string-match "hanyo-denshi/tk" ccs-name)
204 (defun chise-turtle-uri-decode-feature-name (uri-feature)
205 (cond ((string= "a.ucs" uri-feature)
207 ((string= "a.big5" uri-feature)
210 (www-uri-decode-feature-name uri-feature))))
212 (defun chise-turtle-uri-encode-ccs-name (feature-name)
214 ((eq '=ucs feature-name)
216 ((eq '=big5 feature-name)
218 ((eq '==>ucs@bucs feature-name)
221 (mapconcat (lambda (c)
232 (char-to-string c))))
233 (www-uri-encode-feature-name feature-name)
236 (defun chise-turtle-uri-encode-feature-name (feature-name)
238 ((eq '->subsumptive feature-name)
240 ((eq '<-denotational feature-name)
242 ((eq '<-formed feature-name)
244 ((eq '<-same feature-name)
246 ((eq '<-simplified feature-name)
247 "ideo:simplified-form-of")
248 ((eq '<-vulgar feature-name)
249 "ideo:vulgar-form-of")
250 ((eq '<-wrong feature-name)
251 "ideo:wrong-form-of")
252 ((eq '<-original feature-name)
253 "ideo:original-form-of")
254 ((eq '<-ancient feature-name)
255 "ideo:ancient-form-of")
256 ((eq '<-Small-Seal feature-name)
257 "ideo:Small-Seal-of")
258 ((eq '<-interchangeable feature-name)
259 "ideo:interchangeable-form-of")
260 ((eq '->interchangeable feature-name)
261 "ideo:interchangeable")
262 ((eq '->mistakable feature-name)
264 ((eq 'hanyu-dazidian feature-name)
265 "ideo:hanyu-dazidian")
267 (concat ":" (chise-turtle-uri-encode-ccs-name feature-name)))))
269 (defun chise-turtle-format-ccs-code-point (ccs code-point)
270 (let ((ccs-uri (chise-turtle-uri-encode-ccs-name ccs)))
271 (unless (assoc ccs-uri chise-turtle-ccs-prefix-alist)
272 (setq chise-turtle-ccs-prefix-alist
273 (cons (cons ccs-uri ccs)
274 chise-turtle-ccs-prefix-alist)))
277 (format (charset-code-point-format-spec ccs)
280 (defun chise-turtle-encode-char (object)
281 (let ((ccs-list est-coded-charset-priority-list)
283 (if (setq ret (encode-char object '=ucs))
284 (chise-turtle-format-ccs-code-point '=ucs ret)
286 (setq ccs (pop ccs-list))
287 (not (setq ret (encode-char object ccs 'defined-only)))))
289 (chise-turtle-format-ccs-code-point ccs ret)
291 ((and (setq ccs (car (split-char object)))
292 (setq ret (encode-char object ccs)))
293 (chise-turtle-format-ccs-code-point ccs ret)
295 ((setq ret (get-char-attribute object 'ideographic-combination))
296 (format "ideocomb:%s"
297 (mapconcat (lambda (cell)
298 (cond ((characterp cell)
299 (char-to-string cell)
301 ((setq ret2 (find-char cell))
302 (char-to-string ret2)
310 (format (if est-hide-cgi-mode
311 "system-char-id=0x%X"
312 "system-char-id:0x%X")
313 (encode-char object 'system-char-id))
316 (defun chise-split-feature-name (feature-name)
317 (let (base domain number meta)
318 (setq feature-name (symbol-name feature-name))
319 (if (string-match ".\\*." feature-name)
321 (format ":%s" (substring feature-name (1- (match-end 0)))))
322 feature-name (substring feature-name 0 (1+ (match-beginning 0)))))
323 (if (string-match "\\$_\\([0-9]+\\)$" feature-name)
324 (setq number (car (read-from-string (match-string 1 feature-name)))
325 feature-name (substring feature-name 0 (match-beginning 0))))
326 (if (string-match "@" feature-name)
327 (setq domain (car (read-from-string (substring feature-name (match-end 0))))
328 base (intern (substring feature-name 0 (match-beginning 0))))
329 (setq base (intern feature-name)))
330 (list base domain number meta)))
332 (defun chise-compose-feature-name (base domain number meta)
333 (let ((name (if domain
334 (format "%s@%s" base domain)
335 (symbol-name base))))
337 (setq name (format "%s$_%d" name number)))
339 (setq name (format "%s*%s" name
340 (substring (symbol-name meta) 1))))
343 (defvar chise-feature-name-base-metadata-alist nil)
345 (defun chise-update-feature-name-base-metadata-alist ()
347 (let (base domain number metadata
348 bcell dcell ncell ret)
349 (setq chise-feature-name-base-metadata-alist nil)
350 (dolist (fname (sort (char-attribute-list)
351 #'char-attribute-name<))
352 (setq ret (chise-split-feature-name fname)
356 metadata (nth 3 ret))
358 (if (setq bcell (assq base chise-feature-name-base-metadata-alist))
359 (if (setq dcell (assq domain (cdr bcell)))
360 (if (setq ncell (assq number (cdr dcell)))
361 (unless (memq metadata (cdr ncell))
362 (setcdr ncell (nconc (cdr ncell)
364 (setcdr dcell (cons (list number metadata)
366 (setcdr bcell (cons (list domain (list number metadata))
368 (setq chise-feature-name-base-metadata-alist
369 (cons (list base (list domain (list number metadata)))
370 chise-feature-name-base-metadata-alist))
373 (chise-update-feature-name-base-metadata-alist)
375 (defun chise-get-char-attribute-with-metadata (character feature-name-base domain)
376 (let ((value (get-char-attribute
378 (chise-compose-feature-name feature-name-base domain nil nil)
379 '*feature-value-is-empty*))
381 base-metadata metadata
383 (unless (eq value '*feature-value-is-empty*)
385 ((and (setq ret (assq feature-name-base
386 chise-feature-name-base-metadata-alist))
387 (setq dcell (assq domain (cdr ret))))
388 (if (setq ret (assq nil (cdr dcell)))
389 (dolist (bmn (cdr ret))
390 (when (setq m (get-char-attribute
392 (chise-compose-feature-name
393 feature-name-base domain nil bmn)))
395 (list* bmn m base-metadata)))))
401 ((setq ret (assq i (cdr dcell)))
403 (dolist (mn (cdr ret))
404 (when (setq m (get-char-attribute
406 (chise-compose-feature-name
407 feature-name-base domain i mn)))
408 (setq metadata (list* mn m metadata))))
410 (list* :target (car rest) metadata)
417 (list (nconc (nreverse dest) rest)
420 (t (list value nil)))
423 (defun chise-split-ccs-name (ccs)
424 (cond ((eq ccs '=ucs)
425 '(ucs abstract-character nil)
428 '(big5 abstract-character nil)
431 (setq ccs (symbol-name ccs))
433 (if (string-match "^\\(=[=+>]*\\)\\([^=>@*]+\\)@?" ccs)
434 (list (intern (match-string 2 ccs))
435 (chise-decode-ccs-prefix (match-string 1 ccs))
436 (if (string= (setq ret (substring ccs (match-end 0))) "")
441 (defun chise-decode-ccs-prefix (ccs)
442 (or (cdr (assoc ccs '(("==>" . super-abstract-character)
443 ("=>" . abstract-character)
444 ("=+>" . unified-glyph)
445 ("=" . abstract-glyph)
446 ("=>>" . detailed-glyph)
447 ("==" . abstract-glyph-form)
448 ("===" . glyph-image))))
451 (defun chise-turtle-uri-split-ccs (uri-ccs)
453 ((string-match "^a2\\." uri-ccs)
454 (cons ":super-abstract-character"
455 (substring uri-ccs (match-end 0)))
457 ((string-match "^a\\." uri-ccs)
458 (cons ":abstract-character"
459 (substring uri-ccs (match-end 0)))
461 ((string-match "^o\\." uri-ccs)
462 (cons ":unified-glyph"
463 (substring uri-ccs (match-end 0)))
465 ((string-match "^rep\\." uri-ccs)
466 (cons ":abstract-glyph"
467 (substring uri-ccs (match-end 0)))
469 ((string-match "^g\\." uri-ccs)
470 (cons ":detailed-glyph"
471 (substring uri-ccs (match-end 0)))
473 ((string-match "^g2\\." uri-ccs)
474 (cons ":abstract-glyph-form"
475 (substring uri-ccs (match-end 0)))
477 ((string-match "^gi\\." uri-ccs)
478 (cons ":abstract-glyph-form"
479 (substring uri-ccs (match-end 0)))
481 ((string-match "^repi\\." uri-ccs)
483 (substring uri-ccs (match-end 0)))
485 (t (cons ":character" uri-ccs))))
487 (defun char-db-turtle-insert-relation-feature (char name value line-breaking
489 (insert (format " %s%s "
490 (chise-turtle-uri-encode-feature-name name)
492 (char-db-turtle-insert-relations value readable)
496 (defun char-db-turtle-insert-metadata (name value)
497 (let (col indent ret)
498 (insert (format "%-7s " name))
500 ((or (eq name :sources)
502 (setq col (current-column))
503 (setq indent (make-string col ?\ ))
504 (insert (format "chisebib:%s"
505 (chise-turtle-uri-encode-ccs-name (car value))))
506 (dolist (source (cdr value))
507 (insert (format " ,\n%schisebib:%s" indent
508 (chise-turtle-uri-encode-ccs-name source))))
510 ((eq name :references)
511 (setq ret (car value))
512 (setq ret (plist-get (nth 1 ret) :ref))
513 (setq col (current-column))
514 (setq indent (make-string col ?\ ))
515 (insert (format "<%s>" ret))
516 (dolist (refspec (cdr value))
517 (setq ret (plist-get (nth 1 refspec) :ref))
518 (insert (format " ,\n%s<%s>" indent ret)))
521 (insert (format "%S" value))
524 (defun char-db-turtle-insert-radical (radical-number)
525 (insert (format " %3d ; # %c"
527 (ideographic-radical radical-number)))
530 (defun char-db-turtle-insert-list (value &optional readable)
531 (let (lbs separator rest cell al cal key ret)
533 (setq lbs (concat "\n" (make-string (current-column) ?\ ))
536 (setq cell (car value))
537 (if (and (consp cell)
539 (setq ret (condition-case nil
547 (setq key (car (car rest)))
548 (if (find-charset key)
549 (setq cal (cons key cal))
550 (setq al (cons key al)))
551 (setq rest (cdr rest)))
554 (char-db-turtle-insert-char-features ret
558 (setq separator lbs))
559 (setq ret (prin1-to-string cell))
561 (if (< (+ (current-column)
568 (setq separator " "))
569 (setq value (cdr value)))
573 (defun char-db-turtle-insert-source-list (value &optional readable)
574 (let (lbs separator rest cell al cal key ret)
575 (setq lbs (concat " ,\n" (make-string (current-column) ?\ ))
578 (setq cell (car value))
579 (if (and (consp cell)
581 (setq ret (condition-case nil
589 (setq key (car (car rest)))
590 (if (find-charset key)
591 (setq cal (cons key cal))
592 (setq al (cons key al)))
593 (setq rest (cdr rest)))
596 (char-db-turtle-insert-char-features ret
600 (setq separator lbs))
601 (setq ret (prin1-to-string cell))
603 (if (< (+ (current-column)
609 (if (string-match "=" ret)
610 (insert (format "%s:%s"
611 (substring ret 0 (match-beginning 0))
612 (substring ret (match-end 0))))
613 (insert (format "chisebib:%s" ret)))
614 (setq separator " , "))
615 (setq value (cdr value)))
619 (defun char-db-turtle-insert-relations (value &optional readable)
620 (let ((lbs (concat "\n" (make-string (current-column) ?\ )))
622 (if (characterp value)
623 (setq value (list value)))
625 (setq cell (car value))
627 (setq cell (decode-char '=ucs cell)))
630 (setq separator (format " ,%s" lbs)))
631 (if (characterp cell)
632 (insert (format "%-20s" (chise-turtle-encode-char cell)))
633 (char-db-turtle-insert-char-ref cell '<-formed))
634 (setq value (cdr value)))
637 (defun char-db-turtle-insert-target-value (value feature-name-base &optional readable)
638 (cond ((eq feature-name-base 'ideographic-radical)
639 (char-db-turtle-insert-radical value)
641 ((or (eq feature-name-base 'ideographic-combination)
642 (eq feature-name-base '=decomposition)
643 (eq feature-name-base '<-formed)
644 (string-match "^\\(->\\|<-\\)[^*]*$" (symbol-name feature-name-base)))
645 (char-db-turtle-insert-relations value readable)
647 ((eq feature-name-base 'comment)
648 (insert (format "%S" value))
650 ((eq feature-name-base 'sources)
651 (char-db-turtle-insert-source-list value readable)
654 (char-db-turtle-insert-list value readable)
657 (insert (format " %-14s" value))
660 (defun char-db-turtle-insert-feature-value (value metadata domain feature-name-base)
661 (let (indent0 indent rest mdname mdval lb)
663 ((or metadata domain)
664 (setq indent0 (make-string (current-column) ?\ ))
666 (setq indent (make-string (current-column) ?\ ))
668 (insert (format ":context domain:%-7s ;"
669 (chise-turtle-uri-encode-ccs-name domain)))
673 (setq mdname (pop rest)
676 (insert (format "\n%s" indent))
678 (unless (char-db-turtle-insert-metadata mdname mdval)
681 (insert (format "\n%s" indent)))
683 (if (char-db-turtle-insert-target-value value feature-name-base)
684 (insert (format "\n%s] ;" indent0))
688 (char-db-turtle-insert-target-value value feature-name-base)
691 (defun char-db-turtle-insert-char-ref (char-ref feature-name-base)
692 (let (indent0 indent rest mdname mdval lb last-sep)
693 (setq indent0 (make-string (current-column) ?\ ))
695 (setq indent (make-string (current-column) ?\ ))
698 (setq mdname (pop rest)
701 (insert (format "%s\n%s"
708 (cond ((eq mdname :target)
710 (char-db-turtle-insert-target-value mdval feature-name-base)
713 (char-db-turtle-insert-metadata mdname mdval)))))
715 (insert (format "\n%s]" indent0))
719 (defun char-db-turtle-insert-char-features (char
720 &optional readable attributes column
723 (setq column (current-column)))
724 (let ((est-coded-charset-priority-list est-coded-charset-priority-list)
725 (est-view-url-prefix "http://chise.org/est/view")
726 id obj-id type domain
728 name-base name-domain
730 (line-breaking (concat "\n" (make-string column ?\ )))
734 dest-ccss ; sources required-features
736 uri-ccs uri-cpos ccs-base children child-ccs-list col indent)
740 (if (consp attributes)
742 (dolist (name attributes)
743 (unless (or (memq name char-db-ignored-attributes)
744 (string-match "\\*" (symbol-name name)))
745 (if (find-charset name)
749 (dolist (name (char-attribute-list))
750 (unless (or (memq name char-db-ignored-attributes)
751 (string-match "\\*" (symbol-name name)))
752 (if (find-charset name)
756 #'char-attribute-name<)))
757 (setq line-separator line-breaking)
758 (setq id (chise-turtle-encode-char char))
759 (setq obj-id (file-name-nondirectory id))
760 (string-match ":" obj-id)
761 (setq uri-ccs (substring obj-id 0 (match-beginning 0))
762 uri-cpos (substring obj-id (match-end 0)))
763 (insert (format "%s" obj-id))
764 (setq ret (assoc uri-ccs chise-turtle-ccs-prefix-alist))
765 (setq dest-ccss (list (cdr ret)))
766 (setq ret (chise-split-ccs-name (cdr ret)))
767 (setq ccs-base (car ret)
770 (insert (format "%s a chisegg:%s ;" line-separator type))
771 (insert (format "%s :%s-of" line-breaking type))
773 (insert (format " %s:%s ;"
774 (chise-turtle-uri-encode-ccs-name ccs-base) uri-cpos))
776 (setq col (current-column))
777 (insert (format ":context domain:%-7s ;\n%s:target %7s:%-7s ] ;"
778 (chise-turtle-uri-encode-ccs-name domain)
779 (make-string col ?\ )
780 (chise-turtle-uri-encode-ccs-name ccs-base) uri-cpos)))
781 (when (memq '<-subsumptive attributes)
782 (when (or readable (not for-sub-node))
783 (when (setq value (get-char-attribute char '<-subsumptive))
784 (insert line-separator)
785 (char-db-turtle-insert-relation-feature char '<-subsumptive value
789 (setq attributes (delq '<-subsumptive attributes))
791 (when (and (memq '<-denotational attributes)
792 (setq value (get-char-attribute char '<-denotational)))
793 (insert line-separator)
794 (char-db-turtle-insert-relation-feature char '<-denotational value
797 (setq attributes (delq '<-denotational attributes)))
798 (when (and (memq '<-denotational@component attributes)
800 (get-char-attribute char '<-denotational@component)))
801 (insert line-separator)
802 (char-db-turtle-insert-relation-feature
803 char '<-denotational@component value
806 (setq attributes (delq '<-denotational@component attributes)))
807 (when (and (memq 'name attributes)
808 (setq value (get-char-attribute char 'name)))
809 (insert (format "%s " line-separator))
811 (if (> (+ (current-column) (length value)) 48)
815 (setq attributes (delq 'name attributes))
817 (when (and (memq 'name* attributes)
818 (setq value (get-char-attribute char 'name*)))
819 (insert line-separator)
821 (if (> (+ (current-column) (length value)) 48)
825 (setq attributes (delq 'name* attributes))
827 (when (and (memq 'script attributes)
828 (setq value (get-char-attribute char 'script)))
829 (insert (format "%s :script\t\t ( %s ) ;"
831 (mapconcat (lambda (cell)
832 (format "script:%s" cell))
834 (setq attributes (delq 'script attributes))
836 ;; (dolist (name '(=>ucs =>ucs*))
837 ;; (when (and (memq name attributes)
838 ;; (setq value (get-char-attribute char name)))
839 ;; (insert line-separator)
840 ;; (insert (format " \"%-20s\": #x%04X,\t\"_comment\": \"%c\""
841 ;; name value (decode-char '=ucs value)))
842 ;; (setq attributes (delq name attributes))))
843 (when (and (memq '=>ucs attributes)
844 (setq value (get-char-attribute char '=>ucs)))
845 (insert (format "%s :to.ucs\t\t a.ucs:0x%04X ; # %c"
846 line-separator value (decode-char '=ucs value)))
847 (setq attributes (delq '=>ucs attributes))
849 (when (setq value (get-char-attribute char '=>ucs*))
850 (insert (format "%s :to.canonical-ucs\ta.ucs:0x%04X ; # %c"
851 line-separator value (decode-char '=ucs value)))
852 (setq attributes (delq '=>ucs* attributes))
854 (dolist (name '(=>ucs@gb =>ucs@big5))
855 (when (and (memq name attributes)
856 (setq value (get-char-attribute char name)))
857 (insert line-separator)
858 (insert (format " \"%-20s\": #x%04X,\t\"_comment\": \"%c\"%s"
863 (symbol-name name) 2)))
866 (setq attributes (delq name attributes))
868 (when (and (memq 'general-category attributes)
869 (setq value (get-char-attribute char 'general-category)))
870 (insert (format "%s :general-category \"%s\" ; # %s"
872 (cond ((rassoc value unidata-normative-category-alist)
873 "Normative Category")
874 ((rassoc value unidata-informative-category-alist)
875 "Informative Category")
877 "Unknown Category"))))
878 (setq attributes (delq 'general-category attributes))
880 (when (and (memq 'bidi-category attributes)
881 (setq value (get-char-attribute char 'bidi-category)))
882 (insert (format "%s :bidi-category %S ;"
885 (setq attributes (delq 'bidi-category attributes))
887 (unless (or (not (memq 'mirrored attributes))
888 (eq (setq value (get-char-attribute char 'mirrored 'empty))
890 (insert (format "%s :mirrored \"%s\" ;"
893 (setq attributes (delq 'mirrored attributes))
896 ((and (memq 'decimal-digit-value attributes)
897 (setq value (get-char-attribute char 'decimal-digit-value)))
898 (insert (format "%s :decimal-digit-value %2d ;"
899 line-separator value))
900 (setq attributes (delq 'decimal-digit-value attributes))
901 (when (and (memq 'digit-value attributes)
902 (setq value (get-char-attribute char 'digit-value)))
903 (insert (format "%s :digit-value\t %2d ;"
904 line-separator value))
905 (setq attributes (delq 'digit-value attributes))
907 (when (and (memq 'numeric-value attributes)
908 (setq value (get-char-attribute char 'numeric-value)))
909 (insert (format "%s :numeric-value\t %2d ;"
910 line-separator value))
911 (setq attributes (delq 'numeric-value attributes))
915 (when (and (memq 'digit-value attributes)
916 (setq value (get-char-attribute char 'digit-value)))
917 (insert line-separator)
918 (insert (format "%s :digit-value\t %2d ;"
919 line-separator value))
920 (setq attributes (delq 'digit-value attributes))
922 (when (and (memq 'numeric-value attributes)
923 (setq value (get-char-attribute char 'numeric-value)))
924 (insert line-separator)
925 (insert (format "%s :numeric-value\t %2d ;"
926 line-separator value))
927 (setq attributes (delq 'numeric-value attributes))
929 (when (and (memq 'iso-10646-comment attributes)
930 (setq value (get-char-attribute char 'iso-10646-comment)))
931 (insert line-separator)
932 (insert (format "{\"iso-10646-comment\":\t %S}%s"
935 (setq attributes (delq 'iso-10646-comment attributes))
937 (when (and (memq 'morohashi-daikanwa attributes)
938 (setq value (get-char-attribute char 'morohashi-daikanwa)))
939 (insert line-separator)
940 (insert (format "%s :morohashi-daikanwa\t %S ;"
941 line-separator value))
942 (setq attributes (delq 'morohashi-daikanwa attributes))
946 (when (and (memq 'ideographic-radical attributes)
947 (setq value (get-char-attribute char 'ideographic-radical)))
949 (insert (format "%s ideo:radical %3d ; # %c "
952 (ideographic-radical radical)
954 (setq attributes (delq 'ideographic-radical attributes))
956 (when (and (memq 'shuowen-radical attributes)
957 (setq value (get-char-attribute char 'shuowen-radical)))
958 (insert line-separator)
959 (insert (format " \"shuowen-radical\":\t %S,\t\"_comment\": \"%c\""
961 (shuowen-radical value)))
962 (setq attributes (delq 'shuowen-radical attributes))
967 char-db-feature-domains
969 (dolist (feature (char-attribute-list))
970 (setq feature (symbol-name feature))
972 "\\(radical\\|strokes\\)@\\([^@*]+\\)\\(\\*\\|$\\)"
974 (setq domain (intern (match-string 2 feature)))
975 (unless (memq domain dest)
976 (setq dest (cons domain dest)))))
977 (sort dest #'string<))))
978 (setq key (intern (format "%s@%s" 'ideographic-radical domain)))
979 (when (and (memq key attributes)
980 (setq value (get-char-attribute char key)))
982 (insert (format "%s ideo:radical "
984 (char-db-turtle-insert-feature-value value nil domain 'ideographic-radical)
985 (setq attributes (delq key attributes))
987 (setq key (intern (format "%s@%s" 'ideographic-strokes domain)))
988 (when (and (memq key attributes)
989 (setq value (get-char-attribute char key)))
991 (insert (format "%s ideo:strokes [ "
993 (setq col (current-column))
994 (setq indent (make-string col ?\ ))
995 (insert (format ":context domain:%-7s ;\n%s:target %S"
996 (chise-turtle-uri-encode-ccs-name domain)
998 (setq attributes (delq key attributes))
999 (setq skey (intern (format "%s*sources" key)))
1000 (when (and (memq skey attributes)
1001 (setq value (get-char-attribute char skey)))
1002 (insert (format " ;\n%s:sources (" indent))
1003 (setq col (current-column))
1004 (setq indent (make-string col ?\ ))
1005 (insert (format " chisebib:%s" (car value)))
1006 (dolist (cell (cdr value))
1007 (insert (format "\n%s chisebib:%s" indent cell)))
1009 (setq attributes (delq skey attributes))
1012 (setq key (intern (format "%s@%s" 'total-strokes domain)))
1013 (when (and (memq key attributes)
1014 (setq value (get-char-attribute char key)))
1015 (insert (format "%s ideo:total-strokes [ "
1017 (setq col (current-column))
1018 (insert (format ":context domain:%-7s ;\n%s:target %S"
1019 (chise-turtle-uri-encode-ccs-name domain)
1020 (make-string col ?\ )
1022 (setq attributes (delq key attributes))
1023 (setq skey (intern (format "%s*sources" key)))
1026 (dolist (feature '(ideographic-radical
1029 (setq key (intern (format "%s@%s*sources" feature domain)))
1030 (when (and (memq key attributes)
1031 (setq value (get-char-attribute char key)))
1032 (insert line-separator)
1033 (insert (format " \"%s\":%s" key line-breaking))
1034 (dolist (cell value)
1035 (insert (format " %s" cell)))
1036 (setq attributes (delq key attributes))
1039 (when (and (memq 'ideographic-strokes attributes)
1040 (setq value (get-char-attribute char 'ideographic-strokes)))
1041 (setq strokes value)
1042 (insert (format "%s ideo:strokes %2d ;"
1043 line-separator strokes))
1044 (setq attributes (delq 'ideographic-strokes attributes))
1046 (when (and (memq 'kangxi-radical attributes)
1047 (setq value (get-char-attribute char 'kangxi-radical)))
1048 (unless (eq value radical)
1049 (insert line-separator)
1050 (insert (format "{\"kangxi-radical\":\t%S},\t\"_comment\": \"%c\"%s"
1052 (ideographic-radical value)
1055 (setq radical value)))
1056 (setq attributes (delq 'kangxi-radical attributes))
1058 (when (and (memq 'kangxi-strokes attributes)
1059 (setq value (get-char-attribute char 'kangxi-strokes)))
1060 (unless (eq value strokes)
1061 (insert line-separator)
1062 (insert (format "{\"kangxi-strokes\":\t%S}%s"
1066 (setq strokes value)))
1067 (setq attributes (delq 'kangxi-strokes attributes))
1069 (when (and (memq 'japanese-strokes attributes)
1070 (setq value (get-char-attribute char 'japanese-strokes)))
1071 (unless (eq value strokes)
1072 (insert line-separator)
1073 (insert (format "{\"japanese-strokes\":\t%S}%s"
1077 (setq strokes value)))
1078 (setq attributes (delq 'japanese-strokes attributes))
1080 (when (and (memq 'cns-radical attributes)
1081 (setq value (get-char-attribute char 'cns-radical)))
1082 (insert line-separator)
1083 (insert (format "{\"cns-radical\":\t%S},\t\"_comment\": \"%c\"%s"
1085 (ideographic-radical value)
1087 (setq attributes (delq 'cns-radical attributes))
1089 (when (and (memq 'cns-strokes attributes)
1090 (setq value (get-char-attribute char 'cns-strokes)))
1091 (unless (eq value strokes)
1092 (insert line-separator)
1093 (insert (format "{\"cns-strokes\":\t%S}%s"
1097 (setq strokes value)))
1098 (setq attributes (delq 'cns-strokes attributes))
1100 (when (and (memq 'total-strokes attributes)
1101 (setq value (get-char-attribute char 'total-strokes)))
1102 (insert (format "%s ideo:total-strokes %2d ;"
1103 line-separator value))
1104 (setq attributes (delq 'total-strokes attributes))
1106 (if (equal (get-char-attribute char '->titlecase)
1107 (get-char-attribute char '->uppercase))
1108 (setq attributes (delq '->titlecase attributes)))
1110 (dolist (ignored '(composition
1111 ->denotational <-subsumptive ->ucs-unified
1112 ->ideographic-component-forms))
1113 (setq attributes (delq ignored attributes))))
1115 (setq name (car attributes))
1116 (setq ret (chise-split-feature-name name))
1117 (setq name-base (car ret)
1118 name-domain (nth 1 ret))
1119 (when (setq value (chise-get-char-attribute-with-metadata
1120 char name-base name-domain))
1121 (setq metadata (nth 1 value)
1123 (cond ((setq ret (find-charset name))
1124 (setq name (charset-name ret))
1125 (when (not (memq name dest-ccss))
1126 (setq dest-ccss (cons name dest-ccss))
1128 (insert (format "%s :%-25s rdf:nil ;" line-separator
1129 (chise-turtle-uri-encode-ccs-name name)))
1130 (setq ret (chise-turtle-format-ccs-code-point name value))
1131 (insert (format "%s :eq %-25s ; # %c" line-separator
1133 (char-db-decode-isolated-char name value)))
1134 (setq eq-cpos-list (cons (list ret name value) eq-cpos-list))))
1136 (setq ret (if (eq name '=ucs)
1137 (if (< value #x10000)
1140 (intern (format "=%s" name)))))
1141 (setq child-ccs-list (cons ret child-ccs-list)))
1145 (not (eq name '->subsumptive))
1146 (not (eq name '->uppercase))
1147 (not (eq name '->lowercase))
1148 (not (eq name '->titlecase))
1149 (not (eq name '->canonical))
1150 (not (eq name '->Bopomofo))
1151 (not (eq name '->mistakable))
1152 (not (eq name '->ideographic-variants))
1153 (or (eq name '<-identical)
1154 (eq name '<-uppercase)
1155 (eq name '<-lowercase)
1156 (eq name '<-titlecase)
1157 (eq name '<-canonical)
1158 (eq name '<-ideographic-variants)
1159 ;; (eq name '<-synonyms)
1160 (string-match "^<-synonyms" (symbol-name name))
1161 (eq name '<-mistakable)
1162 (when (string-match "^->" (symbol-name name))
1164 ((string-match "^->fullwidth" (symbol-name name))
1165 (not (and (consp value)
1166 (characterp (car value))
1168 (car value) '=ucs 'defined-only)))
1173 ((eq name 'ideographic-structure)
1174 (insert (isd-turtle-format-char nil nil value (/ column 4)
1175 'isd 'without-head-char))
1178 ((eq name '->subsumptive)
1179 (insert line-separator)
1180 (char-db-turtle-insert-relation-feature char name value
1183 (setq children value)
1186 (insert (format "%s %-20s "
1188 (chise-turtle-uri-encode-feature-name name-base)))
1189 (unless (char-db-turtle-insert-feature-value
1190 value metadata name-domain name-base)
1194 (setq attributes (cdr attributes)))
1195 (insert (format "%s ." line-breaking))
1196 (dolist (eq-cpos (nreverse eq-cpos-list))
1197 (setq ret (chise-split-ccs-name (nth 1 eq-cpos)))
1198 (insert (format "%s %s" line-breaking
1200 (insert (format "%s %25s" line-breaking
1201 (format ":%s-of" (nth 1 ret))))
1202 (if (null (nth 2 ret))
1203 (insert (format " %14s:%-7s ."
1204 (chise-turtle-uri-encode-ccs-name (car ret))
1205 (nth 1 (split-string (car eq-cpos) ":"))))
1207 (setq col (current-column))
1208 (insert (format ":context domain:%-7s ;\n%s:target %7s:%-7s ] ."
1209 (chise-turtle-uri-encode-ccs-name (nth 2 ret))
1210 (make-string col ?\ )
1211 (chise-turtle-uri-encode-ccs-name (car ret))
1212 (nth 1 (split-string (car eq-cpos) ":"))))))
1213 (setq est-coded-charset-priority-list
1214 (append est-coded-charset-priority-list
1215 (nreverse child-ccs-list)))
1217 (dolist (child children)
1218 (insert (format "%s " line-breaking))
1219 (char-db-turtle-insert-char-features child nil nil nil 'for-sub-node)))
1222 (defun char-db-turtle-insert-char-data (char &optional readable attributes)
1224 (narrow-to-region (point)(point))
1225 (char-db-turtle-insert-char-features char readable attributes)
1229 (defun char-db-turtle-insert-prefix ()
1230 (let (base-ccs-list ret)
1231 (insert "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
1232 @prefix : <http://rdf.chise.org/rdf/property/character/main/> .
1233 @prefix ideo: <http://rdf.chise.org/rdf/property/character/ideo/> .
1234 @prefix isd: <http://rdf.chise.org/rdf/property/character/isd/> .
1235 @prefix idc: <http://rdf.chise.org/rdf/type/character/idc/> .
1236 @prefix chisegg: <http://rdf.chise.org/rdf/type/character/ggg/> .
1237 @prefix domain: <http://rdf.chise.org/data/domain/> .
1238 @prefix script: <http://rdf.chise.org/data/script/> .
1239 @prefix ideocomb: <http://rdf.chise.org/data/character/ideo/combination/> .
1240 @prefix chisebib: <http://rdf.chise.org/data/bibliography/> .
1241 @prefix ruimoku: <http://www.chise.org/est/view/article@ruimoku/rep.id=/> .
1242 @prefix zob1959: <http://chise.zinbun.kyoto-u.ac.jp/koukotsu/rubbings/> .
1245 (dolist (cell (sort chise-turtle-ccs-prefix-alist
1247 (char-attribute-name< (cdr a)(cdr b)))))
1248 (insert (format "@prefix %s: <%s/%s=> .\n"
1250 "http://www.chise.org/est/view/character"
1251 (www-uri-encode-feature-name (cdr cell))))
1252 (setq ret (chise-split-ccs-name (cdr cell)))
1253 (unless (memq (car ret) base-ccs-list)
1254 (setq base-ccs-list (cons (car ret) base-ccs-list))))
1256 (dolist (base-ccs (nreverse base-ccs-list))
1257 (insert (format "@prefix %s: <%s/%s/code-point/> .\n"
1258 (chise-turtle-uri-encode-ccs-name base-ccs)
1259 "http://rdf.chise.org/data/ccs"
1260 (www-uri-encode-feature-name base-ccs))))))
1262 (defun char-db-turtle-insert-ideograph-radical-char-data (radical)
1264 (sort (copy-list (aref ideograph-radical-chars-vector radical))
1266 (ideograph-char< a b radical))))
1268 (dolist (name (char-attribute-list))
1269 (unless (memq name char-db-ignored-attributes)
1270 (push name attributes)
1272 (setq attributes (sort attributes #'char-attribute-name<))
1273 (aset ideograph-radical-chars-vector radical chars)
1274 (dolist (char chars)
1275 (when (not (some (lambda (atr)
1276 (get-char-attribute char atr))
1277 char-db-ignored-attributes))
1278 (char-db-turtle-insert-char-data char nil attributes)))
1281 (defun char-db-turtle-write-ideograph-radical-char-data (radical file)
1282 (if (file-directory-p file)
1283 (let ((name (char-feature (decode-char 'ucs (+ #x2EFF radical))
1285 (if (string-match "KANGXI RADICAL " name)
1286 (setq name (capitalize (substring name (match-end 0)))))
1287 (setq name (mapconcat (lambda (char)
1290 (char-to-string char))) name ""))
1293 (format "Ideograph-R%03d-%s.ttl" radical name)
1295 (let (chise-turtle-ccs-prefix-alist)
1297 (char-db-turtle-insert-ideograph-radical-char-data radical)
1298 (goto-char (point-min))
1299 (char-db-turtle-insert-prefix)
1301 (goto-char (point-min))
1302 (insert (format "# -*- coding: %s -*-\n"
1303 char-db-file-coding-system))
1304 (let ((coding-system-for-write char-db-file-coding-system))
1305 (write-region (point-min)(point-max) file)))))
1311 (provide 'char-db-turtle)
1313 ;;; char-db-turtle.el ends here