1 ;;; isd-turtle.el --- Utility to dump ideographic-structure as Turtle files
3 ;; Copyright (C) 2017, 2018 MORIOKA Tomohiko
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: Ideographic Structures (漢字構造、解字), IDS, CHISE, RDF, Turtle
8 ;; This file is a part of CHISE-ISD (Ideographic Structure Database).
10 ;; This program is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
15 ;; This program is distributed in the hope that it will be useful, but
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with this program; see the file COPYING. If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
27 (require 'cwiki-common)
29 (defvar isd-url-prefix "http://rdf.chise.org/data/")
31 (setq est-coded-charset-priority-list
42 =jis-x0208 =jis-x0208@1990
44 =jis-x0213-1@2000 =jis-x0213-1@2004
66 =cns11643-1 =cns11643-2 =cns11643-3
67 =cns11643-4 =cns11643-5 =cns11643-6 =cns11643-7
77 =zinbun-oracle =>zinbun-oracle
82 =+>ucs@iso =+>ucs@unicode
88 =>jis-x0208 =>jis-x0213-1
96 =>ucs@jis =>ucs@cns =>ucs@ks
99 =>>ucs@iso =>>ucs@unicode
100 =>>ucs@jis =>>ucs@cns =>>ucs@ks
118 ==cns11643-1 ==cns11643-2 ==cns11643-3
119 ==cns11643-4 ==cns11643-5 ==cns11643-6 ==cns11643-7
129 =>>jis-x0208 =>>jis-x0213-1 =>>jis-x0213-2
130 =+>jis-x0208 =+>jis-x0213-1 =+>jis-x0213-2
136 =jis-x0208@1983 =jis-x0208@1978
169 ;; (defvar isd-turtle-ccs-list nil)
170 (defvar chise-turtle-ccs-prefix-alist nil)
172 (defun charset-code-point-format-spec (ccs)
173 (cond ((memq ccs '(=ucs))
176 (let ((ccs-name (symbol-name ccs)))
179 "\\(shinjigen\\|daikanwa/ho\\|=>iwds-1\\)"
183 "\\(gt\\|daikanwa\\|adobe-japan1\\|cbeta\\|zinbun-oracle\\|hng\\)"
186 ((string-match "\\(hanyo-denshi/ks\\|koseki\\|mj\\)" ccs-name)
188 ((string-match "hanyo-denshi/tk" ccs-name)
193 ;; (defun isd-turtle-uri-encode-feature-name (feature-name)
195 ;; ((eq '=ucs feature-name)
197 ;; ((eq '==>ucs@bucs feature-name)
200 ;; (mapconcat (lambda (c)
203 ;; (char-to-string c)))
204 ;; (www-uri-encode-feature-name feature-name)
206 (defun chise-turtle-uri-encode-ccs-name (feature-name)
208 ((eq '=ucs feature-name)
210 ((eq '=big5 feature-name)
212 ((eq '==>ucs@bucs feature-name)
215 (mapconcat (lambda (c)
224 (char-to-string c))))
225 (www-uri-encode-feature-name feature-name)
228 ;; (defun isd-turtle-format-ccs-code-point (ccs code-point)
229 ;; (unless (memq ccs isd-turtle-ccs-list)
230 ;; (setq isd-turtle-ccs-list (cons ccs isd-turtle-ccs-list)))
232 ;; (isd-turtle-uri-encode-feature-name ccs)
233 ;; (format (charset-code-point-format-spec ccs)
235 (defun chise-turtle-format-ccs-code-point (ccs code-point)
236 (let ((ccs-uri (chise-turtle-uri-encode-ccs-name ccs)))
237 (unless (assoc ccs-uri chise-turtle-ccs-prefix-alist)
238 (setq chise-turtle-ccs-prefix-alist
239 (cons (cons ccs-uri ccs)
240 chise-turtle-ccs-prefix-alist)))
243 (format (charset-code-point-format-spec ccs)
246 (defun isd-turtle-encode-char (object)
247 (let ((ccs-list est-coded-charset-priority-list)
249 (if (setq ret (encode-char object '=ucs))
250 (chise-turtle-format-ccs-code-point '=ucs ret)
252 (setq ccs (pop ccs-list))
253 (not (setq ret (encode-char object ccs 'defined-only)))))
255 (chise-turtle-format-ccs-code-point ccs ret)
257 ((and (setq ccs (car (split-char object)))
258 (setq ret (encode-char object ccs)))
259 (chise-turtle-format-ccs-code-point ccs ret)
262 (format (if est-hide-cgi-mode
263 "system-char-id=0x%X"
264 "system-char-id:0x%X")
265 (encode-char object 'system-char-id))
268 (defun isd-turtle-format-component (component separator level prefix)
269 (cond ((characterp component)
271 (isd-turtle-encode-char component)
276 (let ((ret (find-char component)))
279 (isd-turtle-encode-char ret) separator ret))
280 ((setq ret (assq 'ideographic-structure component))
281 (if (eq separator ?\;)
283 (isd-turtle-format-char nil nil (cdr ret) (1+ level)
285 (isd-turtle-format-char nil nil (cdr ret) (1+ level)
288 (defun isd-turtle-format-char (ccs code-point &optional ids-list level
289 prefix without-head-char)
294 (let ((indent (make-string (* level 4) ?\ ))
301 (if (and ccs code-point
302 (setq char (decode-char ccs code-point)))
303 (setq ids-list (get-char-attribute char 'ideographic-structure))))
304 (setq idc (car ids-list))
305 (setq c1 (nth 1 ids-list)
309 (setq idc (plist-get idc :char)))
311 (setq ret (find-char idc)))
314 (setq ret (find-char c1)))
317 (setq ret (find-char c2)))
320 (setq ret (find-char c3)))
323 ((eq idc ?\u2FF0) ; ⿰
341 ((memq idc '(?⿴ ?⿵ ?⿶ ?⿷ ?⿸ ?⿹ ?⿺))
349 ((and idc (eq (encode-char idc '=ucs-itaiji-001) #x2FF6))
354 ((and idc (eq (encode-char idc '=ucs-var-001) #x2FF0))
355 (setq idc-str "⿰・SLR")
359 ((and idc (eq (encode-char idc '=>iwds-1) 307))
364 ((and idc (eq (encode-char idc '=>iwds-1) 305))
369 ((and idc (eq (encode-char idc '=>ucs@component) #x2FF5))
378 %s %s:structure [ a idc:%s ;
383 (if without-head-char
385 (if (and ccs code-point)
387 (chise-turtle-format-ccs-code-point ccs code-point)
390 indent prefix (or idc-str (char-to-string idc))
391 indent prefix p1 (isd-turtle-format-component c1 ?\; (1+ level) prefix)
392 indent prefix p2 (isd-turtle-format-component c2 ?\; (1+ level) prefix)
393 indent prefix p3 (isd-turtle-format-component c3 ?\ (1+ level) prefix)
395 (if without-head-char
404 %s %s:structure [ a idc:%s ;
408 (if without-head-char
410 (if (and ccs code-point)
412 (chise-turtle-format-ccs-code-point ccs code-point)
415 indent prefix (or idc-str (char-to-string idc))
416 indent prefix p1 (isd-turtle-format-component c1 ?\; (1+ level) prefix)
417 indent prefix p2 (isd-turtle-format-component c2 ?\ (1+ level) prefix)
419 (if without-head-char
427 (defun isd-turtle-insert-char (ccs code-point)
428 (let ((ret (isd-turtle-format-char ccs code-point)))
433 (defun isd-turtle-insert-ccs-ranges (ccs &rest ranges)
434 (let (range code max-code)
436 (setq range (car ranges))
438 (setq code (car range)
439 max-code (cdr range))
440 (while (<= code max-code)
441 (isd-turtle-insert-char ccs code)
442 (setq code (1+ code)))
445 (isd-turtle-insert-char ccs range)
447 (t (error 'wrong-type-argument range)))
448 (setq ranges (cdr ranges)))))
450 (defun isd-turtle-dump-range (file path func &rest args)
452 (let ((coding-system-for-write 'utf-8-mcs-er)
453 ;; isd-turtle-ccs-list
454 chise-turtle-ccs-prefix-alist)
455 (if (file-directory-p path)
456 (setq path (expand-file-name file path)))
458 (goto-char (point-min))
459 ;; (dolist (ccs (sort isd-turtle-ccs-list
460 ;; #'char-attribute-name<))
461 ;; (insert (format "@prefix %s: <%s%s=> .\n"
462 ;; (isd-turtle-uri-encode-feature-name ccs)
463 ;; "http://www.chise.org/est/view/character/"
464 ;; (www-uri-encode-feature-name ccs))))
465 (dolist (cell (sort chise-turtle-ccs-prefix-alist
467 (char-attribute-name< (cdr a)(cdr b)))))
468 (insert (format "@prefix %s: <%s/%s=> .\n"
470 "http://www.chise.org/est/view/character"
471 (www-uri-encode-feature-name (cdr cell)))))
473 (goto-char (point-min))
474 (insert "# -*- coding: utf-8-mcs-er -*-\n")
475 (insert "@prefix : <http://rdf.chise.org/rdf/property/character/isd/> .
476 @prefix idc: <http://rdf.chise.org/rdf/type/character/idc/> .\n")
477 (write-region (point-min)(point-max) path))))
480 (defun isd-turtle-dump-ucs-basic (filename)
481 (interactive "Fdump ISD-UCS-Basic : ")
482 (isd-turtle-dump-range "ISD-UCS-Basic.ttl" filename
483 #'isd-turtle-insert-ccs-ranges
484 '=ucs '(#x4E00 . #x9FA5)))
487 (defun isd-turtle-dump-ucs-ext-a (filename)
488 (interactive "Fdump ISD-UCS-Ext-A : ")
489 (isd-turtle-dump-range "ISD-UCS-Ext-A.ttl" filename
490 #'isd-turtle-insert-ccs-ranges
491 '=ucs '(#x3400 . #x4DB5) #xFA1F #xFA23))
494 (defun isd-turtle-dump-ucs-ext-b-1 (filename)
495 (interactive "Fdump IDS-UCS-Ext-B-1 : ")
496 (isd-turtle-dump-range "ISD-UCS-Ext-B-1.ttl" filename
497 #'isd-turtle-insert-ccs-ranges
498 'ucs '(#x20000 . #x21FFF)))
501 (defun isd-turtle-dump-ucs-ext-b-2 (filename)
502 (interactive "Fdump IDS-UCS-Ext-B-2 : ")
503 (isd-turtle-dump-range "ISD-UCS-Ext-B-2.ttl" filename
504 #'isd-turtle-insert-ccs-ranges
505 'ucs '(#x22000 . #x23FFF)))
508 (defun isd-turtle-dump-ucs-ext-b-3 (filename)
509 (interactive "Fdump IDS-UCS-Ext-B-3 : ")
510 (isd-turtle-dump-range "ISD-UCS-Ext-B-3.ttl" filename
511 #'isd-turtle-insert-ccs-ranges
512 'ucs '(#x24000 . #x25FFF)))
515 (defun isd-turtle-dump-ucs-ext-b-4 (filename)
516 (interactive "Fdump IDS-UCS-Ext-B-4 : ")
517 (isd-turtle-dump-range "ISD-UCS-Ext-B-4.ttl" filename
518 #'isd-turtle-insert-ccs-ranges
519 'ucs '(#x26000 . #x27FFF)))
522 (defun isd-turtle-dump-ucs-ext-b-5 (filename)
523 (interactive "Fdump IDS-UCS-Ext-B-5 : ")
524 (isd-turtle-dump-range "ISD-UCS-Ext-B-5.ttl" filename
525 #'isd-turtle-insert-ccs-ranges
526 'ucs '(#x28000 . #x29FFF)))
529 (defun isd-turtle-dump-ucs-ext-b-6 (filename)
530 (interactive "Fdump IDS-UCS-Ext-B-6 : ")
531 (isd-turtle-dump-range "ISD-UCS-Ext-B-6.ttl" filename
532 #'isd-turtle-insert-ccs-ranges
533 'ucs '(#x2A000 . #x2A6D6)))
536 (defun isd-turtle-dump-ucs-ext-c (filename)
537 (interactive "Fdump IDS-UCS-Ext-C : ")
538 (isd-turtle-dump-range "ISD-UCS-Ext-C.ttl" filename
539 #'isd-turtle-insert-ccs-ranges
540 'ucs '(#x2A700 . #x2B734)))
543 (defun isd-turtle-dump-ucs-ext-d (filename)
544 (interactive "Fdump IDS-UCS-Ext-D : ")
545 (isd-turtle-dump-range "ISD-UCS-Ext-D.ttl" filename
546 #'isd-turtle-insert-ccs-ranges
547 'ucs '(#x2B740 . #x2B81D)))
550 (defun isd-turtle-dump-ucs-ext-e (filename)
551 (interactive "Fdump IDS-UCS-Ext-E : ")
552 (isd-turtle-dump-range "ISD-UCS-Ext-E.ttl" filename
553 #'isd-turtle-insert-ccs-ranges
554 'ucs '(#x2B820 . #x2CEA1)))
557 (defun isd-turtle-dump-mj-0 (filename)
558 (interactive "Fdump ISD-MJ-0 : ")
559 (isd-turtle-dump-range "ISD-MJ-0.ttl" filename
560 #'isd-turtle-insert-ccs-ranges
564 (defun isd-turtle-dump-mj-1 (filename)
565 (interactive "Fdump ISD-MJ-1 : ")
566 (isd-turtle-dump-range "ISD-MJ-1.ttl" filename
567 #'isd-turtle-insert-ccs-ranges
568 '=mj '(10000 . 19999)))
571 (defun isd-turtle-dump-mj-2 (filename)
572 (interactive "Fdump ISD-MJ-2 : ")
573 (isd-turtle-dump-range "ISD-MJ-2.ttl" filename
574 #'isd-turtle-insert-ccs-ranges
575 '=mj '(20000 . 29999)))
578 (defun isd-turtle-dump-mj-3 (filename)
579 (interactive "Fdump ISD-MJ-3 : ")
580 (isd-turtle-dump-range "ISD-MJ-3.ttl" filename
581 #'isd-turtle-insert-ccs-ranges
582 '=mj '(30000 . 39999)))
585 (defun isd-turtle-dump-mj-4 (filename)
586 (interactive "Fdump ISD-MJ-4 : ")
587 (isd-turtle-dump-range "ISD-MJ-4.ttl" filename
588 #'isd-turtle-insert-ccs-ranges
589 '=mj '(40000 . 49999)))
592 (defun isd-turtle-dump-mj-5 (filename)
593 (interactive "Fdump ISD-MJ-5 : ")
594 (isd-turtle-dump-range "ISD-MJ-5.ttl" filename
595 #'isd-turtle-insert-ccs-ranges
596 '=mj '(50000 . 59999)))
599 (defun isd-turtle-dump-mj-6 (filename)
600 (interactive "Fdump ISD-MJ-6 : ")
601 (isd-turtle-dump-range "ISD-MJ-6.ttl" filename
602 #'isd-turtle-insert-ccs-ranges
603 '=mj '(60000 . 69999)))
606 (defun isd-turtle-dump-all (directory)
607 (interactive "DISD directory : ")
608 (isd-turtle-dump-ucs-basic directory)
609 (isd-turtle-dump-ucs-ext-a directory)
610 (isd-turtle-dump-ucs-ext-b-1 directory)
611 (isd-turtle-dump-ucs-ext-b-2 directory)
612 (isd-turtle-dump-ucs-ext-b-3 directory)
613 (isd-turtle-dump-ucs-ext-b-4 directory)
614 (isd-turtle-dump-ucs-ext-b-5 directory)
615 (isd-turtle-dump-ucs-ext-b-6 directory)
616 (isd-turtle-dump-ucs-ext-c directory)
617 (isd-turtle-dump-ucs-ext-d directory)
618 (isd-turtle-dump-ucs-ext-e directory)
619 (isd-turtle-dump-mj-0 directory)
620 (isd-turtle-dump-mj-1 directory)
621 (isd-turtle-dump-mj-2 directory)
622 (isd-turtle-dump-mj-3 directory)
623 (isd-turtle-dump-mj-4 directory)
624 (isd-turtle-dump-mj-5 directory)
625 (isd-turtle-dump-mj-6 directory)
632 (provide 'isd-turtle)
634 ;;; isd-turtle.el ends here