1 ;;; isd-turtle.el --- Utility to dump ideographic-structure as Turtle files
3 ;; Copyright (C) 2017 MORIOKA Tomohiko
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: Ideographic Structures (漢字構造、解字), IDS, CHISE, RDF, Turtle
8 ;; This file is a part of CHISE-ISD (Ideographic Structure Database).
10 ;; This program is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
15 ;; This program is distributed in the hope that it will be useful, but
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with this program; see the file COPYING. If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
27 (require 'cwiki-common)
29 (defvar isd-url-prefix "http://rdf.chise.org/data/")
31 (setq est-coded-charset-priority-list
36 =jis-x0208 =jis-x0208@1990
38 =jis-x0213-1@2000 =jis-x0213-1@2004
52 =cns11643-1 =cns11643-2 =cns11643-3
53 =cns11643-4 =cns11643-5 =cns11643-6 =cns11643-7
60 =zinbun-oracle =>zinbun-oracle
64 =>jis-x0208 =>jis-x0213-1
71 =+>ucs@iso =+>ucs@unicode
72 =>ucs@jis =>ucs@cns =>ucs@ks
73 =>>ucs@iso =>>ucs@unicode
74 =>>ucs@jis =>>ucs@cns =>>ucs@ks
76 =>>jis-x0208 =>>jis-x0213-1 =>>jis-x0213-2
77 =+>jis-x0208 =+>jis-x0213-1 =+>jis-x0213-2
82 =jis-x0208@1983 =jis-x0208@1978
92 ;; (defvar isd-turtle-ccs-list nil)
93 (defvar chise-turtle-ccs-prefix-alist nil)
95 (defun charset-code-point-format-spec (ccs)
96 (cond ((memq ccs '(=ucs))
99 =gt-k =daikanwa =adobe-japan1
100 =cbeta =zinbun-oracle))
102 ((memq ccs '(=hanyo-denshi/ks
105 ((memq ccs '(=hanyo-denshi/tk ==hanyo-denshi/tk))
110 ;; (defun isd-turtle-uri-encode-feature-name (feature-name)
112 ;; ((eq '=ucs feature-name)
114 ;; ((eq '==>ucs@bucs feature-name)
117 ;; (mapconcat (lambda (c)
120 ;; (char-to-string c)))
121 ;; (www-uri-encode-feature-name feature-name)
123 (defun chise-turtle-uri-encode-ccs-name (feature-name)
125 ((eq '=ucs feature-name)
127 ((eq '=big5 feature-name)
129 ((eq '==>ucs@bucs feature-name)
132 (mapconcat (lambda (c)
141 (char-to-string c))))
142 (www-uri-encode-feature-name feature-name)
145 ;; (defun isd-turtle-format-ccs-code-point (ccs code-point)
146 ;; (unless (memq ccs isd-turtle-ccs-list)
147 ;; (setq isd-turtle-ccs-list (cons ccs isd-turtle-ccs-list)))
149 ;; (isd-turtle-uri-encode-feature-name ccs)
150 ;; (format (charset-code-point-format-spec ccs)
152 (defun chise-turtle-format-ccs-code-point (ccs code-point)
153 (let ((ccs-uri (chise-turtle-uri-encode-ccs-name ccs)))
154 (unless (assoc ccs-uri chise-turtle-ccs-prefix-alist)
155 (setq chise-turtle-ccs-prefix-alist
156 (cons (cons ccs-uri ccs)
157 chise-turtle-ccs-prefix-alist)))
160 (format (charset-code-point-format-spec ccs)
163 (defun isd-turtle-encode-char (object)
164 (let ((ccs-list est-coded-charset-priority-list)
166 (if (setq ret (encode-char object '=ucs))
167 (chise-turtle-format-ccs-code-point '=ucs ret)
169 (setq ccs (pop ccs-list))
170 (not (setq ret (encode-char object ccs 'defined-only)))))
172 (chise-turtle-format-ccs-code-point ccs ret)
174 ((and (setq ccs (car (split-char object)))
175 (setq ret (encode-char object ccs)))
176 (chise-turtle-format-ccs-code-point ccs ret)
179 (format (if est-hide-cgi-mode
180 "system-char-id=0x%X"
181 "system-char-id:0x%X")
182 (encode-char object 'system-char-id))
185 (defun isd-turtle-format-component (component separator level)
186 (cond ((characterp component)
188 (isd-turtle-encode-char component)
193 (let ((ret (find-char component)))
196 (isd-turtle-encode-char ret) separator ret))
197 ((setq ret (assq 'ideographic-structure component))
198 (if (eq separator ?\;)
200 (isd-turtle-format-char nil nil (cdr ret) (1+ level)))
201 (isd-turtle-format-char nil nil (cdr ret) (1+ level)))))))))
203 (defun isd-turtle-format-char (ccs code-point &optional ids-list level
204 prefix without-head-char)
209 (let ((indent (make-string (* level 4) ?\ ))
216 (if (and ccs code-point
217 (setq char (decode-char ccs code-point)))
218 (setq ids-list (get-char-attribute char 'ideographic-structure))))
219 (setq idc (car ids-list))
220 (setq c1 (nth 1 ids-list)
224 (setq idc (plist-get idc :char)))
226 (setq ret (find-char idc)))
229 (setq ret (find-char c1)))
232 (setq ret (find-char c2)))
235 (setq ret (find-char c3)))
238 ((eq idc ?\u2FF0) ; ⿰
256 ((memq idc '(?⿴ ?⿵ ?⿶ ?⿷ ?⿸ ?⿹ ?⿺))
267 %s %s:structure [ a idc:%c ;
272 (if without-head-char
274 (if (and ccs code-point)
276 (chise-turtle-format-ccs-code-point ccs code-point)
280 indent prefix p1 (isd-turtle-format-component c1 ?\; (1+ level))
281 indent prefix p2 (isd-turtle-format-component c2 ?\; (1+ level))
282 indent prefix p3 (isd-turtle-format-component c3 ?\ (1+ level))
284 (if without-head-char
293 %s %s:structure [ a idc:%c ;
297 (if without-head-char
299 (if (and ccs code-point)
301 (chise-turtle-format-ccs-code-point ccs code-point)
305 indent prefix p1 (isd-turtle-format-component c1 ?\; (1+ level))
306 indent prefix p2 (isd-turtle-format-component c2 ?\ (1+ level))
308 (if without-head-char
316 (defun isd-turtle-insert-char (ccs code-point)
317 (let ((ret (isd-turtle-format-char ccs code-point)))
322 (defun isd-turtle-insert-ccs-ranges (ccs &rest ranges)
323 (let (range code max-code)
325 (setq range (car ranges))
327 (setq code (car range)
328 max-code (cdr range))
329 (while (<= code max-code)
330 (isd-turtle-insert-char ccs code)
331 (setq code (1+ code)))
334 (isd-turtle-insert-char ccs range)
336 (t (error 'wrong-type-argument range)))
337 (setq ranges (cdr ranges)))))
339 (defun isd-turtle-dump-range (file path func &rest args)
341 (let ((coding-system-for-write 'utf-8-mcs-er)
342 ;; isd-turtle-ccs-list
343 chise-turtle-ccs-prefix-alist)
344 (if (file-directory-p path)
345 (setq path (expand-file-name file path)))
347 (goto-char (point-min))
348 ;; (dolist (ccs (sort isd-turtle-ccs-list
349 ;; #'char-attribute-name<))
350 ;; (insert (format "@prefix %s: <%s%s=> .\n"
351 ;; (isd-turtle-uri-encode-feature-name ccs)
352 ;; "http://www.chise.org/est/view/character/"
353 ;; (www-uri-encode-feature-name ccs))))
354 (dolist (cell (sort chise-turtle-ccs-prefix-alist
356 (char-attribute-name< (cdr a)(cdr b)))))
357 (insert (format "@prefix %s: <%s/%s=> .\n"
359 "http://www.chise.org/est/view/character"
360 (www-uri-encode-feature-name (cdr cell)))))
362 (goto-char (point-min))
363 (insert "# -*- coding: utf-8-mcs-er -*-\n")
364 (insert "@prefix : <http://rdf.chise.org/rdf/property/character/isd/> .
365 @prefix idc: <http://rdf.chise.org/rdf/type/character/idc/> .\n")
366 (write-region (point-min)(point-max) path))))
369 (defun isd-turtle-dump-ucs-basic (filename)
370 (interactive "Fdump ISD-UCS-Basic : ")
371 (isd-turtle-dump-range "ISD-UCS-Basic.ttl" filename
372 #'isd-turtle-insert-ccs-ranges
373 '=ucs '(#x4E00 . #x9FA5)))
376 (defun isd-turtle-dump-ucs-ext-a (filename)
377 (interactive "Fdump ISD-UCS-Ext-A : ")
378 (isd-turtle-dump-range "ISD-UCS-Ext-A.ttl" filename
379 #'isd-turtle-insert-ccs-ranges
380 '=ucs '(#x3400 . #x4DB5) #xFA1F #xFA23))
383 (defun isd-turtle-dump-ucs-ext-b-1 (filename)
384 (interactive "Fdump IDS-UCS-Ext-B-1 : ")
385 (isd-turtle-dump-range "ISD-UCS-Ext-B-1.ttl" filename
386 #'isd-turtle-insert-ccs-ranges
387 'ucs '(#x20000 . #x21FFF)))
390 (defun isd-turtle-dump-ucs-ext-b-2 (filename)
391 (interactive "Fdump IDS-UCS-Ext-B-2 : ")
392 (isd-turtle-dump-range "ISD-UCS-Ext-B-2.ttl" filename
393 #'isd-turtle-insert-ccs-ranges
394 'ucs '(#x22000 . #x23FFF)))
397 (defun isd-turtle-dump-ucs-ext-b-3 (filename)
398 (interactive "Fdump IDS-UCS-Ext-B-3 : ")
399 (isd-turtle-dump-range "ISD-UCS-Ext-B-3.ttl" filename
400 #'isd-turtle-insert-ccs-ranges
401 'ucs '(#x24000 . #x25FFF)))
404 (defun isd-turtle-dump-ucs-ext-b-4 (filename)
405 (interactive "Fdump IDS-UCS-Ext-B-4 : ")
406 (isd-turtle-dump-range "ISD-UCS-Ext-B-4.ttl" filename
407 #'isd-turtle-insert-ccs-ranges
408 'ucs '(#x26000 . #x27FFF)))
411 (defun isd-turtle-dump-ucs-ext-b-5 (filename)
412 (interactive "Fdump IDS-UCS-Ext-B-5 : ")
413 (isd-turtle-dump-range "ISD-UCS-Ext-B-5.ttl" filename
414 #'isd-turtle-insert-ccs-ranges
415 'ucs '(#x28000 . #x29FFF)))
418 (defun isd-turtle-dump-ucs-ext-b-6 (filename)
419 (interactive "Fdump IDS-UCS-Ext-B-6 : ")
420 (isd-turtle-dump-range "ISD-UCS-Ext-B-6.ttl" filename
421 #'isd-turtle-insert-ccs-ranges
422 'ucs '(#x2A000 . #x2A6D6)))
425 (defun isd-turtle-dump-ucs-ext-c (filename)
426 (interactive "Fdump IDS-UCS-Ext-C : ")
427 (isd-turtle-dump-range "ISD-UCS-Ext-C.ttl" filename
428 #'isd-turtle-insert-ccs-ranges
429 'ucs '(#x2A700 . #x2B734)))
432 (defun isd-turtle-dump-ucs-ext-d (filename)
433 (interactive "Fdump IDS-UCS-Ext-D : ")
434 (isd-turtle-dump-range "ISD-UCS-Ext-D.ttl" filename
435 #'isd-turtle-insert-ccs-ranges
436 'ucs '(#x2B740 . #x2B81D)))
439 (defun isd-turtle-dump-ucs-ext-e (filename)
440 (interactive "Fdump IDS-UCS-Ext-E : ")
441 (isd-turtle-dump-range "ISD-UCS-Ext-E.ttl" filename
442 #'isd-turtle-insert-ccs-ranges
443 'ucs '(#x2B820 . #x2CEA1)))
446 (defun isd-turtle-dump-mj-0 (filename)
447 (interactive "Fdump ISD-MJ-0 : ")
448 (isd-turtle-dump-range "ISD-MJ-0.ttl" filename
449 #'isd-turtle-insert-ccs-ranges
453 (defun isd-turtle-dump-mj-1 (filename)
454 (interactive "Fdump ISD-MJ-1 : ")
455 (isd-turtle-dump-range "ISD-MJ-1.ttl" filename
456 #'isd-turtle-insert-ccs-ranges
457 '=mj '(10000 . 19999)))
460 (defun isd-turtle-dump-mj-2 (filename)
461 (interactive "Fdump ISD-MJ-2 : ")
462 (isd-turtle-dump-range "ISD-MJ-2.ttl" filename
463 #'isd-turtle-insert-ccs-ranges
464 '=mj '(20000 . 29999)))
467 (defun isd-turtle-dump-mj-3 (filename)
468 (interactive "Fdump ISD-MJ-3 : ")
469 (isd-turtle-dump-range "ISD-MJ-3.ttl" filename
470 #'isd-turtle-insert-ccs-ranges
471 '=mj '(30000 . 39999)))
474 (defun isd-turtle-dump-mj-4 (filename)
475 (interactive "Fdump ISD-MJ-4 : ")
476 (isd-turtle-dump-range "ISD-MJ-4.ttl" filename
477 #'isd-turtle-insert-ccs-ranges
478 '=mj '(40000 . 49999)))
481 (defun isd-turtle-dump-mj-5 (filename)
482 (interactive "Fdump ISD-MJ-5 : ")
483 (isd-turtle-dump-range "ISD-MJ-5.ttl" filename
484 #'isd-turtle-insert-ccs-ranges
485 '=mj '(50000 . 59999)))
488 (defun isd-turtle-dump-mj-6 (filename)
489 (interactive "Fdump ISD-MJ-6 : ")
490 (isd-turtle-dump-range "ISD-MJ-6.ttl" filename
491 #'isd-turtle-insert-ccs-ranges
492 '=mj '(60000 . 69999)))
495 (defun isd-turtle-dump-all (directory)
496 (interactive "DISD directory : ")
497 (isd-turtle-dump-ucs-basic directory)
498 (isd-turtle-dump-ucs-ext-a directory)
499 (isd-turtle-dump-ucs-ext-b-1 directory)
500 (isd-turtle-dump-ucs-ext-b-2 directory)
501 (isd-turtle-dump-ucs-ext-b-3 directory)
502 (isd-turtle-dump-ucs-ext-b-4 directory)
503 (isd-turtle-dump-ucs-ext-b-5 directory)
504 (isd-turtle-dump-ucs-ext-b-6 directory)
505 (isd-turtle-dump-ucs-ext-c directory)
506 (isd-turtle-dump-ucs-ext-d directory)
507 (isd-turtle-dump-ucs-ext-e directory)
508 (isd-turtle-dump-mj-0 directory)
509 (isd-turtle-dump-mj-1 directory)
510 (isd-turtle-dump-mj-2 directory)
511 (isd-turtle-dump-mj-3 directory)
512 (isd-turtle-dump-mj-4 directory)
513 (isd-turtle-dump-mj-5 directory)
514 (isd-turtle-dump-mj-6 directory)
521 (provide 'isd-turtle)
523 ;;; isd-turtle.el ends here