1 ;;; isd-turtle.el --- Utility to dump ideographic-structure as Turtle files
3 ;; Copyright (C) 2017 MORIOKA Tomohiko
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: Ideographic Structures (漢字構造、解字), IDS, CHISE, RDF, Turtle
8 ;; This file is a part of CHISE-ISD (Ideographic Structure Database).
10 ;; This program is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
15 ;; This program is distributed in the hope that it will be useful, but
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with this program; see the file COPYING. If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
27 (require 'cwiki-common)
29 (defvar isd-url-prefix "http://rdf.chise.org/data/")
31 (setq est-coded-charset-priority-list
36 =jis-x0208 =jis-x0208@1990
38 =jis-x0213-1@2000 =jis-x0213-1@2004
51 =cns11643-1 =cns11643-2 =cns11643-3
52 =cns11643-4 =cns11643-5 =cns11643-6 =cns11643-7
59 =zinbun-oracle =>zinbun-oracle
63 =>jis-x0208 =>jis-x0213-1
70 =+>ucs@iso =+>ucs@unicode
71 =>ucs@jis =>ucs@cns =>ucs@ks
72 =>>ucs@iso =>>ucs@unicode
73 =>>ucs@jis =>>ucs@cns =>>ucs@ks
75 =>>jis-x0208 =>>jis-x0213-1 =>>jis-x0213-2
76 =+>jis-x0208 =+>jis-x0213-1 =+>jis-x0213-2
81 =jis-x0208@1983 =jis-x0208@1978
90 (defvar isd-turtle-ccs-list nil)
92 (defun charset-code-point-format-spec (ccs)
93 (cond ((memq ccs '(=ucs))
96 =gt-k =daikanwa =adobe-japan1
97 =cbeta =zinbun-oracle))
99 ((memq ccs '(=hanyo-denshi/ks
102 ((memq ccs '(=hanyo-denshi/tk ==hanyo-denshi/tk))
107 (defun isd-turtle-uri-encode-feature-name (feature-name)
109 ((eq '=ucs feature-name)
111 ((eq '==>ucs@bucs feature-name)
114 (mapconcat (lambda (c)
118 (www-uri-encode-feature-name feature-name)
121 (defun isd-turtle-format-ccs-code-point (ccs code-point)
122 (unless (memq ccs isd-turtle-ccs-list)
123 (setq isd-turtle-ccs-list (cons ccs isd-turtle-ccs-list)))
125 (isd-turtle-uri-encode-feature-name ccs)
126 (format (charset-code-point-format-spec ccs)
129 (defun isd-turtle-encode-char (object)
130 (let ((ccs-list est-coded-charset-priority-list)
132 (if (setq ret (encode-char object '=ucs))
133 (isd-turtle-format-ccs-code-point '=ucs ret)
135 (setq ccs (pop ccs-list))
136 (not (setq ret (encode-char object ccs 'defined-only)))))
138 (isd-turtle-format-ccs-code-point ccs ret)
140 ((and (setq ccs (car (split-char object)))
141 (setq ret (encode-char object ccs)))
142 (isd-turtle-format-ccs-code-point ccs ret)
145 (format (if est-hide-cgi-mode
146 "system-char-id=0x%X"
147 "system-char-id:0x%X")
148 (encode-char object 'system-char-id))
151 (defun isd-turtle-format-component (component separator level)
152 (cond ((characterp component)
154 (isd-turtle-encode-char component)
159 (let ((ret (find-char component)))
162 (isd-turtle-encode-char ret) separator ret))
163 ((setq ret (assq 'ideographic-structure component))
164 (if (eq separator ?\;)
166 (isd-turtle-format-char nil nil (cdr ret) (1+ level)))
167 (isd-turtle-format-char nil nil (cdr ret) (1+ level)))))))))
169 (defun isd-turtle-format-char (ccs code-point &optional ids-list level)
172 (let ((indent (make-string (* level 4) ?\ ))
179 (if (and ccs code-point
180 (setq char (decode-char ccs code-point)))
181 (setq ids-list (get-char-attribute char 'ideographic-structure))))
182 (setq idc (car ids-list))
183 (setq c1 (nth 1 ids-list)
187 (setq idc (plist-get idc :char)))
189 (setq ret (find-char idc)))
192 (setq ret (find-char c1)))
195 (setq ret (find-char c2)))
198 (setq ret (find-char c3)))
201 ((eq idc ?\u2FF0) ; ⿰
219 ((memq idc '(?⿴ ?⿵ ?⿶ ?⿷ ?⿸ ?⿹ ?⿺))
230 %s :structure [ a idc:%c ;
235 (if (and ccs code-point)
237 (isd-turtle-format-ccs-code-point ccs code-point)
241 indent p1 (isd-turtle-format-component c1 ?\; (1+ level))
242 indent p2 (isd-turtle-format-component c2 ?\; (1+ level))
243 indent p3 (isd-turtle-format-component c3 ?\ (1+ level))
252 %s :structure [ a idc:%c ;
256 (if (and ccs code-point)
258 (isd-turtle-format-ccs-code-point ccs code-point)
262 indent p1 (isd-turtle-format-component c1 ?\; (1+ level))
263 indent p2 (isd-turtle-format-component c2 ?\ (1+ level))
271 (defun isd-turtle-insert-char (ccs code-point)
272 (let ((ret (isd-turtle-format-char ccs code-point)))
277 (defun isd-turtle-insert-ccs-ranges (ccs &rest ranges)
278 (let (range code max-code)
280 (setq range (car ranges))
282 (setq code (car range)
283 max-code (cdr range))
284 (while (<= code max-code)
285 (isd-turtle-insert-char ccs code)
286 (setq code (1+ code)))
289 (isd-turtle-insert-char ccs range)
291 (t (error 'wrong-type-argument range)))
292 (setq ranges (cdr ranges)))))
294 (defun isd-turtle-dump-range (file path func &rest args)
296 (let ((coding-system-for-write 'utf-8-mcs-er)
298 (if (file-directory-p path)
299 (setq path (expand-file-name file path)))
301 (goto-char (point-min))
302 (dolist (ccs (sort isd-turtle-ccs-list
303 #'char-attribute-name<))
304 (insert (format "@prefix %s: <%s%s=> .\n"
305 (isd-turtle-uri-encode-feature-name ccs)
306 "http://www.chise.org/est/view/character/"
307 (www-uri-encode-feature-name ccs))))
309 (goto-char (point-min))
310 (insert "# -*- coding: utf-8-mcs-er -*-\n")
311 (insert "@prefix : <http://rdf.chise.org/rdf/property/character/isd/> .
312 @prefix idc: <http://rdf.chise.org/rdf/type/character/idc/> .\n")
313 (write-region (point-min)(point-max) path))))
316 (defun isd-turtle-dump-ucs-basic (filename)
317 (interactive "Fdump ISD-UCS-Basic : ")
318 (isd-turtle-dump-range "ISD-UCS-Basic.ttl" filename
319 #'isd-turtle-insert-ccs-ranges
320 '=ucs '(#x4E00 . #x9FA5)))
323 (defun isd-turtle-dump-ucs-ext-a (filename)
324 (interactive "Fdump ISD-UCS-Ext-A : ")
325 (isd-turtle-dump-range "ISD-UCS-Ext-A.ttl" filename
326 #'isd-turtle-insert-ccs-ranges
327 '=ucs '(#x3400 . #x4DB5) #xFA1F #xFA23))
330 (defun isd-turtle-dump-ucs-ext-b-1 (filename)
331 (interactive "Fdump IDS-UCS-Ext-B-1 : ")
332 (isd-turtle-dump-range "ISD-UCS-Ext-B-1.ttl" filename
333 #'isd-turtle-insert-ccs-ranges
334 'ucs '(#x20000 . #x21FFF)))
337 (defun isd-turtle-dump-ucs-ext-b-2 (filename)
338 (interactive "Fdump IDS-UCS-Ext-B-2 : ")
339 (isd-turtle-dump-range "ISD-UCS-Ext-B-2.ttl" filename
340 #'isd-turtle-insert-ccs-ranges
341 'ucs '(#x22000 . #x23FFF)))
344 (defun isd-turtle-dump-ucs-ext-b-3 (filename)
345 (interactive "Fdump IDS-UCS-Ext-B-3 : ")
346 (isd-turtle-dump-range "ISD-UCS-Ext-B-3.ttl" filename
347 #'isd-turtle-insert-ccs-ranges
348 'ucs '(#x24000 . #x25FFF)))
351 (defun isd-turtle-dump-ucs-ext-b-4 (filename)
352 (interactive "Fdump IDS-UCS-Ext-B-4 : ")
353 (isd-turtle-dump-range "ISD-UCS-Ext-B-4.ttl" filename
354 #'isd-turtle-insert-ccs-ranges
355 'ucs '(#x26000 . #x27FFF)))
358 (defun isd-turtle-dump-ucs-ext-b-5 (filename)
359 (interactive "Fdump IDS-UCS-Ext-B-5 : ")
360 (isd-turtle-dump-range "ISD-UCS-Ext-B-5.ttl" filename
361 #'isd-turtle-insert-ccs-ranges
362 'ucs '(#x28000 . #x29FFF)))
365 (defun isd-turtle-dump-ucs-ext-b-6 (filename)
366 (interactive "Fdump IDS-UCS-Ext-B-6 : ")
367 (isd-turtle-dump-range "ISD-UCS-Ext-B-6.ttl" filename
368 #'isd-turtle-insert-ccs-ranges
369 'ucs '(#x2A000 . #x2A6D6)))
372 (defun isd-turtle-dump-ucs-ext-c (filename)
373 (interactive "Fdump IDS-UCS-Ext-C : ")
374 (isd-turtle-dump-range "ISD-UCS-Ext-C.ttl" filename
375 #'isd-turtle-insert-ccs-ranges
376 'ucs '(#x2A700 . #x2B734)))
379 (defun isd-turtle-dump-ucs-ext-d (filename)
380 (interactive "Fdump IDS-UCS-Ext-D : ")
381 (isd-turtle-dump-range "ISD-UCS-Ext-D.ttl" filename
382 #'isd-turtle-insert-ccs-ranges
383 'ucs '(#x2B740 . #x2B81D)))
386 (defun isd-turtle-dump-ucs-ext-e (filename)
387 (interactive "Fdump IDS-UCS-Ext-E : ")
388 (isd-turtle-dump-range "ISD-UCS-Ext-E.ttl" filename
389 #'isd-turtle-insert-ccs-ranges
390 'ucs '(#x2B820 . #x2CEA1)))
393 (defun isd-turtle-dump-mj-0 (filename)
394 (interactive "Fdump ISD-MJ-0 : ")
395 (isd-turtle-dump-range "ISD-MJ-0.ttl" filename
396 #'isd-turtle-insert-ccs-ranges
400 (defun isd-turtle-dump-mj-1 (filename)
401 (interactive "Fdump ISD-MJ-1 : ")
402 (isd-turtle-dump-range "ISD-MJ-1.ttl" filename
403 #'isd-turtle-insert-ccs-ranges
404 '=mj '(10000 . 19999)))
407 (defun isd-turtle-dump-mj-2 (filename)
408 (interactive "Fdump ISD-MJ-2 : ")
409 (isd-turtle-dump-range "ISD-MJ-2.ttl" filename
410 #'isd-turtle-insert-ccs-ranges
411 '=mj '(20000 . 29999)))
414 (defun isd-turtle-dump-mj-3 (filename)
415 (interactive "Fdump ISD-MJ-3 : ")
416 (isd-turtle-dump-range "ISD-MJ-3.ttl" filename
417 #'isd-turtle-insert-ccs-ranges
418 '=mj '(30000 . 39999)))
421 (defun isd-turtle-dump-mj-4 (filename)
422 (interactive "Fdump ISD-MJ-4 : ")
423 (isd-turtle-dump-range "ISD-MJ-4.ttl" filename
424 #'isd-turtle-insert-ccs-ranges
425 '=mj '(40000 . 49999)))
428 (defun isd-turtle-dump-mj-5 (filename)
429 (interactive "Fdump ISD-MJ-5 : ")
430 (isd-turtle-dump-range "ISD-MJ-5.ttl" filename
431 #'isd-turtle-insert-ccs-ranges
432 '=mj '(50000 . 59999)))
435 (defun isd-turtle-dump-mj-6 (filename)
436 (interactive "Fdump ISD-MJ-6 : ")
437 (isd-turtle-dump-range "ISD-MJ-6.ttl" filename
438 #'isd-turtle-insert-ccs-ranges
439 '=mj '(60000 . 69999)))
445 (provide 'isd-turtle)
447 ;;; isd-turtle.el ends here