1 ;;; isd-turtle.el --- Utility to dump ideographic-structure as Turtle files
3 ;; Copyright (C) 2017 MORIOKA Tomohiko
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: Ideographic Structures (漢字構造、解字), IDS, CHISE, RDF, Turtle
8 ;; This file is a part of CHISE-ISD (Ideographic Structure Database).
10 ;; This program is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
15 ;; This program is distributed in the hope that it will be useful, but
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with this program; see the file COPYING. If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
27 (require 'cwiki-common)
29 (defvar isd-url-prefix "http://rdf.chise.org/data/")
31 (setq est-coded-charset-priority-list
36 =jis-x0208 =jis-x0208@1990
38 =jis-x0213-1@2000 =jis-x0213-1@2004
48 =cns11643-1 =cns11643-2 =cns11643-3
49 =cns11643-4 =cns11643-5 =cns11643-6 =cns11643-7
56 =zinbun-oracle =>zinbun-oracle
60 =>jis-x0208 =>jis-x0213-1
67 =+>ucs@iso =+>ucs@unicode
68 =>ucs@jis =>ucs@cns =>ucs@ks
69 =>>ucs@iso =>>ucs@unicode
70 =>>ucs@jis =>>ucs@cns =>>ucs@ks
72 =>>jis-x0208 =>>jis-x0213-1 =>>jis-x0213-2
73 =+>jis-x0208 =+>jis-x0213-1 =+>jis-x0213-2
78 =jis-x0208@1983 =jis-x0208@1978
86 (defvar isd-turtle-ccs-list nil)
88 (defun charset-code-point-format-spec (ccs)
89 (cond ((memq ccs '(=ucs))
92 =gt-k =daikanwa =adobe-japan1
93 =cbeta =zinbun-oracle))
95 ((memq ccs '(=hanyo-denshi/ks
98 ((memq ccs '(=hanyo-denshi/tk ==hanyo-denshi/tk))
103 (defun isd-turtle-uri-encode-feature-name (feature-name)
105 ((eq '=ucs feature-name)
107 ((eq '==>ucs@bucs feature-name)
110 (mapconcat (lambda (c)
114 (www-uri-encode-feature-name feature-name)
117 (defun isd-turtle-format-ccs-code-point (ccs code-point)
118 (unless (memq ccs isd-turtle-ccs-list)
119 (setq isd-turtle-ccs-list (cons ccs isd-turtle-ccs-list)))
121 (isd-turtle-uri-encode-feature-name ccs)
122 (format (charset-code-point-format-spec ccs)
125 (defun isd-turtle-encode-char (object)
126 (let ((ccs-list est-coded-charset-priority-list)
128 (if (setq ret (encode-char object '=ucs))
129 (isd-turtle-format-ccs-code-point '=ucs ret)
131 (setq ccs (pop ccs-list))
132 (not (setq ret (encode-char object ccs 'defined-only)))))
134 (isd-turtle-format-ccs-code-point ccs ret)
136 ((and (setq ccs (car (split-char object)))
137 (setq ret (encode-char object ccs)))
138 (isd-turtle-format-ccs-code-point ccs ret)
141 (format (if est-hide-cgi-mode
142 "system-char-id=0x%X"
143 "system-char-id:0x%X")
144 (encode-char object 'system-char-id))
147 (defun isd-turtle-format-component (component separator level)
148 (cond ((characterp component)
150 (isd-turtle-encode-char component)
155 (let ((ret (find-char component)))
158 (isd-turtle-encode-char ret) separator ret))
159 ((setq ret (assq 'ideographic-structure component))
160 (if (eq separator ?\;)
162 (isd-turtle-format-char nil nil (cdr ret) (1+ level)))
163 (isd-turtle-format-char nil nil (cdr ret) (1+ level)))))))))
165 (defun isd-turtle-format-char (ccs code-point &optional ids-list level)
168 (let ((indent (make-string (* level 4) ?\ ))
175 (if (and ccs code-point
176 (setq char (decode-char ccs code-point)))
177 (setq ids-list (get-char-attribute char 'ideographic-structure))))
178 (setq idc (car ids-list))
179 (setq c1 (nth 1 ids-list)
183 (setq idc (plist-get idc :char)))
185 (setq ret (find-char idc)))
188 (setq ret (find-char c1)))
191 (setq ret (find-char c2)))
194 (setq ret (find-char c3)))
197 ((eq idc ?\u2FF0) ; ⿰
215 ((memq idc '(?⿴ ?⿵ ?⿶ ?⿷ ?⿸ ?⿹ ?⿺))
226 %s :structure [ a idc:%c ;
231 (if (and ccs code-point)
233 (isd-turtle-format-ccs-code-point ccs code-point)
237 indent p1 (isd-turtle-format-component c1 ?\; (1+ level))
238 indent p2 (isd-turtle-format-component c2 ?\; (1+ level))
239 indent p3 (isd-turtle-format-component c3 ?\ (1+ level))
248 %s :structure [ a idc:%c ;
252 (if (and ccs code-point)
254 (isd-turtle-format-ccs-code-point ccs code-point)
258 indent p1 (isd-turtle-format-component c1 ?\; (1+ level))
259 indent p2 (isd-turtle-format-component c2 ?\ (1+ level))
267 (defun isd-turtle-insert-char (ccs code-point)
268 (let ((ret (isd-turtle-format-char ccs code-point)))
273 (defun isd-turtle-insert-ccs-ranges (ccs &rest ranges)
274 (let (range code max-code)
276 (setq range (car ranges))
278 (setq code (car range)
279 max-code (cdr range))
280 (while (<= code max-code)
281 (isd-turtle-insert-char ccs code)
282 (setq code (1+ code)))
285 (isd-turtle-insert-char ccs range)
287 (t (error 'wrong-type-argument range)))
288 (setq ranges (cdr ranges)))))
290 (defun isd-turtle-dump-range (file path func &rest args)
292 (let ((coding-system-for-write 'utf-8-mcs-er)
294 (if (file-directory-p path)
295 (setq path (expand-file-name file path)))
297 (goto-char (point-min))
298 (dolist (ccs (sort isd-turtle-ccs-list
299 #'char-attribute-name<))
300 (insert (format "@prefix %s: <%s%s=> .\n"
301 (isd-turtle-uri-encode-feature-name ccs)
302 "http://www.chise.org/est/view/character/"
303 (www-uri-encode-feature-name ccs))))
305 (goto-char (point-min))
306 (insert "# -*- coding: utf-8-mcs-er -*-\n")
307 (insert "@prefix : <http://rdf.chise.org/rdf/property/character/isd/> .
308 @prefix idc: <http://rdf.chise.org/rdf/type/character/idc/> .\n")
309 (write-region (point-min)(point-max) path))))
312 (defun isd-turtle-dump-ucs-basic (filename)
313 (interactive "Fdump ISD-UCS-Basic : ")
314 (isd-turtle-dump-range "ISD-UCS-Basic.ttl" filename
315 #'isd-turtle-insert-ccs-ranges
316 '=ucs '(#x4E00 . #x9FA5)))
319 (defun isd-turtle-dump-ucs-ext-a (filename)
320 (interactive "Fdump ISD-UCS-Ext-A : ")
321 (isd-turtle-dump-range "ISD-UCS-Ext-A.ttl" filename
322 #'isd-turtle-insert-ccs-ranges
323 '=ucs '(#x3400 . #x4DB5) #xFA1F #xFA23))
326 (defun isd-turtle-dump-mj-0 (filename)
327 (interactive "Fdump ISD-MJ-0 : ")
328 (isd-turtle-dump-range "ISD-MJ-0.ttl" filename
329 #'isd-turtle-insert-ccs-ranges
333 (defun isd-turtle-dump-mj-1 (filename)
334 (interactive "Fdump ISD-MJ-1 : ")
335 (isd-turtle-dump-range "ISD-MJ-1.ttl" filename
336 #'isd-turtle-insert-ccs-ranges
337 '=mj '(10000 . 19999)))
340 (defun isd-turtle-dump-mj-2 (filename)
341 (interactive "Fdump ISD-MJ-2 : ")
342 (isd-turtle-dump-range "ISD-MJ-2.ttl" filename
343 #'isd-turtle-insert-ccs-ranges
344 '=mj '(20000 . 29999)))
347 (defun isd-turtle-dump-mj-3 (filename)
348 (interactive "Fdump ISD-MJ-3 : ")
349 (isd-turtle-dump-range "ISD-MJ-3.ttl" filename
350 #'isd-turtle-insert-ccs-ranges
351 '=mj '(30000 . 39999)))
354 (defun isd-turtle-dump-mj-4 (filename)
355 (interactive "Fdump ISD-MJ-4 : ")
356 (isd-turtle-dump-range "ISD-MJ-4.ttl" filename
357 #'isd-turtle-insert-ccs-ranges
358 '=mj '(40000 . 49999)))
361 (defun isd-turtle-dump-mj-5 (filename)
362 (interactive "Fdump ISD-MJ-5 : ")
363 (isd-turtle-dump-range "ISD-MJ-5.ttl" filename
364 #'isd-turtle-insert-ccs-ranges
365 '=mj '(50000 . 59999)))
368 (defun isd-turtle-dump-mj-6 (filename)
369 (interactive "Fdump ISD-MJ-6 : ")
370 (isd-turtle-dump-range "ISD-MJ-6.ttl" filename
371 #'isd-turtle-insert-ccs-ranges
372 '=mj '(60000 . 69999)))
378 (provide 'isd-turtle)
380 ;;; isd-turtle.el ends here