1 ;;; isd-turtle.el --- Utility to dump ideographic-structure as Turtle files
3 ;; Copyright (C) 2017 MORIOKA Tomohiko
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: Ideographic Structures (漢字構造、解字), IDS, CHISE, RDF, Turtle
8 ;; This file is a part of CHISE-ISD (Ideographic Structure Database).
10 ;; This program is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
15 ;; This program is distributed in the hope that it will be useful, but
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with this program; see the file COPYING. If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
27 (require 'cwiki-common)
29 (defvar isd-url-prefix "http://rdf.chise.org/data/")
31 (setq est-coded-charset-priority-list
36 =jis-x0208 =jis-x0208@1990
38 =jis-x0213-1@2000 =jis-x0213-1@2004
47 =cns11643-1 =cns11643-2 =cns11643-3
48 =cns11643-4 =cns11643-5 =cns11643-6 =cns11643-7
55 =zinbun-oracle =>zinbun-oracle
59 =>jis-x0208 =>jis-x0213-1
66 =+>ucs@iso =+>ucs@unicode
67 =>ucs@jis =>ucs@cns =>ucs@ks
68 =>>ucs@iso =>>ucs@unicode
69 =>>ucs@jis =>>ucs@cns =>>ucs@ks
71 =>>jis-x0208 =>>jis-x0213-1 =>>jis-x0213-2
72 =+>jis-x0208 =+>jis-x0213-1 =+>jis-x0213-2
77 =jis-x0208@1983 =jis-x0208@1978
85 (defvar isd-turtle-ccs-list nil)
87 (defun charset-code-point-format-spec (ccs)
88 (cond ((memq ccs '(=ucs))
91 =gt-k =daikanwa =adobe-japan1
92 =cbeta =zinbun-oracle))
94 ((memq ccs '(=hanyo-denshi/ks
100 (defun isd-turtle-uri-encode-feature-name (feature-name)
102 ((eq '=ucs feature-name)
104 ((eq '==>ucs@bucs feature-name)
107 (mapconcat (lambda (c)
111 (www-uri-encode-feature-name feature-name)
114 (defun isd-turtle-format-ccs-code-point (ccs code-point)
116 (isd-turtle-uri-encode-feature-name ccs)
117 (format (charset-code-point-format-spec ccs)
120 ;; (defun isd-turtle-encode-char (char)
121 ;; (let ((ucs (encode-char char '=ucs)))
123 ;; (format "ucs:0x%04X" ucs)
124 ;; (www-uri-encode-object char))))
126 (defun isd-turtle-encode-char (object)
127 (let ((ccs-list est-coded-charset-priority-list)
129 (if (setq ret (encode-char object '=ucs))
131 ;; (format "a.ucs:0x%04X" ret)
132 (isd-turtle-format-ccs-code-point '=ucs ret)
133 (unless (memq '=ucs isd-turtle-ccs-list)
134 (setq isd-turtle-ccs-list (cons '=ucs isd-turtle-ccs-list))))
136 (setq ccs (pop ccs-list))
137 (not (setq ret (encode-char object ccs 'defined-only)))))
139 (unless (memq ccs isd-turtle-ccs-list)
140 (setq isd-turtle-ccs-list (cons ccs isd-turtle-ccs-list)))
141 ;; (format (cond ((memq ccs '(=gt
142 ;; =gt-k =daikanwa =adobe-japan1
143 ;; =cbeta =zinbun-oracle))
145 ;; ((memq ccs '(=hanyo-denshi/ks
151 ;; (isd-turtle-uri-encode-feature-name ccs)
153 (isd-turtle-format-ccs-code-point ccs ret)
155 ((and (setq ccs (car (split-char object)))
156 (setq ret (encode-char object ccs)))
157 (unless (memq ccs isd-turtle-ccs-list)
158 (setq isd-turtle-ccs-list (cons ccs isd-turtle-ccs-list)))
160 ;; (isd-turtle-uri-encode-feature-name ccs)
162 (isd-turtle-format-ccs-code-point ccs ret)
165 (format (if est-hide-cgi-mode
166 "system-char-id=0x%X"
167 "system-char-id:0x%X")
168 (encode-char object 'system-char-id))
171 (defun isd-turtle-format-component (component separator level)
172 (cond ((characterp component)
174 (isd-turtle-encode-char component)
179 (let ((ret (find-char component)))
182 (isd-turtle-encode-char ret) separator ret))
183 ((setq ret (assq 'ideographic-structure component))
184 (if (eq separator ?\;)
186 (isd-turtle-format-char nil (cdr ret) (1+ level)))
187 (isd-turtle-format-char nil (cdr ret) (1+ level)))))))))
189 (defun isd-turtle-format-char (char &optional ids-list level)
191 (setq ids-list (get-char-attribute char 'ideographic-structure)))
194 (let ((indent (make-string (* level 4) ?\ ))
197 (c1 (nth 1 ids-list))
198 (c2 (nth 2 ids-list))
199 (c3 (nth 3 ids-list))
202 (setq idc (plist-get idc :char)))
204 (setq ret (find-char idc)))
207 (setq ret (find-char c1)))
210 (setq ret (find-char c2)))
213 (setq ret (find-char c3)))
216 ((eq idc ?\u2FF0) ; ⿰
234 ((memq idc '(?⿴ ?⿵ ?⿶ ?⿷ ?⿸ ?⿹ ?⿺))
245 %s :structure [ a idc:%c ;
251 (isd-turtle-format-component char ?\ 0)
254 indent p1 (isd-turtle-format-component c1 ?\; (1+ level))
255 indent p2 (isd-turtle-format-component c2 ?\; (1+ level))
256 indent p3 (isd-turtle-format-component c3 ?\ (1+ level))
265 %s :structure [ a idc:%c ;
270 (isd-turtle-format-component char ?\ 0)
273 indent p1 (isd-turtle-format-component c1 ?\; (1+ level))
274 indent p2 (isd-turtle-format-component c2 ?\ (1+ level))
282 (defun isd-turtle-insert-char (char)
283 (let ((ret (isd-turtle-format-char char)))
288 (defun isd-turtle-insert-ccs-ranges (ccs &rest ranges)
289 (let (range code max-code char)
291 (setq range (car ranges))
293 (setq code (car range)
294 max-code (cdr range))
295 (while (<= code max-code)
296 (if (setq char (decode-char ccs code))
297 (isd-turtle-insert-char char))
298 (setq code (1+ code))))
300 (if (setq char (decode-char ccs code))
301 (isd-turtle-insert-char char)))
302 (t (error 'wrong-type-argument range)))
303 (setq ranges (cdr ranges)))))
305 (defun isd-turtle-dump-range (file path func &rest args)
307 (let ((coding-system-for-write 'utf-8-mcs-er)
309 (if (file-directory-p path)
310 (setq path (expand-file-name file path)))
312 (goto-char (point-min))
313 (dolist (ccs (sort isd-turtle-ccs-list
314 #'char-attribute-name<))
315 (insert (format "@prefix %s: <%s%s=> .\n"
316 (isd-turtle-uri-encode-feature-name ccs)
317 "http://www.chise.org/est/view/character/"
318 (www-uri-encode-feature-name ccs))))
320 (goto-char (point-min))
321 (insert "# -*- coding: utf-8-mcs-er -*-\n")
322 (insert "@prefix : <http://rdf.chise.org/rdf/property/character/isd/> .
323 @prefix idc: <http://rdf.chise.org/rdf/type/character/idc/> .\n")
324 (write-region (point-min)(point-max) path))))
327 (defun isd-turtle-dump-ucs-basic (filename)
328 (interactive "Fdump ISD-UCS-Basic : ")
329 (isd-turtle-dump-range "ISD-UCS-Basic.ttl" filename
330 #'isd-turtle-insert-ccs-ranges
331 'ucs '(#x4E00 . #x9FA5)))
334 (defun isd-turtle-dump-ucs-ext-a (filename)
335 (interactive "Fdump ISD-UCS-Ext-A : ")
336 (isd-turtle-dump-range "ISD-UCS-Ext-A.ttl" filename
337 #'isd-turtle-insert-ccs-ranges
338 'ucs '(#x3400 . #x4DB5) #xFA1F #xFA23))
345 (provide 'isd-turtle)
347 ;;; isd-turtle.el ends here