1 ;;; chiset-common.el --- CHISET common utility -*- coding: utf-8-er; -*-
3 ;; Copyright (C) 2010,2011,2012,2013,2014,2015,2016,2017,2018 MORIOKA Tomohiko.
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: CHISE, RDF, Turtle, WWW
8 ;; This file is part of CHISET (CHISE/Turtle).
10 ;; XEmacs CHISE is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
15 ;; XEmacs CHISE is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with XEmacs CHISE; see the file COPYING. If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
27 (defun decode-uri-string (string &optional coding-system)
28 (if (> (length string) 0)
32 (mapconcat (lambda (char)
35 (char-to-string char)))
37 (while (string-match "%\\([0-9A-F][0-9A-F]\\)" string i)
38 (setq dest (concat dest
39 (substring string i (match-beginning 0))
42 (string-to-int (match-string 1 string) 16))))
45 (concat dest (substring string i))
49 ;;; @ URI representation
52 (defun est-uri-decode-feature-name-body (uri-feature)
53 (let ((len (length uri-feature))
60 (if (eq (aref uri-feature i) ?\.)
61 (if (and (< (+ i 2) len)
62 (eq (aref uri-feature (+ i 2)) ?\.))
65 ((eq (setq ch (aref uri-feature (1+ i))) ?\.)
70 (substring uri-feature i (+ i 3))
76 (char-to-string (aref uri-feature i))
80 (defun est-uri-encode-feature-name-body (feature)
81 (mapconcat (lambda (c)
86 (t (char-to-string c))))
89 (defun www-uri-decode-feature-name (uri-feature)
91 (setq uri-feature (decode-uri-string uri-feature 'utf-8-mcs-er))
93 ((string-match "^from\\." uri-feature)
94 (intern (format "<-%s"
95 (est-uri-decode-feature-name-body
96 (substring uri-feature (match-end 0)))))
98 ((string-match "^to\\." uri-feature)
99 (intern (format "->%s"
100 (est-uri-decode-feature-name-body
101 (substring uri-feature (match-end 0)))))
103 ((string-match "^rep\\." uri-feature)
104 (intern (format "=%s"
105 (est-uri-decode-feature-name-body
106 (substring uri-feature (match-end 0)))))
108 ((string-match "^rep[2i]\\." uri-feature)
109 (intern (format "===%s"
110 (est-uri-decode-feature-name-body
111 (substring uri-feature (match-end 0)))))
113 ((string-match "^g\\." uri-feature)
114 (intern (format "=>>%s"
115 (est-uri-decode-feature-name-body
116 (substring uri-feature (match-end 0)))))
118 ((string-match "^g[i2]\\." uri-feature)
119 (intern (format "==%s"
120 (est-uri-decode-feature-name-body
121 (substring uri-feature (match-end 0)))))
123 ((string-match "^gi\\([0-9]+\\)\\." uri-feature)
124 (intern (format "=>>%s%s"
125 (make-string (string-to-int
126 (match-string 1 uri-feature))
128 (est-uri-decode-feature-name-body
129 (substring uri-feature (match-end 0)))))
131 ((string-match "^o\\." uri-feature)
132 (intern (format "=+>%s"
133 (est-uri-decode-feature-name-body
134 (substring uri-feature (match-end 0)))))
136 ((string-match "^a\\." uri-feature)
137 (intern (format "=>%s"
138 (est-uri-decode-feature-name-body
139 (substring uri-feature (match-end 0)))))
141 ((string-match "^a\\([0-9]+\\)\\." uri-feature)
142 (intern (format "%s>%s"
143 (make-string (string-to-int
144 (match-string 1 uri-feature))
146 (est-uri-decode-feature-name-body
147 (substring uri-feature (match-end 0)))))
149 ((and (setq uri-feature (est-uri-decode-feature-name-body uri-feature))
150 (setq feature (intern (format "=>%s" uri-feature)))
151 (find-charset feature))
153 ((and (setq feature (intern (format "=>>%s" uri-feature)))
154 (find-charset feature))
156 ((and (setq feature (intern (format "=>>>%s" uri-feature)))
157 (find-charset feature))
159 ((and (setq feature (intern (format "=%s" uri-feature)))
160 (find-charset feature))
162 (t (intern uri-feature)))))
164 (defun www-uri-encode-feature-name (feature-name)
165 (setq feature-name (symbol-name feature-name))
167 ((string-match "^=\\+>\\([^=>]+\\)" feature-name)
169 (est-uri-encode-feature-name-body
170 (substring feature-name (match-beginning 1))))
172 ((string-match "^=\\([^=>]+\\)" feature-name)
174 (est-uri-encode-feature-name-body
175 (substring feature-name (match-beginning 1))))
177 ((string-match "^==\\([^=>]+\\)" feature-name)
179 (est-uri-encode-feature-name-body
180 (substring feature-name (match-beginning 1))))
182 ((string-match "^===\\([^=>]+\\)" feature-name)
184 (est-uri-encode-feature-name-body
185 (substring feature-name (match-beginning 1))))
187 ((string-match "^=>>\\([^=>]+\\)" feature-name)
189 (est-uri-encode-feature-name-body
190 (substring feature-name (match-beginning 1))))
192 ((string-match "^=>>>\\([^=>]+\\)" feature-name)
194 (est-uri-encode-feature-name-body
195 (substring feature-name (match-beginning 1))))
197 ((string-match "^=>>\\(>+\\)" feature-name)
199 (length (match-string 1 feature-name))
200 (est-uri-encode-feature-name-body
201 (substring feature-name (match-end 1))))
203 ((string-match "^=>\\([^=>]+\\)" feature-name)
205 (est-uri-encode-feature-name-body
206 (substring feature-name (match-beginning 1))))
208 ((string-match "^\\(=+\\)>" feature-name)
210 (length (match-string 1 feature-name))
211 (est-uri-encode-feature-name-body
212 (substring feature-name (match-end 0))))
214 ((string-match "^->" feature-name)
216 (est-uri-encode-feature-name-body
217 (substring feature-name (match-end 0))))
219 ((string-match "^<-" feature-name)
221 (est-uri-encode-feature-name-body
222 (substring feature-name (match-end 0))))
224 (t (est-uri-encode-feature-name-body feature-name))))
227 (defvar chise-turtle-ccs-prefix-alist nil)
229 (defun chise-turtle-uri-decode-feature-name (uri-feature)
230 (cond ((string= "a.ucs" uri-feature)
232 ((string= "a.big5" uri-feature)
235 (www-uri-decode-feature-name uri-feature))))
237 (defun chise-turtle-uri-encode-ccs-name (feature-name)
239 ((eq '=ucs feature-name)
241 ((eq '=big5 feature-name)
243 ((eq '==>ucs@bucs feature-name)
246 (mapconcat (lambda (c)
257 (char-to-string c))))
258 (www-uri-encode-feature-name feature-name)
261 (defun charset-code-point-format-spec (ccs)
262 (cond ((memq ccs '(=ucs))
265 (let ((ccs-name (symbol-name ccs)))
268 "\\(shinjigen\\|daikanwa/ho\\|=>iwds-1\\)"
272 "\\(gt\\|daikanwa\\|adobe-japan1\\|cbeta\\|zinbun-oracle\\|hng\\)"
275 ((string-match "\\(hanyo-denshi/ks\\|koseki\\|mj\\)" ccs-name)
277 ((string-match "hanyo-denshi/tk" ccs-name)
282 (defun chise-turtle-format-ccs-code-point (ccs code-point)
283 (let ((ccs-uri (chise-turtle-uri-encode-ccs-name ccs)))
284 (unless (assoc ccs-uri chise-turtle-ccs-prefix-alist)
285 (setq chise-turtle-ccs-prefix-alist
286 (cons (cons ccs-uri ccs)
287 chise-turtle-ccs-prefix-alist)))
290 (format (charset-code-point-format-spec ccs)
293 (defun chise-turtle-encode-char (object)
296 (if (setq ret (encode-char object '=ucs))
297 (chise-turtle-format-ccs-code-point '=ucs ret)
298 (setq spec (char-attribute-alist object))
300 (setq cell (pop spec)))
301 (if (and (find-charset (car cell))
302 (setq ret (cdr cell)))
303 (setq dest (cons cell dest))))
304 (setq ret (car (sort dest (lambda (a b)
305 (char-attribute-name< (car a)(car b)))))
309 (chise-turtle-format-ccs-code-point ccs ret)
311 ((and (setq ccs (car (split-char object)))
312 (setq ret (encode-char object ccs)))
313 (chise-turtle-format-ccs-code-point ccs ret)
315 ((setq ret (get-char-attribute object 'ideographic-combination))
316 (format "ideocomb:%s"
317 (mapconcat (lambda (cell)
318 (cond ((characterp cell)
319 (char-to-string cell)
321 ((setq ret2 (find-char cell))
322 (char-to-string ret2)
330 (format "system-char-id:0x%X"
331 (encode-char object 'system-char-id))
338 (provide 'chiset-common)
340 ;;; chiset-common.el ends here