1 ;;; chiset-common.el --- CHISET common utility -*- coding: utf-8-er; -*-
3 ;; Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018,
4 ;; 2019, 2021 MORIOKA Tomohiko.
6 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
7 ;; Keywords: CHISE, RDF, Turtle, WWW
9 ;; This file is part of CHISET (CHISE/Turtle).
11 ;; XEmacs CHISE is free software; you can redistribute it and/or
12 ;; modify it under the terms of the GNU General Public License as
13 ;; published by the Free Software Foundation; either version 2, or (at
14 ;; your option) any later version.
16 ;; XEmacs CHISE is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 ;; General Public License for more details.
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with XEmacs CHISE; see the file COPYING. If not, write to
23 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
28 (defun decode-uri-string (string &optional coding-system)
29 (if (> (length string) 0)
33 (mapconcat (lambda (char)
36 (char-to-string char)))
38 (while (string-match "%\\([0-9A-F][0-9A-F]\\)" string i)
39 (setq dest (concat dest
40 (substring string i (match-beginning 0))
43 (string-to-int (match-string 1 string) 16))))
46 (concat dest (substring string i))
50 ;;; @ URI representation
53 (defun est-uri-decode-feature-name-body (uri-feature)
54 (let ((len (length uri-feature))
61 (if (eq (aref uri-feature i) ?\.)
62 (if (and (< (+ i 2) len)
63 (eq (aref uri-feature (+ i 2)) ?\.))
66 ((eq (setq ch (aref uri-feature (1+ i))) ?\.)
73 (substring uri-feature i (+ i 3))
79 (char-to-string (aref uri-feature i))
83 (defun est-uri-encode-feature-name-body (feature)
84 (mapconcat (lambda (c)
91 (t (char-to-string c))))
94 (defun est-uri-decode-feature-name (uri-feature)
96 (setq uri-feature (decode-uri-string uri-feature 'utf-8-mcs-er))
98 ((string-match "^from\\." uri-feature)
99 (intern (format "<-%s"
100 (est-uri-decode-feature-name-body
101 (substring uri-feature (match-end 0)))))
103 ((string-match "^to\\." uri-feature)
104 (intern (format "->%s"
105 (est-uri-decode-feature-name-body
106 (substring uri-feature (match-end 0)))))
108 ((string-match "^meta\\." uri-feature)
109 (intern (format "*%s"
110 (est-uri-decode-feature-name-body
111 (substring uri-feature (match-end 0)))))
113 ((string-match "^rep\\." uri-feature)
114 (intern (format "=%s"
115 (est-uri-decode-feature-name-body
116 (substring uri-feature (match-end 0)))))
118 ((string-match "^rep[2i]\\." uri-feature)
119 (intern (format "===%s"
120 (est-uri-decode-feature-name-body
121 (substring uri-feature (match-end 0)))))
123 ((string-match "^g\\." uri-feature)
124 (intern (format "=>>%s"
125 (est-uri-decode-feature-name-body
126 (substring uri-feature (match-end 0)))))
128 ((string-match "^g[i2]\\." uri-feature)
129 (intern (format "==%s"
130 (est-uri-decode-feature-name-body
131 (substring uri-feature (match-end 0)))))
133 ((string-match "^gi\\([0-9]+\\)\\." uri-feature)
134 (intern (format "=>>%s%s"
135 (make-string (string-to-int
136 (match-string 1 uri-feature))
138 (est-uri-decode-feature-name-body
139 (substring uri-feature (match-end 0)))))
141 ((string-match "^o\\." uri-feature)
142 (intern (format "=+>%s"
143 (est-uri-decode-feature-name-body
144 (substring uri-feature (match-end 0)))))
146 ((string-match "^a\\." uri-feature)
147 (intern (format "=>%s"
148 (est-uri-decode-feature-name-body
149 (substring uri-feature (match-end 0)))))
151 ((string-match "^a\\([0-9]+\\)\\." uri-feature)
152 (intern (format "%s>%s"
153 (make-string (string-to-int
154 (match-string 1 uri-feature))
156 (est-uri-decode-feature-name-body
157 (substring uri-feature (match-end 0)))))
160 (intern (est-uri-decode-feature-name-body uri-feature))
163 (defun www-uri-decode-feature-name (uri-feature)
165 (setq uri-feature (decode-uri-string uri-feature 'utf-8-mcs-er))
167 ((string-match "^from\\." uri-feature)
168 (intern (format "<-%s"
169 (est-uri-decode-feature-name-body
170 (substring uri-feature (match-end 0)))))
172 ((string-match "^to\\." uri-feature)
173 (intern (format "->%s"
174 (est-uri-decode-feature-name-body
175 (substring uri-feature (match-end 0)))))
177 ((string-match "^meta\\." uri-feature)
178 (intern (format "*%s"
179 (est-uri-decode-feature-name-body
180 (substring uri-feature (match-end 0)))))
182 ((string-match "^rep\\." uri-feature)
183 (intern (format "=%s"
184 (est-uri-decode-feature-name-body
185 (substring uri-feature (match-end 0)))))
187 ((string-match "^rep[2i]\\." uri-feature)
188 (intern (format "===%s"
189 (est-uri-decode-feature-name-body
190 (substring uri-feature (match-end 0)))))
192 ((string-match "^g\\." uri-feature)
193 (intern (format "=>>%s"
194 (est-uri-decode-feature-name-body
195 (substring uri-feature (match-end 0)))))
197 ((string-match "^g[i2]\\." uri-feature)
198 (intern (format "==%s"
199 (est-uri-decode-feature-name-body
200 (substring uri-feature (match-end 0)))))
202 ((string-match "^gi\\([0-9]+\\)\\." uri-feature)
203 (intern (format "=>>%s%s"
204 (make-string (string-to-int
205 (match-string 1 uri-feature))
207 (est-uri-decode-feature-name-body
208 (substring uri-feature (match-end 0)))))
210 ((string-match "^o\\." uri-feature)
211 (intern (format "=+>%s"
212 (est-uri-decode-feature-name-body
213 (substring uri-feature (match-end 0)))))
215 ((string-match "^a\\." uri-feature)
216 (intern (format "=>%s"
217 (est-uri-decode-feature-name-body
218 (substring uri-feature (match-end 0)))))
220 ((string-match "^a\\([0-9]+\\)\\." uri-feature)
221 (intern (format "%s>%s"
222 (make-string (string-to-int
223 (match-string 1 uri-feature))
225 (est-uri-decode-feature-name-body
226 (substring uri-feature (match-end 0)))))
228 ((and (setq uri-feature (est-uri-decode-feature-name-body uri-feature))
229 (setq feature (intern (format "=>%s" uri-feature)))
230 (find-charset feature))
232 ((and (setq feature (intern (format "=>>%s" uri-feature)))
233 (find-charset feature))
235 ((and (setq feature (intern (format "=>>>%s" uri-feature)))
236 (find-charset feature))
238 ((and (setq feature (intern (format "=%s" uri-feature)))
239 (find-charset feature))
241 (t (intern uri-feature)))))
243 (defun www-uri-encode-feature-name (feature-name)
244 (setq feature-name (format "%s" feature-name))
246 ((string-match "^=\\+>\\([^=>]+\\)" feature-name)
248 (est-uri-encode-feature-name-body
249 (substring feature-name (match-beginning 1))))
251 ((string-match "^=\\([^=>]+\\)" feature-name)
253 (est-uri-encode-feature-name-body
254 (substring feature-name (match-beginning 1))))
256 ((string-match "^==\\([^=>]+\\)" feature-name)
258 (est-uri-encode-feature-name-body
259 (substring feature-name (match-beginning 1))))
261 ((string-match "^===\\([^=>]+\\)" feature-name)
263 (est-uri-encode-feature-name-body
264 (substring feature-name (match-beginning 1))))
266 ((string-match "^=>>\\([^=>]+\\)" feature-name)
268 (est-uri-encode-feature-name-body
269 (substring feature-name (match-beginning 1))))
271 ((string-match "^=>>>\\([^=>]+\\)" feature-name)
273 (est-uri-encode-feature-name-body
274 (substring feature-name (match-beginning 1))))
276 ((string-match "^=>>\\(>+\\)" feature-name)
278 (length (match-string 1 feature-name))
279 (est-uri-encode-feature-name-body
280 (substring feature-name (match-end 1))))
282 ((string-match "^=>\\([^=>]+\\)" feature-name)
284 (est-uri-encode-feature-name-body
285 (substring feature-name (match-beginning 1))))
287 ((string-match "^\\(=+\\)>" feature-name)
289 (length (match-string 1 feature-name))
290 (est-uri-encode-feature-name-body
291 (substring feature-name (match-end 0))))
293 ((string-match "^->" feature-name)
295 (est-uri-encode-feature-name-body
296 (substring feature-name (match-end 0))))
298 ((string-match "^<-" feature-name)
300 (est-uri-encode-feature-name-body
301 (substring feature-name (match-end 0))))
303 ((string-match "^\\*" feature-name)
305 (est-uri-encode-feature-name-body
306 (substring feature-name (match-end 0))))
308 (t (est-uri-encode-feature-name-body feature-name))))
311 (defvar chise-turtle-ccs-prefix-alist nil)
313 (defun chise-turtle-uri-decode-feature-name (uri-feature)
314 (cond ((string= "a.ucs" uri-feature)
316 ((string= "a.big5" uri-feature)
319 (www-uri-decode-feature-name uri-feature))))
321 (defun chise-turtle-uri-encode-ccs-name (feature-name)
323 ((eq '=ucs feature-name)
325 ((eq '=big5 feature-name)
327 ((eq '==>ucs@bucs feature-name)
330 (mapconcat (lambda (c)
341 (char-to-string c))))
342 (www-uri-encode-feature-name feature-name)
345 (defun charset-code-point-format-spec (ccs)
346 (cond ((memq ccs '(=ucs))
349 (let ((ccs-name (symbol-name ccs)))
352 "\\(shinjigen\\|daikanwa/ho\\|=>iwds-1\\)"
356 "\\(gt\\|daikanwa\\|adobe-japan1\\|cbeta\\|zinbun-oracle\\|hng\\)"
359 ((string-match "\\(hanyo-denshi/ks\\|koseki\\|mj\\)" ccs-name)
361 ((string-match "hanyo-denshi/tk" ccs-name)
366 (defun chise-turtle-format-ccs-code-point (ccs code-point)
367 (let ((ccs-uri (chise-turtle-uri-encode-ccs-name ccs)))
368 (unless (assoc ccs-uri chise-turtle-ccs-prefix-alist)
369 (setq chise-turtle-ccs-prefix-alist
370 (cons (cons ccs-uri ccs)
371 chise-turtle-ccs-prefix-alist)))
374 (format (charset-code-point-format-spec ccs)
377 (defun chise-turtle-encode-char (object)
380 (if (setq ret (encode-char object '=ucs))
381 (chise-turtle-format-ccs-code-point '=ucs ret)
382 (setq spec (char-attribute-alist object))
384 (setq cell (pop spec)))
385 (if (and (find-charset (car cell))
386 (setq ret (cdr cell)))
387 (setq dest (cons cell dest))))
388 (setq ret (car (sort dest (lambda (a b)
389 (char-attribute-name< (car a)(car b)))))
393 (chise-turtle-format-ccs-code-point ccs ret)
395 ((and (setq ccs (car (split-char object)))
396 (setq ret (encode-char object ccs)))
397 (chise-turtle-format-ccs-code-point ccs ret)
399 ((setq ret (get-char-attribute object 'ideographic-combination))
400 (format "ideocomb:%s"
401 (mapconcat (lambda (cell)
402 (cond ((characterp cell)
403 (char-to-string cell)
405 ((setq ret2 (find-char cell))
406 (char-to-string ret2)
414 (format "system-char-id:0x%X"
415 (encode-char object 'system-char-id))
422 (provide 'chiset-common)
424 ;;; chiset-common.el ends here