1 ;;; chiset-common.el --- CHISET common utility -*- coding: utf-8-er; -*-
3 ;; Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018,
4 ;; 2019, 2021 MORIOKA Tomohiko.
6 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
7 ;; Keywords: CHISE, RDF, Turtle, WWW
9 ;; This file is part of CHISET (CHISE/Turtle).
11 ;; XEmacs CHISE is free software; you can redistribute it and/or
12 ;; modify it under the terms of the GNU General Public License as
13 ;; published by the Free Software Foundation; either version 2, or (at
14 ;; your option) any later version.
16 ;; XEmacs CHISE is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 ;; General Public License for more details.
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with XEmacs CHISE; see the file COPYING. If not, write to
23 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
28 (defun decode-uri-string (string &optional coding-system)
29 (if (> (length string) 0)
33 (mapconcat (lambda (char)
36 (char-to-string char)))
38 (while (string-match "%\\([0-9A-F][0-9A-F]\\)" string i)
39 (setq dest (concat dest
40 (substring string i (match-beginning 0))
43 (string-to-int (match-string 1 string) 16))))
46 (concat dest (substring string i))
50 ;;; @ URI representation
53 (defun est-uri-decode-feature-name-body (uri-feature)
54 (let ((len (length uri-feature))
61 (if (eq (aref uri-feature i) ?\.)
62 (if (and (< (+ i 2) len)
63 (eq (aref uri-feature (+ i 2)) ?\.))
66 ((eq (setq ch (aref uri-feature (1+ i))) ?\.)
73 (substring uri-feature i (+ i 3))
79 (char-to-string (aref uri-feature i))
83 (defun est-uri-encode-feature-name-body (feature)
84 (mapconcat (lambda (c)
91 (t (char-to-string c))))
94 (defun www-uri-decode-feature-name (uri-feature)
96 (setq uri-feature (decode-uri-string uri-feature 'utf-8-mcs-er))
98 ((string-match "^from\\." uri-feature)
99 (intern (format "<-%s"
100 (est-uri-decode-feature-name-body
101 (substring uri-feature (match-end 0)))))
103 ((string-match "^to\\." uri-feature)
104 (intern (format "->%s"
105 (est-uri-decode-feature-name-body
106 (substring uri-feature (match-end 0)))))
108 ((string-match "^meta\\." uri-feature)
109 (intern (format "*%s"
110 (est-uri-decode-feature-name-body
111 (substring uri-feature (match-end 0)))))
113 ((string-match "^rep\\." uri-feature)
114 (intern (format "=%s"
115 (est-uri-decode-feature-name-body
116 (substring uri-feature (match-end 0)))))
118 ((string-match "^rep[2i]\\." uri-feature)
119 (intern (format "===%s"
120 (est-uri-decode-feature-name-body
121 (substring uri-feature (match-end 0)))))
123 ((string-match "^g\\." uri-feature)
124 (intern (format "=>>%s"
125 (est-uri-decode-feature-name-body
126 (substring uri-feature (match-end 0)))))
128 ((string-match "^g[i2]\\." uri-feature)
129 (intern (format "==%s"
130 (est-uri-decode-feature-name-body
131 (substring uri-feature (match-end 0)))))
133 ((string-match "^gi\\([0-9]+\\)\\." uri-feature)
134 (intern (format "=>>%s%s"
135 (make-string (string-to-int
136 (match-string 1 uri-feature))
138 (est-uri-decode-feature-name-body
139 (substring uri-feature (match-end 0)))))
141 ((string-match "^o\\." uri-feature)
142 (intern (format "=+>%s"
143 (est-uri-decode-feature-name-body
144 (substring uri-feature (match-end 0)))))
146 ((string-match "^a\\." uri-feature)
147 (intern (format "=>%s"
148 (est-uri-decode-feature-name-body
149 (substring uri-feature (match-end 0)))))
151 ((string-match "^a\\([0-9]+\\)\\." uri-feature)
152 (intern (format "%s>%s"
153 (make-string (string-to-int
154 (match-string 1 uri-feature))
156 (est-uri-decode-feature-name-body
157 (substring uri-feature (match-end 0)))))
159 ((and (setq uri-feature (est-uri-decode-feature-name-body uri-feature))
160 (setq feature (intern (format "=>%s" uri-feature)))
161 (find-charset feature))
163 ((and (setq feature (intern (format "=>>%s" uri-feature)))
164 (find-charset feature))
166 ((and (setq feature (intern (format "=>>>%s" uri-feature)))
167 (find-charset feature))
169 ((and (setq feature (intern (format "=%s" uri-feature)))
170 (find-charset feature))
172 (t (intern uri-feature)))))
174 (defun www-uri-encode-feature-name (feature-name)
175 (setq feature-name (symbol-name feature-name))
177 ((string-match "^=\\+>\\([^=>]+\\)" feature-name)
179 (est-uri-encode-feature-name-body
180 (substring feature-name (match-beginning 1))))
182 ((string-match "^=\\([^=>]+\\)" feature-name)
184 (est-uri-encode-feature-name-body
185 (substring feature-name (match-beginning 1))))
187 ((string-match "^==\\([^=>]+\\)" feature-name)
189 (est-uri-encode-feature-name-body
190 (substring feature-name (match-beginning 1))))
192 ((string-match "^===\\([^=>]+\\)" feature-name)
194 (est-uri-encode-feature-name-body
195 (substring feature-name (match-beginning 1))))
197 ((string-match "^=>>\\([^=>]+\\)" feature-name)
199 (est-uri-encode-feature-name-body
200 (substring feature-name (match-beginning 1))))
202 ((string-match "^=>>>\\([^=>]+\\)" feature-name)
204 (est-uri-encode-feature-name-body
205 (substring feature-name (match-beginning 1))))
207 ((string-match "^=>>\\(>+\\)" feature-name)
209 (length (match-string 1 feature-name))
210 (est-uri-encode-feature-name-body
211 (substring feature-name (match-end 1))))
213 ((string-match "^=>\\([^=>]+\\)" feature-name)
215 (est-uri-encode-feature-name-body
216 (substring feature-name (match-beginning 1))))
218 ((string-match "^\\(=+\\)>" feature-name)
220 (length (match-string 1 feature-name))
221 (est-uri-encode-feature-name-body
222 (substring feature-name (match-end 0))))
224 ((string-match "^->" feature-name)
226 (est-uri-encode-feature-name-body
227 (substring feature-name (match-end 0))))
229 ((string-match "^<-" feature-name)
231 (est-uri-encode-feature-name-body
232 (substring feature-name (match-end 0))))
234 ((string-match "^\\*" feature-name)
236 (est-uri-encode-feature-name-body
237 (substring feature-name (match-end 0))))
239 (t (est-uri-encode-feature-name-body feature-name))))
242 (defvar chise-turtle-ccs-prefix-alist nil)
244 (defun chise-turtle-uri-decode-feature-name (uri-feature)
245 (cond ((string= "a.ucs" uri-feature)
247 ((string= "a.big5" uri-feature)
250 (www-uri-decode-feature-name uri-feature))))
252 (defun chise-turtle-uri-encode-ccs-name (feature-name)
254 ((eq '=ucs feature-name)
256 ((eq '=big5 feature-name)
258 ((eq '==>ucs@bucs feature-name)
261 (mapconcat (lambda (c)
272 (char-to-string c))))
273 (www-uri-encode-feature-name feature-name)
276 (defun charset-code-point-format-spec (ccs)
277 (cond ((memq ccs '(=ucs))
280 (let ((ccs-name (symbol-name ccs)))
283 "\\(shinjigen\\|daikanwa/ho\\|=>iwds-1\\)"
287 "\\(gt\\|daikanwa\\|adobe-japan1\\|cbeta\\|zinbun-oracle\\|hng\\)"
290 ((string-match "\\(hanyo-denshi/ks\\|koseki\\|mj\\)" ccs-name)
292 ((string-match "hanyo-denshi/tk" ccs-name)
297 (defun chise-turtle-format-ccs-code-point (ccs code-point)
298 (let ((ccs-uri (chise-turtle-uri-encode-ccs-name ccs)))
299 (unless (assoc ccs-uri chise-turtle-ccs-prefix-alist)
300 (setq chise-turtle-ccs-prefix-alist
301 (cons (cons ccs-uri ccs)
302 chise-turtle-ccs-prefix-alist)))
305 (format (charset-code-point-format-spec ccs)
308 (defun chise-turtle-encode-char (object)
311 (if (setq ret (encode-char object '=ucs))
312 (chise-turtle-format-ccs-code-point '=ucs ret)
313 (setq spec (char-attribute-alist object))
315 (setq cell (pop spec)))
316 (if (and (find-charset (car cell))
317 (setq ret (cdr cell)))
318 (setq dest (cons cell dest))))
319 (setq ret (car (sort dest (lambda (a b)
320 (char-attribute-name< (car a)(car b)))))
324 (chise-turtle-format-ccs-code-point ccs ret)
326 ((and (setq ccs (car (split-char object)))
327 (setq ret (encode-char object ccs)))
328 (chise-turtle-format-ccs-code-point ccs ret)
330 ((setq ret (get-char-attribute object 'ideographic-combination))
331 (format "ideocomb:%s"
332 (mapconcat (lambda (cell)
333 (cond ((characterp cell)
334 (char-to-string cell)
336 ((setq ret2 (find-char cell))
337 (char-to-string ret2)
345 (format "system-char-id:0x%X"
346 (encode-char object 'system-char-id))
353 (provide 'chiset-common)
355 ;;; chiset-common.el ends here