1 ;;; chiset-common.el --- CHISET common utility -*- coding: utf-8-er; -*-
3 ;; Copyright (C) 2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 MORIOKA Tomohiko.
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: CHISE, RDF, Turtle, WWW
8 ;; This file is part of CHISET (CHISE/Turtle).
10 ;; XEmacs CHISE is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
15 ;; XEmacs CHISE is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with XEmacs CHISE; see the file COPYING. If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
27 (defun decode-uri-string (string &optional coding-system)
28 (if (> (length string) 0)
32 (mapconcat (lambda (char)
35 (char-to-string char)))
37 (while (string-match "%\\([0-9A-F][0-9A-F]\\)" string i)
38 (setq dest (concat dest
39 (substring string i (match-beginning 0))
42 (string-to-int (match-string 1 string) 16))))
45 (concat dest (substring string i))
49 ;;; @ URI representation
52 (defun est-uri-decode-feature-name-body (uri-feature)
53 (let ((len (length uri-feature))
60 (if (eq (aref uri-feature i) ?\.)
61 (if (and (< (+ i 2) len)
62 (eq (aref uri-feature (+ i 2)) ?\.))
65 ((eq (setq ch (aref uri-feature (1+ i))) ?\.)
72 (substring uri-feature i (+ i 3))
78 (char-to-string (aref uri-feature i))
82 (defun est-uri-encode-feature-name-body (feature)
83 (mapconcat (lambda (c)
90 (t (char-to-string c))))
93 (defun www-uri-decode-feature-name (uri-feature)
95 (setq uri-feature (decode-uri-string uri-feature 'utf-8-mcs-er))
97 ((string-match "^from\\." uri-feature)
98 (intern (format "<-%s"
99 (est-uri-decode-feature-name-body
100 (substring uri-feature (match-end 0)))))
102 ((string-match "^to\\." uri-feature)
103 (intern (format "->%s"
104 (est-uri-decode-feature-name-body
105 (substring uri-feature (match-end 0)))))
107 ((string-match "^meta\\." uri-feature)
108 (intern (format "*%s"
109 (est-uri-decode-feature-name-body
110 (substring uri-feature (match-end 0)))))
112 ((string-match "^rep\\." uri-feature)
113 (intern (format "=%s"
114 (est-uri-decode-feature-name-body
115 (substring uri-feature (match-end 0)))))
117 ((string-match "^rep[2i]\\." uri-feature)
118 (intern (format "===%s"
119 (est-uri-decode-feature-name-body
120 (substring uri-feature (match-end 0)))))
122 ((string-match "^g\\." uri-feature)
123 (intern (format "=>>%s"
124 (est-uri-decode-feature-name-body
125 (substring uri-feature (match-end 0)))))
127 ((string-match "^g[i2]\\." uri-feature)
128 (intern (format "==%s"
129 (est-uri-decode-feature-name-body
130 (substring uri-feature (match-end 0)))))
132 ((string-match "^gi\\([0-9]+\\)\\." uri-feature)
133 (intern (format "=>>%s%s"
134 (make-string (string-to-int
135 (match-string 1 uri-feature))
137 (est-uri-decode-feature-name-body
138 (substring uri-feature (match-end 0)))))
140 ((string-match "^o\\." uri-feature)
141 (intern (format "=+>%s"
142 (est-uri-decode-feature-name-body
143 (substring uri-feature (match-end 0)))))
145 ((string-match "^a\\." uri-feature)
146 (intern (format "=>%s"
147 (est-uri-decode-feature-name-body
148 (substring uri-feature (match-end 0)))))
150 ((string-match "^a\\([0-9]+\\)\\." uri-feature)
151 (intern (format "%s>%s"
152 (make-string (string-to-int
153 (match-string 1 uri-feature))
155 (est-uri-decode-feature-name-body
156 (substring uri-feature (match-end 0)))))
158 ((and (setq uri-feature (est-uri-decode-feature-name-body uri-feature))
159 (setq feature (intern (format "=>%s" uri-feature)))
160 (find-charset feature))
162 ((and (setq feature (intern (format "=>>%s" uri-feature)))
163 (find-charset feature))
165 ((and (setq feature (intern (format "=>>>%s" uri-feature)))
166 (find-charset feature))
168 ((and (setq feature (intern (format "=%s" uri-feature)))
169 (find-charset feature))
171 (t (intern uri-feature)))))
173 (defun www-uri-encode-feature-name (feature-name)
174 (setq feature-name (symbol-name feature-name))
176 ((string-match "^=\\+>\\([^=>]+\\)" feature-name)
178 (est-uri-encode-feature-name-body
179 (substring feature-name (match-beginning 1))))
181 ((string-match "^=\\([^=>]+\\)" feature-name)
183 (est-uri-encode-feature-name-body
184 (substring feature-name (match-beginning 1))))
186 ((string-match "^==\\([^=>]+\\)" feature-name)
188 (est-uri-encode-feature-name-body
189 (substring feature-name (match-beginning 1))))
191 ((string-match "^===\\([^=>]+\\)" feature-name)
193 (est-uri-encode-feature-name-body
194 (substring feature-name (match-beginning 1))))
196 ((string-match "^=>>\\([^=>]+\\)" feature-name)
198 (est-uri-encode-feature-name-body
199 (substring feature-name (match-beginning 1))))
201 ((string-match "^=>>>\\([^=>]+\\)" feature-name)
203 (est-uri-encode-feature-name-body
204 (substring feature-name (match-beginning 1))))
206 ((string-match "^=>>\\(>+\\)" feature-name)
208 (length (match-string 1 feature-name))
209 (est-uri-encode-feature-name-body
210 (substring feature-name (match-end 1))))
212 ((string-match "^=>\\([^=>]+\\)" feature-name)
214 (est-uri-encode-feature-name-body
215 (substring feature-name (match-beginning 1))))
217 ((string-match "^\\(=+\\)>" feature-name)
219 (length (match-string 1 feature-name))
220 (est-uri-encode-feature-name-body
221 (substring feature-name (match-end 0))))
223 ((string-match "^->" feature-name)
225 (est-uri-encode-feature-name-body
226 (substring feature-name (match-end 0))))
228 ((string-match "^<-" feature-name)
230 (est-uri-encode-feature-name-body
231 (substring feature-name (match-end 0))))
233 ((string-match "^\\*" feature-name)
235 (est-uri-encode-feature-name-body
236 (substring feature-name (match-end 0))))
238 (t (est-uri-encode-feature-name-body feature-name))))
241 (defvar chise-turtle-ccs-prefix-alist nil)
243 (defun chise-turtle-uri-decode-feature-name (uri-feature)
244 (cond ((string= "a.ucs" uri-feature)
246 ((string= "a.big5" uri-feature)
249 (www-uri-decode-feature-name uri-feature))))
251 (defun chise-turtle-uri-encode-ccs-name (feature-name)
253 ((eq '=ucs feature-name)
255 ((eq '=big5 feature-name)
257 ((eq '==>ucs@bucs feature-name)
260 (mapconcat (lambda (c)
271 (char-to-string c))))
272 (www-uri-encode-feature-name feature-name)
275 (defun charset-code-point-format-spec (ccs)
276 (cond ((memq ccs '(=ucs))
279 (let ((ccs-name (symbol-name ccs)))
282 "\\(shinjigen\\|daikanwa/ho\\|=>iwds-1\\)"
286 "\\(gt\\|daikanwa\\|adobe-japan1\\|cbeta\\|zinbun-oracle\\|hng\\)"
289 ((string-match "\\(hanyo-denshi/ks\\|koseki\\|mj\\)" ccs-name)
291 ((string-match "hanyo-denshi/tk" ccs-name)
296 (defun chise-turtle-format-ccs-code-point (ccs code-point)
297 (let ((ccs-uri (chise-turtle-uri-encode-ccs-name ccs)))
298 (unless (assoc ccs-uri chise-turtle-ccs-prefix-alist)
299 (setq chise-turtle-ccs-prefix-alist
300 (cons (cons ccs-uri ccs)
301 chise-turtle-ccs-prefix-alist)))
304 (format (charset-code-point-format-spec ccs)
307 (defun chise-turtle-encode-char (object)
310 (if (setq ret (encode-char object '=ucs))
311 (chise-turtle-format-ccs-code-point '=ucs ret)
312 (setq spec (char-attribute-alist object))
314 (setq cell (pop spec)))
315 (if (and (find-charset (car cell))
316 (setq ret (cdr cell)))
317 (setq dest (cons cell dest))))
318 (setq ret (car (sort dest (lambda (a b)
319 (char-attribute-name< (car a)(car b)))))
323 (chise-turtle-format-ccs-code-point ccs ret)
325 ((and (setq ccs (car (split-char object)))
326 (setq ret (encode-char object ccs)))
327 (chise-turtle-format-ccs-code-point ccs ret)
329 ((setq ret (get-char-attribute object 'ideographic-combination))
330 (format "ideocomb:%s"
331 (mapconcat (lambda (cell)
332 (cond ((characterp cell)
333 (char-to-string cell)
335 ((setq ret2 (find-char cell))
336 (char-to-string ret2)
344 (format "system-char-id:0x%X"
345 (encode-char object 'system-char-id))
352 (provide 'chiset-common)
354 ;;; chiset-common.el ends here