1 ;;; chiset-common.el --- CHISET common utility -*- coding: utf-8-er; -*-
3 ;; Copyright (C) 2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 MORIOKA Tomohiko.
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: CHISE, RDF, Turtle, WWW
8 ;; This file is part of CHISET (CHISE/Turtle).
10 ;; XEmacs CHISE is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
15 ;; XEmacs CHISE is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with XEmacs CHISE; see the file COPYING. If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
27 (defun decode-uri-string (string &optional coding-system)
28 (if (> (length string) 0)
32 (mapconcat (lambda (char)
35 (char-to-string char)))
37 (while (string-match "%\\([0-9A-F][0-9A-F]\\)" string i)
38 (setq dest (concat dest
39 (substring string i (match-beginning 0))
42 (string-to-int (match-string 1 string) 16))))
45 (concat dest (substring string i))
49 ;;; @ URI representation
52 (defun est-uri-decode-feature-name-body (uri-feature)
53 (let ((len (length uri-feature))
60 (if (eq (aref uri-feature i) ?\.)
61 (if (and (< (+ i 2) len)
62 (eq (aref uri-feature (+ i 2)) ?\.))
65 ((eq (setq ch (aref uri-feature (1+ i))) ?\.)
72 (substring uri-feature i (+ i 3))
78 (char-to-string (aref uri-feature i))
82 (defun est-uri-encode-feature-name-body (feature)
83 (mapconcat (lambda (c)
90 (t (char-to-string c))))
93 (defun www-uri-decode-feature-name (uri-feature)
95 (setq uri-feature (decode-uri-string uri-feature 'utf-8-mcs-er))
97 ((string-match "^from\\." uri-feature)
98 (intern (format "<-%s"
99 (est-uri-decode-feature-name-body
100 (substring uri-feature (match-end 0)))))
102 ((string-match "^to\\." uri-feature)
103 (intern (format "->%s"
104 (est-uri-decode-feature-name-body
105 (substring uri-feature (match-end 0)))))
107 ((string-match "^rep\\." uri-feature)
108 (intern (format "=%s"
109 (est-uri-decode-feature-name-body
110 (substring uri-feature (match-end 0)))))
112 ((string-match "^rep[2i]\\." uri-feature)
113 (intern (format "===%s"
114 (est-uri-decode-feature-name-body
115 (substring uri-feature (match-end 0)))))
117 ((string-match "^g\\." uri-feature)
118 (intern (format "=>>%s"
119 (est-uri-decode-feature-name-body
120 (substring uri-feature (match-end 0)))))
122 ((string-match "^g[i2]\\." uri-feature)
123 (intern (format "==%s"
124 (est-uri-decode-feature-name-body
125 (substring uri-feature (match-end 0)))))
127 ((string-match "^gi\\([0-9]+\\)\\." uri-feature)
128 (intern (format "=>>%s%s"
129 (make-string (string-to-int
130 (match-string 1 uri-feature))
132 (est-uri-decode-feature-name-body
133 (substring uri-feature (match-end 0)))))
135 ((string-match "^o\\." uri-feature)
136 (intern (format "=+>%s"
137 (est-uri-decode-feature-name-body
138 (substring uri-feature (match-end 0)))))
140 ((string-match "^a\\." uri-feature)
141 (intern (format "=>%s"
142 (est-uri-decode-feature-name-body
143 (substring uri-feature (match-end 0)))))
145 ((string-match "^a\\([0-9]+\\)\\." uri-feature)
146 (intern (format "%s>%s"
147 (make-string (string-to-int
148 (match-string 1 uri-feature))
150 (est-uri-decode-feature-name-body
151 (substring uri-feature (match-end 0)))))
153 ((and (setq uri-feature (est-uri-decode-feature-name-body uri-feature))
154 (setq feature (intern (format "=>%s" uri-feature)))
155 (find-charset feature))
157 ((and (setq feature (intern (format "=>>%s" uri-feature)))
158 (find-charset feature))
160 ((and (setq feature (intern (format "=>>>%s" uri-feature)))
161 (find-charset feature))
163 ((and (setq feature (intern (format "=%s" uri-feature)))
164 (find-charset feature))
166 (t (intern uri-feature)))))
168 (defun www-uri-encode-feature-name (feature-name)
169 (setq feature-name (symbol-name feature-name))
171 ((string-match "^=\\+>\\([^=>]+\\)" feature-name)
173 (est-uri-encode-feature-name-body
174 (substring feature-name (match-beginning 1))))
176 ((string-match "^=\\([^=>]+\\)" feature-name)
178 (est-uri-encode-feature-name-body
179 (substring feature-name (match-beginning 1))))
181 ((string-match "^==\\([^=>]+\\)" feature-name)
183 (est-uri-encode-feature-name-body
184 (substring feature-name (match-beginning 1))))
186 ((string-match "^===\\([^=>]+\\)" feature-name)
188 (est-uri-encode-feature-name-body
189 (substring feature-name (match-beginning 1))))
191 ((string-match "^=>>\\([^=>]+\\)" feature-name)
193 (est-uri-encode-feature-name-body
194 (substring feature-name (match-beginning 1))))
196 ((string-match "^=>>>\\([^=>]+\\)" feature-name)
198 (est-uri-encode-feature-name-body
199 (substring feature-name (match-beginning 1))))
201 ((string-match "^=>>\\(>+\\)" feature-name)
203 (length (match-string 1 feature-name))
204 (est-uri-encode-feature-name-body
205 (substring feature-name (match-end 1))))
207 ((string-match "^=>\\([^=>]+\\)" feature-name)
209 (est-uri-encode-feature-name-body
210 (substring feature-name (match-beginning 1))))
212 ((string-match "^\\(=+\\)>" feature-name)
214 (length (match-string 1 feature-name))
215 (est-uri-encode-feature-name-body
216 (substring feature-name (match-end 0))))
218 ((string-match "^->" feature-name)
220 (est-uri-encode-feature-name-body
221 (substring feature-name (match-end 0))))
223 ((string-match "^<-" feature-name)
225 (est-uri-encode-feature-name-body
226 (substring feature-name (match-end 0))))
228 (t (est-uri-encode-feature-name-body feature-name))))
231 (defvar chise-turtle-ccs-prefix-alist nil)
233 (defun chise-turtle-uri-decode-feature-name (uri-feature)
234 (cond ((string= "a.ucs" uri-feature)
236 ((string= "a.big5" uri-feature)
239 (www-uri-decode-feature-name uri-feature))))
241 (defun chise-turtle-uri-encode-ccs-name (feature-name)
243 ((eq '=ucs feature-name)
245 ((eq '=big5 feature-name)
247 ((eq '==>ucs@bucs feature-name)
250 (mapconcat (lambda (c)
261 (char-to-string c))))
262 (www-uri-encode-feature-name feature-name)
265 (defun charset-code-point-format-spec (ccs)
266 (cond ((memq ccs '(=ucs))
269 (let ((ccs-name (symbol-name ccs)))
272 "\\(shinjigen\\|daikanwa/ho\\|=>iwds-1\\)"
276 "\\(gt\\|daikanwa\\|adobe-japan1\\|cbeta\\|zinbun-oracle\\|hng\\)"
279 ((string-match "\\(hanyo-denshi/ks\\|koseki\\|mj\\)" ccs-name)
281 ((string-match "hanyo-denshi/tk" ccs-name)
286 (defun chise-turtle-format-ccs-code-point (ccs code-point)
287 (let ((ccs-uri (chise-turtle-uri-encode-ccs-name ccs)))
288 (unless (assoc ccs-uri chise-turtle-ccs-prefix-alist)
289 (setq chise-turtle-ccs-prefix-alist
290 (cons (cons ccs-uri ccs)
291 chise-turtle-ccs-prefix-alist)))
294 (format (charset-code-point-format-spec ccs)
297 (defun chise-turtle-encode-char (object)
300 (if (setq ret (encode-char object '=ucs))
301 (chise-turtle-format-ccs-code-point '=ucs ret)
302 (setq spec (char-attribute-alist object))
304 (setq cell (pop spec)))
305 (if (and (find-charset (car cell))
306 (setq ret (cdr cell)))
307 (setq dest (cons cell dest))))
308 (setq ret (car (sort dest (lambda (a b)
309 (char-attribute-name< (car a)(car b)))))
313 (chise-turtle-format-ccs-code-point ccs ret)
315 ((and (setq ccs (car (split-char object)))
316 (setq ret (encode-char object ccs)))
317 (chise-turtle-format-ccs-code-point ccs ret)
319 ((setq ret (get-char-attribute object 'ideographic-combination))
320 (format "ideocomb:%s"
321 (mapconcat (lambda (cell)
322 (cond ((characterp cell)
323 (char-to-string cell)
325 ((setq ret2 (find-char cell))
326 (char-to-string ret2)
334 (format "system-char-id:0x%X"
335 (encode-char object 'system-char-id))
342 (provide 'chiset-common)
344 ;;; chiset-common.el ends here