update.
[chise/tomoyo-tools.git] / chiset-common.el
1 ;;; chiset-common.el --- CHISET common utility -*- coding: utf-8-er; -*-
2
3 ;; Copyright (C) 2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 MORIOKA Tomohiko.
4
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: CHISE, RDF, Turtle, WWW
7
8 ;; This file is part of CHISET (CHISE/Turtle).
9
10 ;; XEmacs CHISE is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
14
15 ;; XEmacs CHISE is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 ;; General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with XEmacs CHISE; see the file COPYING.  If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
24
25 ;;; Code:
26
27 (defun decode-uri-string (string &optional coding-system)
28   (if (> (length string) 0)
29       (let ((i 0)
30             dest)
31         (setq string
32               (mapconcat (lambda (char)
33                            (if (eq char ?+)
34                                " "
35                              (char-to-string char)))
36                          string ""))
37         (while (string-match "%\\([0-9A-F][0-9A-F]\\)" string i)
38           (setq dest (concat dest
39                              (substring string i (match-beginning 0))
40                              (char-to-string
41                               (int-char
42                                (string-to-int (match-string 1 string) 16))))
43                 i (match-end 0)))
44         (decode-coding-string
45          (concat dest (substring string i))
46          coding-system))))
47
48
49 ;;; @ URI representation
50 ;;;
51
52 (defun est-uri-decode-feature-name-body (uri-feature)
53   (let ((len (length uri-feature))
54         (i 0)
55         ch dest)
56     (while (< i len)
57       (setq dest
58             (concat
59              dest
60              (if (eq (aref uri-feature i) ?\.)
61                  (if (and (< (+ i 2) len)
62                           (eq (aref uri-feature (+ i 2)) ?\.))
63                      (prog1
64                          (cond
65                           ((eq (setq ch (aref uri-feature (1+ i))) ?\.)
66                            "/")
67                           ((eq ch ?-)
68                            "*")
69                           ((eq ch ?_)
70                            "+")
71                           (t
72                            (substring uri-feature i (+ i 3))
73                            ))
74                        (setq i (+ i 3)))
75                    (setq i (1+ i))
76                    ".")
77                (prog1
78                    (char-to-string (aref uri-feature i))
79                  (setq i (1+ i)))))))
80     dest))
81
82 (defun est-uri-encode-feature-name-body (feature)
83   (mapconcat (lambda (c)
84                (cond ((eq c ?*)
85                       ".-.")
86                      ((eq c ?/)
87                       "...")
88                      ((eq c ?+)
89                       "._.")
90                      (t (char-to-string c))))
91              feature ""))
92
93 (defun www-uri-decode-feature-name (uri-feature)
94   (let (feature)
95     (setq uri-feature (decode-uri-string uri-feature 'utf-8-mcs-er))
96     (cond
97      ((string-match "^from\\." uri-feature)
98       (intern (format "<-%s"
99                       (est-uri-decode-feature-name-body
100                        (substring uri-feature (match-end 0)))))
101       )
102      ((string-match "^to\\." uri-feature)
103       (intern (format "->%s"
104                       (est-uri-decode-feature-name-body
105                        (substring uri-feature (match-end 0)))))
106       )
107      ((string-match "^rep\\." uri-feature)
108       (intern (format "=%s"
109                       (est-uri-decode-feature-name-body
110                        (substring uri-feature (match-end 0)))))
111       )
112      ((string-match "^rep[2i]\\." uri-feature)
113       (intern (format "===%s"
114                       (est-uri-decode-feature-name-body
115                        (substring uri-feature (match-end 0)))))
116       )
117      ((string-match "^g\\." uri-feature)
118       (intern (format "=>>%s"
119                       (est-uri-decode-feature-name-body
120                        (substring uri-feature (match-end 0)))))
121       )
122      ((string-match "^g[i2]\\." uri-feature)
123       (intern (format "==%s"
124                       (est-uri-decode-feature-name-body
125                        (substring uri-feature (match-end 0)))))
126       )
127      ((string-match "^gi\\([0-9]+\\)\\." uri-feature)
128       (intern (format "=>>%s%s"
129                       (make-string (string-to-int
130                                     (match-string 1 uri-feature))
131                                    ?>)
132                       (est-uri-decode-feature-name-body
133                        (substring uri-feature (match-end 0)))))
134       )
135      ((string-match "^o\\." uri-feature)
136       (intern (format "=+>%s"
137                       (est-uri-decode-feature-name-body
138                        (substring uri-feature (match-end 0)))))
139       )
140      ((string-match "^a\\." uri-feature)
141       (intern (format "=>%s"
142                       (est-uri-decode-feature-name-body
143                        (substring uri-feature (match-end 0)))))
144       )
145      ((string-match "^a\\([0-9]+\\)\\." uri-feature)
146       (intern (format "%s>%s"
147                       (make-string (string-to-int
148                                     (match-string 1 uri-feature))
149                                    ?=)
150                       (est-uri-decode-feature-name-body
151                        (substring uri-feature (match-end 0)))))
152       )
153      ((and (setq uri-feature (est-uri-decode-feature-name-body uri-feature))
154            (setq feature (intern (format "=>%s" uri-feature)))
155            (find-charset feature))
156       feature)
157      ((and (setq feature (intern (format "=>>%s" uri-feature)))
158            (find-charset feature))
159       feature)
160      ((and (setq feature (intern (format "=>>>%s" uri-feature)))
161            (find-charset feature))
162       feature)
163      ((and (setq feature (intern (format "=%s" uri-feature)))
164            (find-charset feature))
165       feature)
166      (t (intern uri-feature)))))
167
168 (defun www-uri-encode-feature-name (feature-name)
169   (setq feature-name (symbol-name feature-name))
170   (cond
171    ((string-match "^=\\+>\\([^=>]+\\)" feature-name)
172     (concat "o."
173             (est-uri-encode-feature-name-body
174              (substring feature-name (match-beginning 1))))
175     )
176    ((string-match "^=\\([^=>]+\\)" feature-name)
177     (concat "rep."
178             (est-uri-encode-feature-name-body
179              (substring feature-name (match-beginning 1))))
180     )
181    ((string-match "^==\\([^=>]+\\)" feature-name)
182     (concat "g2."
183             (est-uri-encode-feature-name-body
184              (substring feature-name (match-beginning 1))))
185     )
186    ((string-match "^===\\([^=>]+\\)" feature-name)
187     (concat "repi."
188             (est-uri-encode-feature-name-body
189              (substring feature-name (match-beginning 1))))
190     )
191    ((string-match "^=>>\\([^=>]+\\)" feature-name)
192     (concat "g."
193             (est-uri-encode-feature-name-body
194              (substring feature-name (match-beginning 1))))
195     )
196    ((string-match "^=>>>\\([^=>]+\\)" feature-name)
197     (concat "gi."
198             (est-uri-encode-feature-name-body
199              (substring feature-name (match-beginning 1))))
200     )
201    ((string-match "^=>>\\(>+\\)" feature-name)
202     (format "gi%d.%s"
203             (length (match-string 1 feature-name))
204             (est-uri-encode-feature-name-body
205              (substring feature-name (match-end 1))))
206     )
207    ((string-match "^=>\\([^=>]+\\)" feature-name)
208     (concat "a."
209             (est-uri-encode-feature-name-body
210              (substring feature-name (match-beginning 1))))
211     )
212    ((string-match "^\\(=+\\)>" feature-name)
213     (format "a%d.%s"
214             (length (match-string 1 feature-name))
215             (est-uri-encode-feature-name-body
216              (substring feature-name (match-end 0))))
217     )
218    ((string-match "^->" feature-name)
219     (concat "to."
220             (est-uri-encode-feature-name-body
221              (substring feature-name (match-end 0))))
222     )
223    ((string-match "^<-" feature-name)
224     (concat "from."
225             (est-uri-encode-feature-name-body
226              (substring feature-name (match-end 0))))
227     )
228    (t (est-uri-encode-feature-name-body feature-name))))
229
230
231 (defvar chise-turtle-ccs-prefix-alist nil)
232
233 (defun chise-turtle-uri-decode-feature-name (uri-feature)
234   (cond ((string= "a.ucs" uri-feature)
235          '=ucs)
236         ((string= "a.big5" uri-feature)
237          '=big5)
238         (t
239          (www-uri-decode-feature-name uri-feature))))
240
241 (defun chise-turtle-uri-encode-ccs-name (feature-name)
242   (cond
243    ((eq '=ucs feature-name)
244     "a.ucs")
245    ((eq '=big5 feature-name)
246     "a.big5")
247    ((eq '==>ucs@bucs feature-name)
248     "bucs")
249    (t
250     (mapconcat (lambda (c)
251                  (cond
252                   ((eq c ?@)
253                    "_")
254                   ((eq c ?+)
255                    "._.")
256                   ((eq c ?=)
257                    ".:.")
258                   ((eq c ?|)
259                    "._cmp_.")
260                   (t
261                    (char-to-string c))))
262                (www-uri-encode-feature-name feature-name)
263                ""))))
264
265 (defun charset-code-point-format-spec (ccs)
266   (cond ((memq ccs '(=ucs))
267          "0x%04X")
268         (t
269          (let ((ccs-name (symbol-name ccs)))
270            (cond
271             ((string-match
272               "\\(shinjigen\\|daikanwa/ho\\|=>iwds-1\\)"
273               ccs-name)
274              "%04d")
275             ((string-match
276               "\\(gt\\|daikanwa\\|adobe-japan1\\|cbeta\\|zinbun-oracle\\|hng\\)"
277               ccs-name)
278              "%05d")
279             ((string-match "\\(hanyo-denshi/ks\\|koseki\\|mj\\)" ccs-name)
280              "%06d")
281             ((string-match "hanyo-denshi/tk" ccs-name)
282              "%08d")
283             (t
284              "0x%X"))))))
285
286 (defun chise-turtle-format-ccs-code-point (ccs code-point)
287   (let ((ccs-uri (chise-turtle-uri-encode-ccs-name ccs)))
288     (unless (assoc ccs-uri chise-turtle-ccs-prefix-alist)
289       (setq chise-turtle-ccs-prefix-alist
290             (cons (cons ccs-uri ccs)
291                   chise-turtle-ccs-prefix-alist)))
292     (format "%s:%s"
293             ccs-uri
294             (format (charset-code-point-format-spec ccs)
295                     code-point))))
296
297 (defun chise-turtle-encode-char (object)
298   (let (spec cell dest
299         ccs ret ret2)
300     (if (setq ret (encode-char object '=ucs))
301         (chise-turtle-format-ccs-code-point '=ucs ret)
302       (setq spec (char-attribute-alist object))
303       (while (and spec
304                   (setq cell (pop spec)))
305         (if (and (find-charset (car cell))
306                  (setq ret (cdr cell)))
307             (setq dest (cons cell dest))))
308       (setq ret (car (sort dest (lambda (a b)
309                                   (char-attribute-name< (car a)(car b)))))
310             ccs (car ret)
311             ret (cdr ret))
312       (cond (ret
313              (chise-turtle-format-ccs-code-point ccs ret)
314              )
315             ((and (setq ccs (car (split-char object)))
316                   (setq ret (encode-char object ccs)))
317              (chise-turtle-format-ccs-code-point ccs ret)
318              )
319             ((setq ret (get-char-attribute object 'ideographic-combination))
320              (format "ideocomb:%s"
321                      (mapconcat (lambda (cell)
322                                   (cond ((characterp cell)
323                                          (char-to-string cell)
324                                          )
325                                         ((setq ret2 (find-char cell))
326                                          (char-to-string ret2)
327                                          )
328                                         (t
329                                          (format "%S" cell)
330                                          )))
331                                 ret ""))
332              )
333             (t
334              (format "system-char-id:0x%X"
335                      (encode-char object 'system-char-id))
336              )))))
337
338
339 ;;; @ end
340 ;;;
341
342 (provide 'chiset-common)
343
344 ;;; chiset-common.el ends here