(tomoyo-modules-to-compile): Add `chiset-common', `isd-turtle' and
[chise/tomoyo-tools.git] / chiset-common.el
1 ;;; chiset-common.el --- CHISET common utility -*- coding: utf-8-er; -*-
2
3 ;; Copyright (C) 2010,2011,2012,2013,2014,2015,2016,2017,2018 MORIOKA Tomohiko.
4
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: CHISE, RDF, Turtle, WWW
7
8 ;; This file is part of CHISET (CHISE/Turtle).
9
10 ;; XEmacs CHISE is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
14
15 ;; XEmacs CHISE is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 ;; General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with XEmacs CHISE; see the file COPYING.  If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
24
25 ;;; Code:
26
27 (defun decode-uri-string (string &optional coding-system)
28   (if (> (length string) 0)
29       (let ((i 0)
30             dest)
31         (setq string
32               (mapconcat (lambda (char)
33                            (if (eq char ?+)
34                                " "
35                              (char-to-string char)))
36                          string ""))
37         (while (string-match "%\\([0-9A-F][0-9A-F]\\)" string i)
38           (setq dest (concat dest
39                              (substring string i (match-beginning 0))
40                              (char-to-string
41                               (int-char
42                                (string-to-int (match-string 1 string) 16))))
43                 i (match-end 0)))
44         (decode-coding-string
45          (concat dest (substring string i))
46          coding-system))))
47
48
49 ;;; @ URI representation
50 ;;;
51
52 (defun est-uri-decode-feature-name-body (uri-feature)
53   (let ((len (length uri-feature))
54         (i 0)
55         ch dest)
56     (while (< i len)
57       (setq dest
58             (concat
59              dest
60              (if (eq (aref uri-feature i) ?\.)
61                  (if (and (< (+ i 2) len)
62                           (eq (aref uri-feature (+ i 2)) ?\.))
63                      (prog1
64                          (cond
65                           ((eq (setq ch (aref uri-feature (1+ i))) ?\.)
66                            "/")
67                           ((eq ch ?-)
68                            "*")
69                           (t
70                            (substring uri-feature i (+ i 3))
71                            ))
72                        (setq i (+ i 3)))
73                    (setq i (1+ i))
74                    ".")
75                (prog1
76                    (char-to-string (aref uri-feature i))
77                  (setq i (1+ i)))))))
78     dest))
79
80 (defun est-uri-encode-feature-name-body (feature)
81   (mapconcat (lambda (c)
82                (cond ((eq c ?*)
83                       ".-.")
84                      ((eq c ?/)
85                       "...")
86                      (t (char-to-string c))))
87              feature ""))
88
89 (defun www-uri-decode-feature-name (uri-feature)
90   (let (feature)
91     (setq uri-feature (decode-uri-string uri-feature 'utf-8-mcs-er))
92     (cond
93      ((string-match "^from\\." uri-feature)
94       (intern (format "<-%s"
95                       (est-uri-decode-feature-name-body
96                        (substring uri-feature (match-end 0)))))
97       )
98      ((string-match "^to\\." uri-feature)
99       (intern (format "->%s"
100                       (est-uri-decode-feature-name-body
101                        (substring uri-feature (match-end 0)))))
102       )
103      ((string-match "^rep\\." uri-feature)
104       (intern (format "=%s"
105                       (est-uri-decode-feature-name-body
106                        (substring uri-feature (match-end 0)))))
107       )
108      ((string-match "^rep[2i]\\." uri-feature)
109       (intern (format "===%s"
110                       (est-uri-decode-feature-name-body
111                        (substring uri-feature (match-end 0)))))
112       )
113      ((string-match "^g\\." uri-feature)
114       (intern (format "=>>%s"
115                       (est-uri-decode-feature-name-body
116                        (substring uri-feature (match-end 0)))))
117       )
118      ((string-match "^g[i2]\\." uri-feature)
119       (intern (format "==%s"
120                       (est-uri-decode-feature-name-body
121                        (substring uri-feature (match-end 0)))))
122       )
123      ((string-match "^gi\\([0-9]+\\)\\." uri-feature)
124       (intern (format "=>>%s%s"
125                       (make-string (string-to-int
126                                     (match-string 1 uri-feature))
127                                    ?>)
128                       (est-uri-decode-feature-name-body
129                        (substring uri-feature (match-end 0)))))
130       )
131      ((string-match "^o\\." uri-feature)
132       (intern (format "=+>%s"
133                       (est-uri-decode-feature-name-body
134                        (substring uri-feature (match-end 0)))))
135       )
136      ((string-match "^a\\." uri-feature)
137       (intern (format "=>%s"
138                       (est-uri-decode-feature-name-body
139                        (substring uri-feature (match-end 0)))))
140       )
141      ((string-match "^a\\([0-9]+\\)\\." uri-feature)
142       (intern (format "%s>%s"
143                       (make-string (string-to-int
144                                     (match-string 1 uri-feature))
145                                    ?=)
146                       (est-uri-decode-feature-name-body
147                        (substring uri-feature (match-end 0)))))
148       )
149      ((and (setq uri-feature (est-uri-decode-feature-name-body uri-feature))
150            (setq feature (intern (format "=>%s" uri-feature)))
151            (find-charset feature))
152       feature)
153      ((and (setq feature (intern (format "=>>%s" uri-feature)))
154            (find-charset feature))
155       feature)
156      ((and (setq feature (intern (format "=>>>%s" uri-feature)))
157            (find-charset feature))
158       feature)
159      ((and (setq feature (intern (format "=%s" uri-feature)))
160            (find-charset feature))
161       feature)
162      (t (intern uri-feature)))))
163
164 (defun www-uri-encode-feature-name (feature-name)
165   (setq feature-name (symbol-name feature-name))
166   (cond
167    ((string-match "^=\\+>\\([^=>]+\\)" feature-name)
168     (concat "o."
169             (est-uri-encode-feature-name-body
170              (substring feature-name (match-beginning 1))))
171     )
172    ((string-match "^=\\([^=>]+\\)" feature-name)
173     (concat "rep."
174             (est-uri-encode-feature-name-body
175              (substring feature-name (match-beginning 1))))
176     )
177    ((string-match "^==\\([^=>]+\\)" feature-name)
178     (concat "g2."
179             (est-uri-encode-feature-name-body
180              (substring feature-name (match-beginning 1))))
181     )
182    ((string-match "^===\\([^=>]+\\)" feature-name)
183     (concat "repi."
184             (est-uri-encode-feature-name-body
185              (substring feature-name (match-beginning 1))))
186     )
187    ((string-match "^=>>\\([^=>]+\\)" feature-name)
188     (concat "g."
189             (est-uri-encode-feature-name-body
190              (substring feature-name (match-beginning 1))))
191     )
192    ((string-match "^=>>>\\([^=>]+\\)" feature-name)
193     (concat "gi."
194             (est-uri-encode-feature-name-body
195              (substring feature-name (match-beginning 1))))
196     )
197    ((string-match "^=>>\\(>+\\)" feature-name)
198     (format "gi%d.%s"
199             (length (match-string 1 feature-name))
200             (est-uri-encode-feature-name-body
201              (substring feature-name (match-end 1))))
202     )
203    ((string-match "^=>\\([^=>]+\\)" feature-name)
204     (concat "a."
205             (est-uri-encode-feature-name-body
206              (substring feature-name (match-beginning 1))))
207     )
208    ((string-match "^\\(=+\\)>" feature-name)
209     (format "a%d.%s"
210             (length (match-string 1 feature-name))
211             (est-uri-encode-feature-name-body
212              (substring feature-name (match-end 0))))
213     )
214    ((string-match "^->" feature-name)
215     (concat "to."
216             (est-uri-encode-feature-name-body
217              (substring feature-name (match-end 0))))
218     )
219    ((string-match "^<-" feature-name)
220     (concat "from."
221             (est-uri-encode-feature-name-body
222              (substring feature-name (match-end 0))))
223     )
224    (t (est-uri-encode-feature-name-body feature-name))))
225
226
227 (defvar chise-turtle-ccs-prefix-alist nil)
228
229 (defun chise-turtle-uri-decode-feature-name (uri-feature)
230   (cond ((string= "a.ucs" uri-feature)
231          '=ucs)
232         ((string= "a.big5" uri-feature)
233          '=big5)
234         (t
235          (www-uri-decode-feature-name uri-feature))))
236
237 (defun chise-turtle-uri-encode-ccs-name (feature-name)
238   (cond
239    ((eq '=ucs feature-name)
240     "a.ucs")
241    ((eq '=big5 feature-name)
242     "a.big5")
243    ((eq '==>ucs@bucs feature-name)
244     "bucs")
245    (t
246     (mapconcat (lambda (c)
247                  (cond
248                   ((eq c ?@)
249                    "_")
250                   ((eq c ?+)
251                    "._.")
252                   ((eq c ?=)
253                    ".:.")
254                   ((eq c ?|)
255                    "._cmp_.")
256                   (t
257                    (char-to-string c))))
258                (www-uri-encode-feature-name feature-name)
259                ""))))
260
261 (defun charset-code-point-format-spec (ccs)
262   (cond ((memq ccs '(=ucs))
263          "0x%04X")
264         (t
265          (let ((ccs-name (symbol-name ccs)))
266            (cond
267             ((string-match
268               "\\(shinjigen\\|daikanwa/ho\\|=>iwds-1\\)"
269               ccs-name)
270              "%04d")
271             ((string-match
272               "\\(gt\\|daikanwa\\|adobe-japan1\\|cbeta\\|zinbun-oracle\\|hng\\)"
273               ccs-name)
274              "%05d")
275             ((string-match "\\(hanyo-denshi/ks\\|koseki\\|mj\\)" ccs-name)
276              "%06d")
277             ((string-match "hanyo-denshi/tk" ccs-name)
278              "%08d")
279             (t
280              "0x%X"))))))
281
282 (defun chise-turtle-format-ccs-code-point (ccs code-point)
283   (let ((ccs-uri (chise-turtle-uri-encode-ccs-name ccs)))
284     (unless (assoc ccs-uri chise-turtle-ccs-prefix-alist)
285       (setq chise-turtle-ccs-prefix-alist
286             (cons (cons ccs-uri ccs)
287                   chise-turtle-ccs-prefix-alist)))
288     (format "%s:%s"
289             ccs-uri
290             (format (charset-code-point-format-spec ccs)
291                     code-point))))
292
293 (defun chise-turtle-encode-char (object)
294   (let (spec cell dest
295         ccs ret ret2)
296     (if (setq ret (encode-char object '=ucs))
297         (chise-turtle-format-ccs-code-point '=ucs ret)
298       (setq spec (char-attribute-alist object))
299       (while (and spec
300                   (setq cell (pop spec)))
301         (if (and (find-charset (car cell))
302                  (setq ret (cdr cell)))
303             (setq dest (cons cell dest))))
304       (setq ret (car (sort dest (lambda (a b)
305                                   (char-attribute-name< (car a)(car b)))))
306             ccs (car ret)
307             ret (cdr ret))
308       (cond (ret
309              (chise-turtle-format-ccs-code-point ccs ret)
310              )
311             ((and (setq ccs (car (split-char object)))
312                   (setq ret (encode-char object ccs)))
313              (chise-turtle-format-ccs-code-point ccs ret)
314              )
315             ((setq ret (get-char-attribute object 'ideographic-combination))
316              (format "ideocomb:%s"
317                      (mapconcat (lambda (cell)
318                                   (cond ((characterp cell)
319                                          (char-to-string cell)
320                                          )
321                                         ((setq ret2 (find-char cell))
322                                          (char-to-string ret2)
323                                          )
324                                         (t
325                                          (format "%S" cell)
326                                          )))
327                                 ret ""))
328              )
329             (t
330              (format "system-char-id:0x%X"
331                      (encode-char object 'system-char-id))
332              )))))
333
334
335 ;;; @ end
336 ;;;
337
338 (provide 'chiset-common)
339
340 ;;; chiset-common.el ends here