(www-uri-decode-feature-name): Decode "meta.<name>" -> `*<name>'.
[chise/tomoyo-tools.git] / chiset-common.el
1 ;;; chiset-common.el --- CHISET common utility -*- coding: utf-8-er; -*-
2
3 ;; Copyright (C) 2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 MORIOKA Tomohiko.
4
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: CHISE, RDF, Turtle, WWW
7
8 ;; This file is part of CHISET (CHISE/Turtle).
9
10 ;; XEmacs CHISE is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
14
15 ;; XEmacs CHISE is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 ;; General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with XEmacs CHISE; see the file COPYING.  If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
24
25 ;;; Code:
26
27 (defun decode-uri-string (string &optional coding-system)
28   (if (> (length string) 0)
29       (let ((i 0)
30             dest)
31         (setq string
32               (mapconcat (lambda (char)
33                            (if (eq char ?+)
34                                " "
35                              (char-to-string char)))
36                          string ""))
37         (while (string-match "%\\([0-9A-F][0-9A-F]\\)" string i)
38           (setq dest (concat dest
39                              (substring string i (match-beginning 0))
40                              (char-to-string
41                               (int-char
42                                (string-to-int (match-string 1 string) 16))))
43                 i (match-end 0)))
44         (decode-coding-string
45          (concat dest (substring string i))
46          coding-system))))
47
48
49 ;;; @ URI representation
50 ;;;
51
52 (defun est-uri-decode-feature-name-body (uri-feature)
53   (let ((len (length uri-feature))
54         (i 0)
55         ch dest)
56     (while (< i len)
57       (setq dest
58             (concat
59              dest
60              (if (eq (aref uri-feature i) ?\.)
61                  (if (and (< (+ i 2) len)
62                           (eq (aref uri-feature (+ i 2)) ?\.))
63                      (prog1
64                          (cond
65                           ((eq (setq ch (aref uri-feature (1+ i))) ?\.)
66                            "/")
67                           ((eq ch ?-)
68                            "*")
69                           ((eq ch ?_)
70                            "+")
71                           (t
72                            (substring uri-feature i (+ i 3))
73                            ))
74                        (setq i (+ i 3)))
75                    (setq i (1+ i))
76                    ".")
77                (prog1
78                    (char-to-string (aref uri-feature i))
79                  (setq i (1+ i)))))))
80     dest))
81
82 (defun est-uri-encode-feature-name-body (feature)
83   (mapconcat (lambda (c)
84                (cond ((eq c ?*)
85                       ".-.")
86                      ((eq c ?/)
87                       "...")
88                      ((eq c ?+)
89                       "._.")
90                      (t (char-to-string c))))
91              feature ""))
92
93 (defun www-uri-decode-feature-name (uri-feature)
94   (let (feature)
95     (setq uri-feature (decode-uri-string uri-feature 'utf-8-mcs-er))
96     (cond
97      ((string-match "^from\\." uri-feature)
98       (intern (format "<-%s"
99                       (est-uri-decode-feature-name-body
100                        (substring uri-feature (match-end 0)))))
101       )
102      ((string-match "^to\\." uri-feature)
103       (intern (format "->%s"
104                       (est-uri-decode-feature-name-body
105                        (substring uri-feature (match-end 0)))))
106       )
107      ((string-match "^meta\\." uri-feature)
108       (intern (format "*%s"
109                       (est-uri-decode-feature-name-body
110                        (substring uri-feature (match-end 0)))))
111       )
112      ((string-match "^rep\\." uri-feature)
113       (intern (format "=%s"
114                       (est-uri-decode-feature-name-body
115                        (substring uri-feature (match-end 0)))))
116       )
117      ((string-match "^rep[2i]\\." uri-feature)
118       (intern (format "===%s"
119                       (est-uri-decode-feature-name-body
120                        (substring uri-feature (match-end 0)))))
121       )
122      ((string-match "^g\\." uri-feature)
123       (intern (format "=>>%s"
124                       (est-uri-decode-feature-name-body
125                        (substring uri-feature (match-end 0)))))
126       )
127      ((string-match "^g[i2]\\." uri-feature)
128       (intern (format "==%s"
129                       (est-uri-decode-feature-name-body
130                        (substring uri-feature (match-end 0)))))
131       )
132      ((string-match "^gi\\([0-9]+\\)\\." uri-feature)
133       (intern (format "=>>%s%s"
134                       (make-string (string-to-int
135                                     (match-string 1 uri-feature))
136                                    ?>)
137                       (est-uri-decode-feature-name-body
138                        (substring uri-feature (match-end 0)))))
139       )
140      ((string-match "^o\\." uri-feature)
141       (intern (format "=+>%s"
142                       (est-uri-decode-feature-name-body
143                        (substring uri-feature (match-end 0)))))
144       )
145      ((string-match "^a\\." uri-feature)
146       (intern (format "=>%s"
147                       (est-uri-decode-feature-name-body
148                        (substring uri-feature (match-end 0)))))
149       )
150      ((string-match "^a\\([0-9]+\\)\\." uri-feature)
151       (intern (format "%s>%s"
152                       (make-string (string-to-int
153                                     (match-string 1 uri-feature))
154                                    ?=)
155                       (est-uri-decode-feature-name-body
156                        (substring uri-feature (match-end 0)))))
157       )
158      ((and (setq uri-feature (est-uri-decode-feature-name-body uri-feature))
159            (setq feature (intern (format "=>%s" uri-feature)))
160            (find-charset feature))
161       feature)
162      ((and (setq feature (intern (format "=>>%s" uri-feature)))
163            (find-charset feature))
164       feature)
165      ((and (setq feature (intern (format "=>>>%s" uri-feature)))
166            (find-charset feature))
167       feature)
168      ((and (setq feature (intern (format "=%s" uri-feature)))
169            (find-charset feature))
170       feature)
171      (t (intern uri-feature)))))
172
173 (defun www-uri-encode-feature-name (feature-name)
174   (setq feature-name (symbol-name feature-name))
175   (cond
176    ((string-match "^=\\+>\\([^=>]+\\)" feature-name)
177     (concat "o."
178             (est-uri-encode-feature-name-body
179              (substring feature-name (match-beginning 1))))
180     )
181    ((string-match "^=\\([^=>]+\\)" feature-name)
182     (concat "rep."
183             (est-uri-encode-feature-name-body
184              (substring feature-name (match-beginning 1))))
185     )
186    ((string-match "^==\\([^=>]+\\)" feature-name)
187     (concat "g2."
188             (est-uri-encode-feature-name-body
189              (substring feature-name (match-beginning 1))))
190     )
191    ((string-match "^===\\([^=>]+\\)" feature-name)
192     (concat "repi."
193             (est-uri-encode-feature-name-body
194              (substring feature-name (match-beginning 1))))
195     )
196    ((string-match "^=>>\\([^=>]+\\)" feature-name)
197     (concat "g."
198             (est-uri-encode-feature-name-body
199              (substring feature-name (match-beginning 1))))
200     )
201    ((string-match "^=>>>\\([^=>]+\\)" feature-name)
202     (concat "gi."
203             (est-uri-encode-feature-name-body
204              (substring feature-name (match-beginning 1))))
205     )
206    ((string-match "^=>>\\(>+\\)" feature-name)
207     (format "gi%d.%s"
208             (length (match-string 1 feature-name))
209             (est-uri-encode-feature-name-body
210              (substring feature-name (match-end 1))))
211     )
212    ((string-match "^=>\\([^=>]+\\)" feature-name)
213     (concat "a."
214             (est-uri-encode-feature-name-body
215              (substring feature-name (match-beginning 1))))
216     )
217    ((string-match "^\\(=+\\)>" feature-name)
218     (format "a%d.%s"
219             (length (match-string 1 feature-name))
220             (est-uri-encode-feature-name-body
221              (substring feature-name (match-end 0))))
222     )
223    ((string-match "^->" feature-name)
224     (concat "to."
225             (est-uri-encode-feature-name-body
226              (substring feature-name (match-end 0))))
227     )
228    ((string-match "^<-" feature-name)
229     (concat "from."
230             (est-uri-encode-feature-name-body
231              (substring feature-name (match-end 0))))
232     )
233    ((string-match "^\\*" feature-name)
234     (concat "meta."
235             (est-uri-encode-feature-name-body
236              (substring feature-name (match-end 0))))
237     )
238    (t (est-uri-encode-feature-name-body feature-name))))
239
240
241 (defvar chise-turtle-ccs-prefix-alist nil)
242
243 (defun chise-turtle-uri-decode-feature-name (uri-feature)
244   (cond ((string= "a.ucs" uri-feature)
245          '=ucs)
246         ((string= "a.big5" uri-feature)
247          '=big5)
248         (t
249          (www-uri-decode-feature-name uri-feature))))
250
251 (defun chise-turtle-uri-encode-ccs-name (feature-name)
252   (cond
253    ((eq '=ucs feature-name)
254     "a.ucs")
255    ((eq '=big5 feature-name)
256     "a.big5")
257    ((eq '==>ucs@bucs feature-name)
258     "bucs")
259    (t
260     (mapconcat (lambda (c)
261                  (cond
262                   ((eq c ?@)
263                    "_")
264                   ((eq c ?+)
265                    "._.")
266                   ((eq c ?=)
267                    ".:.")
268                   ((eq c ?|)
269                    "._cmp_.")
270                   (t
271                    (char-to-string c))))
272                (www-uri-encode-feature-name feature-name)
273                ""))))
274
275 (defun charset-code-point-format-spec (ccs)
276   (cond ((memq ccs '(=ucs))
277          "0x%04X")
278         (t
279          (let ((ccs-name (symbol-name ccs)))
280            (cond
281             ((string-match
282               "\\(shinjigen\\|daikanwa/ho\\|=>iwds-1\\)"
283               ccs-name)
284              "%04d")
285             ((string-match
286               "\\(gt\\|daikanwa\\|adobe-japan1\\|cbeta\\|zinbun-oracle\\|hng\\)"
287               ccs-name)
288              "%05d")
289             ((string-match "\\(hanyo-denshi/ks\\|koseki\\|mj\\)" ccs-name)
290              "%06d")
291             ((string-match "hanyo-denshi/tk" ccs-name)
292              "%08d")
293             (t
294              "0x%X"))))))
295
296 (defun chise-turtle-format-ccs-code-point (ccs code-point)
297   (let ((ccs-uri (chise-turtle-uri-encode-ccs-name ccs)))
298     (unless (assoc ccs-uri chise-turtle-ccs-prefix-alist)
299       (setq chise-turtle-ccs-prefix-alist
300             (cons (cons ccs-uri ccs)
301                   chise-turtle-ccs-prefix-alist)))
302     (format "%s:%s"
303             ccs-uri
304             (format (charset-code-point-format-spec ccs)
305                     code-point))))
306
307 (defun chise-turtle-encode-char (object)
308   (let (spec cell dest
309         ccs ret ret2)
310     (if (setq ret (encode-char object '=ucs))
311         (chise-turtle-format-ccs-code-point '=ucs ret)
312       (setq spec (char-attribute-alist object))
313       (while (and spec
314                   (setq cell (pop spec)))
315         (if (and (find-charset (car cell))
316                  (setq ret (cdr cell)))
317             (setq dest (cons cell dest))))
318       (setq ret (car (sort dest (lambda (a b)
319                                   (char-attribute-name< (car a)(car b)))))
320             ccs (car ret)
321             ret (cdr ret))
322       (cond (ret
323              (chise-turtle-format-ccs-code-point ccs ret)
324              )
325             ((and (setq ccs (car (split-char object)))
326                   (setq ret (encode-char object ccs)))
327              (chise-turtle-format-ccs-code-point ccs ret)
328              )
329             ((setq ret (get-char-attribute object 'ideographic-combination))
330              (format "ideocomb:%s"
331                      (mapconcat (lambda (cell)
332                                   (cond ((characterp cell)
333                                          (char-to-string cell)
334                                          )
335                                         ((setq ret2 (find-char cell))
336                                          (char-to-string ret2)
337                                          )
338                                         (t
339                                          (format "%S" cell)
340                                          )))
341                                 ret ""))
342              )
343             (t
344              (format "system-char-id:0x%X"
345                      (encode-char object 'system-char-id))
346              )))))
347
348
349 ;;; @ end
350 ;;;
351
352 (provide 'chiset-common)
353
354 ;;; chiset-common.el ends here