Update Copyright header.
[chise/tomoyo-tools.git] / chiset-common.el
1 ;;; chiset-common.el --- CHISET common utility -*- coding: utf-8-er; -*-
2
3 ;; Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018,
4 ;;   2019, 2021 MORIOKA Tomohiko.
5
6 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
7 ;; Keywords: CHISE, RDF, Turtle, WWW
8
9 ;; This file is part of CHISET (CHISE/Turtle).
10
11 ;; XEmacs CHISE is free software; you can redistribute it and/or
12 ;; modify it under the terms of the GNU General Public License as
13 ;; published by the Free Software Foundation; either version 2, or (at
14 ;; your option) any later version.
15
16 ;; XEmacs CHISE is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 ;; General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with XEmacs CHISE; see the file COPYING.  If not, write to
23 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Code:
27
28 (defun decode-uri-string (string &optional coding-system)
29   (if (> (length string) 0)
30       (let ((i 0)
31             dest)
32         (setq string
33               (mapconcat (lambda (char)
34                            (if (eq char ?+)
35                                " "
36                              (char-to-string char)))
37                          string ""))
38         (while (string-match "%\\([0-9A-F][0-9A-F]\\)" string i)
39           (setq dest (concat dest
40                              (substring string i (match-beginning 0))
41                              (char-to-string
42                               (int-char
43                                (string-to-int (match-string 1 string) 16))))
44                 i (match-end 0)))
45         (decode-coding-string
46          (concat dest (substring string i))
47          coding-system))))
48
49
50 ;;; @ URI representation
51 ;;;
52
53 (defun est-uri-decode-feature-name-body (uri-feature)
54   (let ((len (length uri-feature))
55         (i 0)
56         ch dest)
57     (while (< i len)
58       (setq dest
59             (concat
60              dest
61              (if (eq (aref uri-feature i) ?\.)
62                  (if (and (< (+ i 2) len)
63                           (eq (aref uri-feature (+ i 2)) ?\.))
64                      (prog1
65                          (cond
66                           ((eq (setq ch (aref uri-feature (1+ i))) ?\.)
67                            "/")
68                           ((eq ch ?-)
69                            "*")
70                           ((eq ch ?_)
71                            "+")
72                           (t
73                            (substring uri-feature i (+ i 3))
74                            ))
75                        (setq i (+ i 3)))
76                    (setq i (1+ i))
77                    ".")
78                (prog1
79                    (char-to-string (aref uri-feature i))
80                  (setq i (1+ i)))))))
81     dest))
82
83 (defun est-uri-encode-feature-name-body (feature)
84   (mapconcat (lambda (c)
85                (cond ((eq c ?*)
86                       ".-.")
87                      ((eq c ?/)
88                       "...")
89                      ((eq c ?+)
90                       "._.")
91                      (t (char-to-string c))))
92              feature ""))
93
94 (defun www-uri-decode-feature-name (uri-feature)
95   (let (feature)
96     (setq uri-feature (decode-uri-string uri-feature 'utf-8-mcs-er))
97     (cond
98      ((string-match "^from\\." uri-feature)
99       (intern (format "<-%s"
100                       (est-uri-decode-feature-name-body
101                        (substring uri-feature (match-end 0)))))
102       )
103      ((string-match "^to\\." uri-feature)
104       (intern (format "->%s"
105                       (est-uri-decode-feature-name-body
106                        (substring uri-feature (match-end 0)))))
107       )
108      ((string-match "^meta\\." uri-feature)
109       (intern (format "*%s"
110                       (est-uri-decode-feature-name-body
111                        (substring uri-feature (match-end 0)))))
112       )
113      ((string-match "^rep\\." uri-feature)
114       (intern (format "=%s"
115                       (est-uri-decode-feature-name-body
116                        (substring uri-feature (match-end 0)))))
117       )
118      ((string-match "^rep[2i]\\." uri-feature)
119       (intern (format "===%s"
120                       (est-uri-decode-feature-name-body
121                        (substring uri-feature (match-end 0)))))
122       )
123      ((string-match "^g\\." uri-feature)
124       (intern (format "=>>%s"
125                       (est-uri-decode-feature-name-body
126                        (substring uri-feature (match-end 0)))))
127       )
128      ((string-match "^g[i2]\\." uri-feature)
129       (intern (format "==%s"
130                       (est-uri-decode-feature-name-body
131                        (substring uri-feature (match-end 0)))))
132       )
133      ((string-match "^gi\\([0-9]+\\)\\." uri-feature)
134       (intern (format "=>>%s%s"
135                       (make-string (string-to-int
136                                     (match-string 1 uri-feature))
137                                    ?>)
138                       (est-uri-decode-feature-name-body
139                        (substring uri-feature (match-end 0)))))
140       )
141      ((string-match "^o\\." uri-feature)
142       (intern (format "=+>%s"
143                       (est-uri-decode-feature-name-body
144                        (substring uri-feature (match-end 0)))))
145       )
146      ((string-match "^a\\." uri-feature)
147       (intern (format "=>%s"
148                       (est-uri-decode-feature-name-body
149                        (substring uri-feature (match-end 0)))))
150       )
151      ((string-match "^a\\([0-9]+\\)\\." uri-feature)
152       (intern (format "%s>%s"
153                       (make-string (string-to-int
154                                     (match-string 1 uri-feature))
155                                    ?=)
156                       (est-uri-decode-feature-name-body
157                        (substring uri-feature (match-end 0)))))
158       )
159      ((and (setq uri-feature (est-uri-decode-feature-name-body uri-feature))
160            (setq feature (intern (format "=>%s" uri-feature)))
161            (find-charset feature))
162       feature)
163      ((and (setq feature (intern (format "=>>%s" uri-feature)))
164            (find-charset feature))
165       feature)
166      ((and (setq feature (intern (format "=>>>%s" uri-feature)))
167            (find-charset feature))
168       feature)
169      ((and (setq feature (intern (format "=%s" uri-feature)))
170            (find-charset feature))
171       feature)
172      (t (intern uri-feature)))))
173
174 (defun www-uri-encode-feature-name (feature-name)
175   (setq feature-name (symbol-name feature-name))
176   (cond
177    ((string-match "^=\\+>\\([^=>]+\\)" feature-name)
178     (concat "o."
179             (est-uri-encode-feature-name-body
180              (substring feature-name (match-beginning 1))))
181     )
182    ((string-match "^=\\([^=>]+\\)" feature-name)
183     (concat "rep."
184             (est-uri-encode-feature-name-body
185              (substring feature-name (match-beginning 1))))
186     )
187    ((string-match "^==\\([^=>]+\\)" feature-name)
188     (concat "g2."
189             (est-uri-encode-feature-name-body
190              (substring feature-name (match-beginning 1))))
191     )
192    ((string-match "^===\\([^=>]+\\)" feature-name)
193     (concat "repi."
194             (est-uri-encode-feature-name-body
195              (substring feature-name (match-beginning 1))))
196     )
197    ((string-match "^=>>\\([^=>]+\\)" feature-name)
198     (concat "g."
199             (est-uri-encode-feature-name-body
200              (substring feature-name (match-beginning 1))))
201     )
202    ((string-match "^=>>>\\([^=>]+\\)" feature-name)
203     (concat "gi."
204             (est-uri-encode-feature-name-body
205              (substring feature-name (match-beginning 1))))
206     )
207    ((string-match "^=>>\\(>+\\)" feature-name)
208     (format "gi%d.%s"
209             (length (match-string 1 feature-name))
210             (est-uri-encode-feature-name-body
211              (substring feature-name (match-end 1))))
212     )
213    ((string-match "^=>\\([^=>]+\\)" feature-name)
214     (concat "a."
215             (est-uri-encode-feature-name-body
216              (substring feature-name (match-beginning 1))))
217     )
218    ((string-match "^\\(=+\\)>" feature-name)
219     (format "a%d.%s"
220             (length (match-string 1 feature-name))
221             (est-uri-encode-feature-name-body
222              (substring feature-name (match-end 0))))
223     )
224    ((string-match "^->" feature-name)
225     (concat "to."
226             (est-uri-encode-feature-name-body
227              (substring feature-name (match-end 0))))
228     )
229    ((string-match "^<-" feature-name)
230     (concat "from."
231             (est-uri-encode-feature-name-body
232              (substring feature-name (match-end 0))))
233     )
234    ((string-match "^\\*" feature-name)
235     (concat "meta."
236             (est-uri-encode-feature-name-body
237              (substring feature-name (match-end 0))))
238     )
239    (t (est-uri-encode-feature-name-body feature-name))))
240
241
242 (defvar chise-turtle-ccs-prefix-alist nil)
243
244 (defun chise-turtle-uri-decode-feature-name (uri-feature)
245   (cond ((string= "a.ucs" uri-feature)
246          '=ucs)
247         ((string= "a.big5" uri-feature)
248          '=big5)
249         (t
250          (www-uri-decode-feature-name uri-feature))))
251
252 (defun chise-turtle-uri-encode-ccs-name (feature-name)
253   (cond
254    ((eq '=ucs feature-name)
255     "a.ucs")
256    ((eq '=big5 feature-name)
257     "a.big5")
258    ((eq '==>ucs@bucs feature-name)
259     "bucs")
260    (t
261     (mapconcat (lambda (c)
262                  (cond
263                   ((eq c ?@)
264                    "_")
265                   ((eq c ?+)
266                    "._.")
267                   ((eq c ?=)
268                    ".:.")
269                   ((eq c ?|)
270                    "._cmp_.")
271                   (t
272                    (char-to-string c))))
273                (www-uri-encode-feature-name feature-name)
274                ""))))
275
276 (defun charset-code-point-format-spec (ccs)
277   (cond ((memq ccs '(=ucs))
278          "0x%04X")
279         (t
280          (let ((ccs-name (symbol-name ccs)))
281            (cond
282             ((string-match
283               "\\(shinjigen\\|daikanwa/ho\\|=>iwds-1\\)"
284               ccs-name)
285              "%04d")
286             ((string-match
287               "\\(gt\\|daikanwa\\|adobe-japan1\\|cbeta\\|zinbun-oracle\\|hng\\)"
288               ccs-name)
289              "%05d")
290             ((string-match "\\(hanyo-denshi/ks\\|koseki\\|mj\\)" ccs-name)
291              "%06d")
292             ((string-match "hanyo-denshi/tk" ccs-name)
293              "%08d")
294             (t
295              "0x%X"))))))
296
297 (defun chise-turtle-format-ccs-code-point (ccs code-point)
298   (let ((ccs-uri (chise-turtle-uri-encode-ccs-name ccs)))
299     (unless (assoc ccs-uri chise-turtle-ccs-prefix-alist)
300       (setq chise-turtle-ccs-prefix-alist
301             (cons (cons ccs-uri ccs)
302                   chise-turtle-ccs-prefix-alist)))
303     (format "%s:%s"
304             ccs-uri
305             (format (charset-code-point-format-spec ccs)
306                     code-point))))
307
308 (defun chise-turtle-encode-char (object)
309   (let (spec cell dest
310         ccs ret ret2)
311     (if (setq ret (encode-char object '=ucs))
312         (chise-turtle-format-ccs-code-point '=ucs ret)
313       (setq spec (char-attribute-alist object))
314       (while (and spec
315                   (setq cell (pop spec)))
316         (if (and (find-charset (car cell))
317                  (setq ret (cdr cell)))
318             (setq dest (cons cell dest))))
319       (setq ret (car (sort dest (lambda (a b)
320                                   (char-attribute-name< (car a)(car b)))))
321             ccs (car ret)
322             ret (cdr ret))
323       (cond (ret
324              (chise-turtle-format-ccs-code-point ccs ret)
325              )
326             ((and (setq ccs (car (split-char object)))
327                   (setq ret (encode-char object ccs)))
328              (chise-turtle-format-ccs-code-point ccs ret)
329              )
330             ((setq ret (get-char-attribute object 'ideographic-combination))
331              (format "ideocomb:%s"
332                      (mapconcat (lambda (cell)
333                                   (cond ((characterp cell)
334                                          (char-to-string cell)
335                                          )
336                                         ((setq ret2 (find-char cell))
337                                          (char-to-string ret2)
338                                          )
339                                         (t
340                                          (format "%S" cell)
341                                          )))
342                                 ret ""))
343              )
344             (t
345              (format "system-char-id:0x%X"
346                      (encode-char object 'system-char-id))
347              )))))
348
349
350 ;;; @ end
351 ;;;
352
353 (provide 'chiset-common)
354
355 ;;; chiset-common.el ends here