(www-uri-encode-feature-name): Fix problem when feature-name is not
[chise/tomoyo-tools.git] / chiset-common.el
1 ;;; chiset-common.el --- CHISET common utility -*- coding: utf-8-er; -*-
2
3 ;; Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018,
4 ;;   2019, 2021 MORIOKA Tomohiko.
5
6 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
7 ;; Keywords: CHISE, RDF, Turtle, WWW
8
9 ;; This file is part of CHISET (CHISE/Turtle).
10
11 ;; XEmacs CHISE is free software; you can redistribute it and/or
12 ;; modify it under the terms of the GNU General Public License as
13 ;; published by the Free Software Foundation; either version 2, or (at
14 ;; your option) any later version.
15
16 ;; XEmacs CHISE is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 ;; General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with XEmacs CHISE; see the file COPYING.  If not, write to
23 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Code:
27
28 (defun decode-uri-string (string &optional coding-system)
29   (if (> (length string) 0)
30       (let ((i 0)
31             dest)
32         (setq string
33               (mapconcat (lambda (char)
34                            (if (eq char ?+)
35                                " "
36                              (char-to-string char)))
37                          string ""))
38         (while (string-match "%\\([0-9A-F][0-9A-F]\\)" string i)
39           (setq dest (concat dest
40                              (substring string i (match-beginning 0))
41                              (char-to-string
42                               (int-char
43                                (string-to-int (match-string 1 string) 16))))
44                 i (match-end 0)))
45         (decode-coding-string
46          (concat dest (substring string i))
47          coding-system))))
48
49
50 ;;; @ URI representation
51 ;;;
52
53 (defun est-uri-decode-feature-name-body (uri-feature)
54   (let ((len (length uri-feature))
55         (i 0)
56         ch dest)
57     (while (< i len)
58       (setq dest
59             (concat
60              dest
61              (if (eq (aref uri-feature i) ?\.)
62                  (if (and (< (+ i 2) len)
63                           (eq (aref uri-feature (+ i 2)) ?\.))
64                      (prog1
65                          (cond
66                           ((eq (setq ch (aref uri-feature (1+ i))) ?\.)
67                            "/")
68                           ((eq ch ?-)
69                            "*")
70                           ((eq ch ?_)
71                            "+")
72                           (t
73                            (substring uri-feature i (+ i 3))
74                            ))
75                        (setq i (+ i 3)))
76                    (setq i (1+ i))
77                    ".")
78                (prog1
79                    (char-to-string (aref uri-feature i))
80                  (setq i (1+ i)))))))
81     dest))
82
83 (defun est-uri-encode-feature-name-body (feature)
84   (mapconcat (lambda (c)
85                (cond ((eq c ?*)
86                       ".-.")
87                      ((eq c ?/)
88                       "...")
89                      ((eq c ?+)
90                       "._.")
91                      (t (char-to-string c))))
92              feature ""))
93
94 (defun est-uri-decode-feature-name (uri-feature)
95   (let (feature)
96     (setq uri-feature (decode-uri-string uri-feature 'utf-8-mcs-er))
97     (cond
98      ((string-match "^from\\." uri-feature)
99       (intern (format "<-%s"
100                       (est-uri-decode-feature-name-body
101                        (substring uri-feature (match-end 0)))))
102       )
103      ((string-match "^to\\." uri-feature)
104       (intern (format "->%s"
105                       (est-uri-decode-feature-name-body
106                        (substring uri-feature (match-end 0)))))
107       )
108      ((string-match "^meta\\." uri-feature)
109       (intern (format "*%s"
110                       (est-uri-decode-feature-name-body
111                        (substring uri-feature (match-end 0)))))
112       )
113      ((string-match "^rep\\." uri-feature)
114       (intern (format "=%s"
115                       (est-uri-decode-feature-name-body
116                        (substring uri-feature (match-end 0)))))
117       )
118      ((string-match "^rep[2i]\\." uri-feature)
119       (intern (format "===%s"
120                       (est-uri-decode-feature-name-body
121                        (substring uri-feature (match-end 0)))))
122       )
123      ((string-match "^g\\." uri-feature)
124       (intern (format "=>>%s"
125                       (est-uri-decode-feature-name-body
126                        (substring uri-feature (match-end 0)))))
127       )
128      ((string-match "^g[i2]\\." uri-feature)
129       (intern (format "==%s"
130                       (est-uri-decode-feature-name-body
131                        (substring uri-feature (match-end 0)))))
132       )
133      ((string-match "^gi\\([0-9]+\\)\\." uri-feature)
134       (intern (format "=>>%s%s"
135                       (make-string (string-to-int
136                                     (match-string 1 uri-feature))
137                                    ?>)
138                       (est-uri-decode-feature-name-body
139                        (substring uri-feature (match-end 0)))))
140       )
141      ((string-match "^o\\." uri-feature)
142       (intern (format "=+>%s"
143                       (est-uri-decode-feature-name-body
144                        (substring uri-feature (match-end 0)))))
145       )
146      ((string-match "^a\\." uri-feature)
147       (intern (format "=>%s"
148                       (est-uri-decode-feature-name-body
149                        (substring uri-feature (match-end 0)))))
150       )
151      ((string-match "^a\\([0-9]+\\)\\." uri-feature)
152       (intern (format "%s>%s"
153                       (make-string (string-to-int
154                                     (match-string 1 uri-feature))
155                                    ?=)
156                       (est-uri-decode-feature-name-body
157                        (substring uri-feature (match-end 0)))))
158       )
159      (t
160       (intern (est-uri-decode-feature-name-body uri-feature))
161       ))))
162
163 (defun www-uri-decode-feature-name (uri-feature)
164   (let (feature)
165     (setq uri-feature (decode-uri-string uri-feature 'utf-8-mcs-er))
166     (cond
167      ((string-match "^from\\." uri-feature)
168       (intern (format "<-%s"
169                       (est-uri-decode-feature-name-body
170                        (substring uri-feature (match-end 0)))))
171       )
172      ((string-match "^to\\." uri-feature)
173       (intern (format "->%s"
174                       (est-uri-decode-feature-name-body
175                        (substring uri-feature (match-end 0)))))
176       )
177      ((string-match "^meta\\." uri-feature)
178       (intern (format "*%s"
179                       (est-uri-decode-feature-name-body
180                        (substring uri-feature (match-end 0)))))
181       )
182      ((string-match "^rep\\." uri-feature)
183       (intern (format "=%s"
184                       (est-uri-decode-feature-name-body
185                        (substring uri-feature (match-end 0)))))
186       )
187      ((string-match "^rep[2i]\\." uri-feature)
188       (intern (format "===%s"
189                       (est-uri-decode-feature-name-body
190                        (substring uri-feature (match-end 0)))))
191       )
192      ((string-match "^g\\." uri-feature)
193       (intern (format "=>>%s"
194                       (est-uri-decode-feature-name-body
195                        (substring uri-feature (match-end 0)))))
196       )
197      ((string-match "^g[i2]\\." uri-feature)
198       (intern (format "==%s"
199                       (est-uri-decode-feature-name-body
200                        (substring uri-feature (match-end 0)))))
201       )
202      ((string-match "^gi\\([0-9]+\\)\\." uri-feature)
203       (intern (format "=>>%s%s"
204                       (make-string (string-to-int
205                                     (match-string 1 uri-feature))
206                                    ?>)
207                       (est-uri-decode-feature-name-body
208                        (substring uri-feature (match-end 0)))))
209       )
210      ((string-match "^o\\." uri-feature)
211       (intern (format "=+>%s"
212                       (est-uri-decode-feature-name-body
213                        (substring uri-feature (match-end 0)))))
214       )
215      ((string-match "^a\\." uri-feature)
216       (intern (format "=>%s"
217                       (est-uri-decode-feature-name-body
218                        (substring uri-feature (match-end 0)))))
219       )
220      ((string-match "^a\\([0-9]+\\)\\." uri-feature)
221       (intern (format "%s>%s"
222                       (make-string (string-to-int
223                                     (match-string 1 uri-feature))
224                                    ?=)
225                       (est-uri-decode-feature-name-body
226                        (substring uri-feature (match-end 0)))))
227       )
228      ((and (setq uri-feature (est-uri-decode-feature-name-body uri-feature))
229            (setq feature (intern (format "=>%s" uri-feature)))
230            (find-charset feature))
231       feature)
232      ((and (setq feature (intern (format "=>>%s" uri-feature)))
233            (find-charset feature))
234       feature)
235      ((and (setq feature (intern (format "=>>>%s" uri-feature)))
236            (find-charset feature))
237       feature)
238      ((and (setq feature (intern (format "=%s" uri-feature)))
239            (find-charset feature))
240       feature)
241      (t (intern uri-feature)))))
242
243 (defun www-uri-encode-feature-name (feature-name)
244   (setq feature-name (format "%s" feature-name))
245   (cond
246    ((string-match "^=\\+>\\([^=>]+\\)" feature-name)
247     (concat "o."
248             (est-uri-encode-feature-name-body
249              (substring feature-name (match-beginning 1))))
250     )
251    ((string-match "^=\\([^=>]+\\)" feature-name)
252     (concat "rep."
253             (est-uri-encode-feature-name-body
254              (substring feature-name (match-beginning 1))))
255     )
256    ((string-match "^==\\([^=>]+\\)" feature-name)
257     (concat "g2."
258             (est-uri-encode-feature-name-body
259              (substring feature-name (match-beginning 1))))
260     )
261    ((string-match "^===\\([^=>]+\\)" feature-name)
262     (concat "repi."
263             (est-uri-encode-feature-name-body
264              (substring feature-name (match-beginning 1))))
265     )
266    ((string-match "^=>>\\([^=>]+\\)" feature-name)
267     (concat "g."
268             (est-uri-encode-feature-name-body
269              (substring feature-name (match-beginning 1))))
270     )
271    ((string-match "^=>>>\\([^=>]+\\)" feature-name)
272     (concat "gi."
273             (est-uri-encode-feature-name-body
274              (substring feature-name (match-beginning 1))))
275     )
276    ((string-match "^=>>\\(>+\\)" feature-name)
277     (format "gi%d.%s"
278             (length (match-string 1 feature-name))
279             (est-uri-encode-feature-name-body
280              (substring feature-name (match-end 1))))
281     )
282    ((string-match "^=>\\([^=>]+\\)" feature-name)
283     (concat "a."
284             (est-uri-encode-feature-name-body
285              (substring feature-name (match-beginning 1))))
286     )
287    ((string-match "^\\(=+\\)>" feature-name)
288     (format "a%d.%s"
289             (length (match-string 1 feature-name))
290             (est-uri-encode-feature-name-body
291              (substring feature-name (match-end 0))))
292     )
293    ((string-match "^->" feature-name)
294     (concat "to."
295             (est-uri-encode-feature-name-body
296              (substring feature-name (match-end 0))))
297     )
298    ((string-match "^<-" feature-name)
299     (concat "from."
300             (est-uri-encode-feature-name-body
301              (substring feature-name (match-end 0))))
302     )
303    ((string-match "^\\*" feature-name)
304     (concat "meta."
305             (est-uri-encode-feature-name-body
306              (substring feature-name (match-end 0))))
307     )
308    (t (est-uri-encode-feature-name-body feature-name))))
309
310
311 (defvar chise-turtle-ccs-prefix-alist nil)
312
313 (defun chise-turtle-uri-decode-feature-name (uri-feature)
314   (cond ((string= "a.ucs" uri-feature)
315          '=ucs)
316         ((string= "a.big5" uri-feature)
317          '=big5)
318         (t
319          (www-uri-decode-feature-name uri-feature))))
320
321 (defun chise-turtle-uri-encode-ccs-name (feature-name)
322   (cond
323    ((eq '=ucs feature-name)
324     "a.ucs")
325    ((eq '=big5 feature-name)
326     "a.big5")
327    ((eq '==>ucs@bucs feature-name)
328     "bucs")
329    (t
330     (mapconcat (lambda (c)
331                  (cond
332                   ((eq c ?@)
333                    "_")
334                   ((eq c ?+)
335                    "._.")
336                   ((eq c ?=)
337                    ".:.")
338                   ((eq c ?|)
339                    "._cmp_.")
340                   (t
341                    (char-to-string c))))
342                (www-uri-encode-feature-name feature-name)
343                ""))))
344
345 (defun charset-code-point-format-spec (ccs)
346   (cond ((memq ccs '(=ucs))
347          "0x%04X")
348         (t
349          (let ((ccs-name (symbol-name ccs)))
350            (cond
351             ((string-match
352               "\\(shinjigen\\|daikanwa/ho\\|=>iwds-1\\)"
353               ccs-name)
354              "%04d")
355             ((string-match
356               "\\(gt\\|daikanwa\\|adobe-japan1\\|cbeta\\|zinbun-oracle\\|hng\\)"
357               ccs-name)
358              "%05d")
359             ((string-match "\\(hanyo-denshi/ks\\|koseki\\|mj\\)" ccs-name)
360              "%06d")
361             ((string-match "hanyo-denshi/tk" ccs-name)
362              "%08d")
363             (t
364              "0x%X"))))))
365
366 (defun chise-turtle-format-ccs-code-point (ccs code-point)
367   (let ((ccs-uri (chise-turtle-uri-encode-ccs-name ccs)))
368     (unless (assoc ccs-uri chise-turtle-ccs-prefix-alist)
369       (setq chise-turtle-ccs-prefix-alist
370             (cons (cons ccs-uri ccs)
371                   chise-turtle-ccs-prefix-alist)))
372     (format "%s:%s"
373             ccs-uri
374             (format (charset-code-point-format-spec ccs)
375                     code-point))))
376
377 (defun chise-turtle-encode-char (object)
378   (let (spec cell dest
379         ccs ret ret2)
380     (if (setq ret (encode-char object '=ucs))
381         (chise-turtle-format-ccs-code-point '=ucs ret)
382       (setq spec (char-attribute-alist object))
383       (while (and spec
384                   (setq cell (pop spec)))
385         (if (and (find-charset (car cell))
386                  (setq ret (cdr cell)))
387             (setq dest (cons cell dest))))
388       (setq ret (car (sort dest (lambda (a b)
389                                   (char-attribute-name< (car a)(car b)))))
390             ccs (car ret)
391             ret (cdr ret))
392       (cond (ret
393              (chise-turtle-format-ccs-code-point ccs ret)
394              )
395             ((and (setq ccs (car (split-char object)))
396                   (setq ret (encode-char object ccs)))
397              (chise-turtle-format-ccs-code-point ccs ret)
398              )
399             ((setq ret (get-char-attribute object 'ideographic-combination))
400              (format "ideocomb:%s"
401                      (mapconcat (lambda (cell)
402                                   (cond ((characterp cell)
403                                          (char-to-string cell)
404                                          )
405                                         ((setq ret2 (find-char cell))
406                                          (char-to-string ret2)
407                                          )
408                                         (t
409                                          (format "%S" cell)
410                                          )))
411                                 ret ""))
412              )
413             (t
414              (format "system-char-id:0x%X"
415                      (encode-char object 'system-char-id))
416              )))))
417
418
419 ;;; @ end
420 ;;;
421
422 (provide 'chiset-common)
423
424 ;;; chiset-common.el ends here