(www-uri-decode-char): Expect char-rep to be encoded as URI.
[chise/est.git] / cwiki-common.el
1 ;; -*- coding: utf-8-mcs-er -*-
2 (require 'char-db-util)
3
4 (defvar chise-wiki-view-url "view.cgi")
5 (defvar chise-wiki-edit-url "edit/edit.cgi")
6
7 (defvar chise-wiki-glyphs-url
8   "http://chise.zinbun.kyoto-u.ac.jp/glyphs/")
9
10 (defun decode-uri-string (string &optional coding-system)
11   (if (> (length string) 0)
12       (let ((i 0)
13             dest)
14         (setq string
15               (mapconcat (lambda (char)
16                            (if (eq char ?+)
17                                " "
18                              (char-to-string char)))
19                          string ""))
20         (while (string-match "%\\([0-9A-F][0-9A-F]\\)" string i)
21           (setq dest (concat dest
22                              (substring string i (match-beginning 0))
23                              (char-to-string
24                               (int-char
25                                (string-to-int (match-string 1 string) 16))))
26                 i (match-end 0)))
27         (decode-coding-string
28          (concat dest (substring string i))
29          coding-system))))
30
31 (defun www-feature-type (feature-name)
32   (or (char-feature-property feature-name 'type)
33       (let ((str (symbol-name feature-name)))
34         (cond
35          ((string-match "^\\(->\\|<-\\)" str)
36           'relation)
37          ((string-match "^ideographic-structure\\(@\\|$\\)" str)
38           'structure)
39          ))))
40
41 (defun www-feature-value-format (feature-name)
42   (or (char-feature-property feature-name 'value-format)
43       (let ((type (www-feature-type feature-name)))
44         (cond ((eq type 'relation)
45                'space-separated-char-list)
46               ((eq type 'structure)
47                'space-separated-ids)))
48       (if (find-charset feature-name)
49           (if (and (= (charset-dimension feature-name) 2)
50                    (= (charset-chars feature-name) 94))
51               '("0x" (HEX)
52                 " (" (decimal) ") <" (ku-ten) ">")
53             '("0x" (HEX) " (" (decimal) ")")))))
54
55
56 ;;; @ URI representation
57 ;;;
58
59 (defun www-uri-decode-feature-name (uri-feature)
60   (let (feature)
61     (cond
62      ((string-match "^from\\." uri-feature)
63       (intern (format "<-%s" (substring uri-feature (match-end 0))))
64       )
65      ((string-match "^to\\." uri-feature)
66       (intern (format "->%s" (substring uri-feature (match-end 0))))
67       )
68      ((string-match "^rep\\." uri-feature)
69       (intern (format "=%s" (substring uri-feature (match-end 0))))
70       )
71      ((string-match "^g\\." uri-feature)
72       (intern (format "=>>%s" (substring uri-feature (match-end 0))))
73       )
74      ((string-match "^gi\\." uri-feature)
75       (intern (format "=>>>%s" (substring uri-feature (match-end 0))))
76       )
77      ((string-match "^gi\\([0-9]+\\)\\." uri-feature)
78       (intern (format "=>>%s%s"
79                       (make-string (string-to-int
80                                     (match-string 1 uri-feature))
81                                    ?>)
82                       (substring uri-feature (match-end 0))))
83       )
84      ((string-match "^a\\." uri-feature)
85       (intern (format "=>%s" (substring uri-feature (match-end 0))))
86       )
87      ((string-match "^a\\([0-9]+\\)\\." uri-feature)
88       (intern (format "%s>%s"
89                       (make-string (string-to-int
90                                     (match-string 1 uri-feature))
91                                    ?=)
92                       (substring uri-feature (match-end 0))))
93       )
94      ((and (setq feature (intern (format "=>%s" uri-feature)))
95            (find-charset feature))
96       feature)
97      ((and (setq feature (intern (format "=>>%s" uri-feature)))
98            (find-charset feature))
99       feature)
100      ((and (setq feature (intern (format "=>>>%s" uri-feature)))
101            (find-charset feature))
102       feature)
103      ((and (setq feature (intern (format "=%s" uri-feature)))
104            (find-charset feature))
105       feature)
106      (t (intern uri-feature)))))
107
108 (defun www-uri-encode-feature-name (feature-name)
109   (setq feature-name (symbol-name feature-name))
110   (cond
111    ((string-match "^=\\([^=>]+\\)" feature-name)
112     (concat "rep." (substring feature-name (match-beginning 1)))
113     )
114    ((string-match "^=>>\\([^=>]+\\)" feature-name)
115     (concat "g." (substring feature-name (match-beginning 1)))
116     )
117    ((string-match "^=>>>\\([^=>]+\\)" feature-name)
118     (concat "gi." (substring feature-name (match-beginning 1)))
119     )
120    ((string-match "^=>>\\(>+\\)" feature-name)
121     (format "gi%d.%s"
122             (length (match-string 1 feature-name))
123             (substring feature-name (match-end 1)))
124     )
125    ((string-match "^=>\\([^=>]+\\)" feature-name)
126     (concat "a." (substring feature-name (match-beginning 1)))
127     )
128    ((string-match "^\\(=+\\)>" feature-name)
129     (format "a%d.%s"
130             (length (match-string 1 feature-name))
131             (substring feature-name (match-end 0)))
132     )
133    ((string-match "^->" feature-name)
134     (concat "to." (substring feature-name (match-end 0)))
135     )
136    ((string-match "^<-" feature-name)
137     (concat "from." (substring feature-name (match-end 0)))
138     )
139    (t feature-name)))
140
141 (defun www-uri-decode-char (char-rep)
142   (let (ccs cpos)
143     (cond
144      ((string-match "%3A" char-rep)
145       (setq ccs (substring char-rep 0 (match-beginning 0))
146             cpos (substring char-rep (match-end 0)))
147       (setq ccs (www-uri-decode-feature-name ccs))
148       (cond
149        ((string-match "^0x" cpos)
150         (setq cpos
151               (string-to-number (substring cpos (match-end 0)) 16))
152         )
153        (t
154         (setq cpos (string-to-number cpos))
155         ))
156       (if (numberp cpos)
157           (decode-char ccs cpos))
158       )
159      (t
160       (setq char-rep (decode-uri-string char-rep 'utf-8-mcs-er))
161       (when (= (length char-rep) 1)
162         (aref char-rep 0))
163       ))))
164
165 (defun www-uri-encode-char (char)
166   (if (encode-char char '=ucs)
167       (mapconcat
168        (lambda (byte)
169          (format "%%%02X" byte))
170        (encode-coding-string (char-to-string char) 'utf-8-mcs-er)
171        "")
172     (let ((ccs-list '(; =ucs
173                       =cns11643-1 =cns11643-2 =cns11643-3
174                       =cns11643-4 =cns11643-5 =cns11643-6 =cns11643-7
175                       =gb2312 =gb12345
176                       =jis-x0208 =jis-x0208@1990
177                       =jis-x0212
178                       =cbeta =jef-china3
179                       =jis-x0213-1@2000 =jis-x0213-1@2004
180                       =jis-x0208@1983 =jis-x0208@1978
181                       =zinbun-oracle
182                       =daikanwa
183                       =gt =gt-k
184                       =big5
185                       =big5-cdp
186                       =>>jis-x0208 =>>jis-x0213-1
187                       =>jis-x0208 =>jis-x0213-1))
188           ccs ret)
189       (while (and ccs-list
190                   (setq ccs (pop ccs-list))
191                   (not (setq ret (encode-char char ccs 'defined-only)))))
192       (cond (ret
193              (format "%s:0x%X"
194                      (www-uri-encode-feature-name ccs)
195                      ret))
196             ((setq ccs (car (split-char char)))
197              (format "%s:0x%X"
198                      (www-uri-encode-feature-name ccs)
199                      (encode-char char ccs)))))))
200
201
202 ;;; @ Feature name presentation
203 ;;;
204
205 (defun www-format-feature-name-default (feature-name)
206   (mapconcat
207    #'capitalize
208    (split-string
209     (symbol-name feature-name)
210     "-")
211    " "))
212
213 (defun www-format-feature-name-as-rel-to (feature-name)
214   (concat "\u2192" (substring (symbol-name feature-name) 2)))
215
216 (defun www-format-feature-name-as-rel-from (feature-name)
217   (concat "\u2190" (substring (symbol-name feature-name) 2)))
218
219 (defun www-format-feature-name-as-CCS (feature-name)
220   (let* ((rest
221           (split-string
222            (symbol-name feature-name)
223            "-"))
224          (dest (upcase (pop rest))))
225     (when (string-match "^=+>*" dest)
226       (setq dest (concat (substring dest 0 (match-end 0))
227                          " "
228                          (substring dest (match-end 0)))))
229     (cond
230      (rest
231       (while (cdr rest)
232         (setq dest (concat dest " " (upcase (pop rest)))))
233       (if (string-match "^[0-9]+$" (car rest))
234           (concat dest "-" (car rest))
235         (concat dest " " (upcase (car rest))))
236       )
237      (t dest))))
238
239 (defun www-format-feature-name (feature-name &optional lang)
240   (let (name)
241     (www-format-encode-string
242      (cond
243       ((or (and lang
244                 (char-feature-property
245                  feature-name
246                  (intern (format "name@%s" lang))))
247            (char-feature-property
248             feature-name 'name)))
249       ((find-charset feature-name)
250        (www-format-feature-name-as-CCS feature-name))
251       ((and (setq name (symbol-name feature-name))
252             (string-match "^\\(->\\)" name))
253        (www-format-feature-name-as-rel-to feature-name))
254       ((string-match "^\\(<-\\)" name)
255        (www-format-feature-name-as-rel-from feature-name))
256       (t
257        (www-format-feature-name-default feature-name))))))
258
259
260 ;;; @ Feature value presentation
261 ;;;
262
263 (defun www-format-value-as-kuten (value)
264   (format "%02d-%02d"
265           (- (lsh value -8) 32)
266           (- (logand value 255) 32)))
267
268 (defun www-format-value-as-char-list (value &optional without-tags)
269   (if (listp value)
270       (mapconcat
271        (if without-tags
272            (lambda (unit)
273              (www-format-encode-string
274               (format (if (characterp unit)
275                           "%c"
276                         "%s")
277                       unit)
278               'without-tags))
279          (lambda (unit)
280            (if (characterp unit)
281                (format "<a href=\"%s?char=%s\">%s</a>"
282                        chise-wiki-view-url
283                        (www-uri-encode-char unit)
284                        (www-format-encode-string (char-to-string unit)))
285              (www-format-encode-string (format "%s" unit)))))
286        value " ")
287     (www-format-encode-string (format "%s" value) without-tags)))
288
289 (defun www-format-value-as-ids (value &optional without-tags)
290   (if (listp value)
291       (mapconcat
292        (if without-tags
293            (lambda (unit)
294              (www-format-encode-string
295               (format (if (characterp unit)
296                           "%c"
297                         "%s")
298                       unit)
299               'without-tags))
300          (lambda (unit)
301            (if (characterp unit)
302                (format "<a href=\"%s?char=%s\">%s</a>"
303                        chise-wiki-view-url
304                        (www-uri-encode-char unit)
305                        (www-format-encode-string (char-to-string unit)))
306              (www-format-encode-string (format "%s" unit)))))
307        (ideographic-structure-to-ids value) " ")
308     (www-format-encode-string (format "%s" value) without-tags)))
309
310 (defun www-format-value-as-S-exp (value &optional without-tags)
311   (www-format-encode-string (format "%S" value) without-tags))
312
313 (defun www-format-value-as-HEX (value)
314   (if (integerp value)
315       (format "%X" value)
316     (www-format-value-as-S-exp value)))
317
318 (defun www-format-value-as-CCS-default (value)
319   (if (integerp value)
320       (format "0x%s (%d)"
321               (www-format-value-as-HEX value)
322               value)
323     (www-format-value-as-S-exp value)))
324
325 (defun www-format-value-as-CCS-94x94 (value)
326   (if (integerp value)
327       (format "0x%s [%s] (%d)"
328               (www-format-value-as-HEX value)
329               (www-format-value-as-kuten value)
330               value)
331     (www-format-value-as-S-exp value)))
332
333 (defun www-format-value (value &optional feature-name format without-tags)
334   ;; (cond
335   ;;  ((find-charset feature-name)
336   ;;   (cond
337   ;;    ((and (= (charset-chars feature-name) 94)
338   ;;          (= (charset-dimension feature-name) 2))
339   ;;     (www-format-value-as-CCS-94x94 value))
340   ;;    (t
341   ;;     (www-format-value-as-CCS-default value)))
342   ;;   )
343   ;;  (t
344   ;;   (www-format-value-as-S-exp value)))
345   (www-format-apply-value format nil value nil nil without-tags)
346   )
347
348
349 ;;; @ format evaluator
350 ;;;
351
352 (defun www-format-encode-string (string &optional without-tags)
353   (with-temp-buffer
354     (insert string)
355     (let (plane code)
356       (goto-char (point-min))
357       (while (search-forward "<" nil t)
358         (replace-match "&lt;" nil t))
359       (goto-char (point-min))
360       (while (search-forward ">" nil t)
361         (replace-match "&gt;" nil t))
362       (if without-tags
363           (encode-coding-region (point-min)(point-max) 'utf-8-mcs-er)
364         (let ((coded-charset-entity-reference-alist
365                (list*
366                 '(=cns11643-1           "C1-" 4 X)
367                 '(=cns11643-2           "C2-" 4 X)
368                 '(=cns11643-3           "C3-" 4 X)
369                 '(=cns11643-4           "C4-" 4 X)
370                 '(=cns11643-5           "C5-" 4 X)
371                 '(=cns11643-6           "C6-" 4 X)
372                 '(=cns11643-7           "C7-" 4 X)
373                 '(=gb2312               "G0-" 4 X)
374                 '(=gb12345              "G1-" 4 X)
375                 '(=jis-x0208@1990       "J90-" 4 X)
376                 '(=jis-x0212            "JSP-" 4 X)
377                 '(=cbeta                "CB" 5 d)
378                 '(=jef-china3           "JC3-" 4 X)
379                 '(=jis-x0208@1997       "J97-" 4 X)
380                 '(=jis-x0208@1978       "J78-" 4 X)
381                 '(=jis-x0208@1983       "J83-" 4 X)
382                 '(=zinbun-oracle        "ZOB-" 4 d)
383                 '(=daikanwa             "M-" 5 d)
384                 coded-charset-entity-reference-alist)))
385           (encode-coding-region (point-min)(point-max) 'utf-8-mcs-er)
386
387           (goto-char (point-min))
388           (while (re-search-forward "&CB\\([0-9]+\\);" nil t)
389             (setq code (string-to-int (match-string 1)))
390             (replace-match
391              (format "<img alt=\"CB%05d\" src=\"%s/cb-gaiji/%02d/CB%05d.gif\">"
392                      code
393                      chise-wiki-glyphs-url
394                      (/ code 1000) code)
395              t 'literal))
396
397           (goto-char (point-min))
398           (while (re-search-forward "&J\\(78\\|83\\|90\\|97\\|SP\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
399             (setq plane (match-string 1)
400                   code (string-to-int (match-string 2) 16))
401             (replace-match
402              (format "<img alt=\"J%s-%04X\" src=\"%s/JIS-%s/%02d-%02d.gif\">"
403                      plane code
404                      chise-wiki-glyphs-url
405                      plane
406                      (- (lsh code -8) 32)
407                      (- (logand code 255) 32))
408              t 'literal))
409
410           (goto-char (point-min))
411           (while (re-search-forward "&G\\([01]\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
412             (setq plane (string-to-int (match-string 1))
413                   code (string-to-int (match-string 2) 16))
414             (replace-match
415              (format "<img alt=\"GB%d-%04X\" src=\"%s/GB%d/%02d-%02d.gif\">"
416                      plane code
417                      chise-wiki-glyphs-url
418                      plane
419                      (- (lsh code -8) 32)
420                      (- (logand code 255) 32))
421              t 'literal))
422
423           (goto-char (point-min))
424           (while (re-search-forward "&C\\([1-7]\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
425             (setq plane (string-to-int (match-string 1))
426                   code (string-to-int (match-string 2) 16))
427             (replace-match
428              (format "<img alt=\"CNS%d-%04X\" src=\"%s/CNS%d/%04X.gif\">"
429                      plane code
430                      chise-wiki-glyphs-url
431                      plane code)
432              t 'literal))
433           ))
434       (goto-char (point-min))
435       (while (search-forward "&GT-" nil t)
436         (replace-match "&amp;GT-" t 'literal))
437
438       (buffer-string))))
439
440 (defun www-format-props-to-string (props &optional format)
441   (unless format
442     (setq format (plist-get props :format)))
443   (concat "%"
444           (plist-get props :flag)
445           (if (plist-get props :zero-padding)
446               "0")
447           (if (plist-get props :len)
448               (format "%d" (plist-get props :len)))
449           (cond
450            ((eq format 'decimal) "d")
451            ((eq format 'hex) "x")
452            ((eq format 'HEX) "X")
453            ((eq format 'S-exp) "S")
454            (t "s"))))      
455
456 (defun www-format-apply-value (format props value
457                                       &optional uri-char uri-feature
458                                       without-tags)
459   (let (ret)
460     (setq ret
461           (cond
462            ((memq format '(decimal hex HEX))
463             (if (integerp value)
464                 (format (www-format-props-to-string props format)
465                         value)
466               (www-format-encode-string
467                (format "%s" value)
468                without-tags))
469             )
470            ((eq format 'S-exp)
471             (www-format-encode-string
472              (format (www-format-props-to-string props format)
473                      value)
474              without-tags))
475            ((eq format 'ku-ten)
476             (www-format-value-as-kuten value))
477            ((eq format 'space-separated-char-list)
478             (www-format-value-as-char-list value without-tags))
479            ((eq format 'space-separated-ids)
480             (www-format-value-as-ids value without-tags))
481            (t
482             (setq format 'default)
483             (www-format-encode-string
484              (format (www-format-props-to-string props 'default)
485                      value)
486              without-tags))))
487     (if (or without-tags (eq (plist-get props :mode) 'peek))
488         ret
489       (format "%s <a href=\"%s?char=%s&feature=%s&format=%s\"
490 ><input type=\"submit\" value=\"edit\" /></a>"
491               ret
492               chise-wiki-edit-url
493               uri-char uri-feature format))))
494
495 (defun www-format-eval-feature-value (char
496                                       feature-name
497                                       &optional format lang uri-char value)
498   (unless value
499     (setq value (char-feature char feature-name)))
500   (unless format
501     (setq format (www-feature-value-format feature-name)))
502   (cond
503    ((symbolp format)
504     (www-format-apply-value
505      format nil value
506      uri-char (www-uri-encode-feature-name feature-name))
507     )
508    ((consp format)
509     (cond ((null (cdr format))
510            (setq format (car format))
511            (www-format-apply-value
512             (car format) (nth 1 format) value
513             uri-char (www-uri-encode-feature-name feature-name))
514            )
515           (t
516            (www-format-eval-list format char feature-name lang uri-char)
517            )))))
518
519 (defun www-format-eval-unit (exp char feature-name
520                                  &optional lang uri-char value)
521   (unless value
522     (setq value (char-feature char feature-name)))
523   (unless uri-char
524     (setq uri-char (www-uri-encode-char char)))
525   (cond
526    ((stringp exp) (www-format-encode-string exp))
527    ((null exp) "")
528    ((consp exp)
529     (cond
530      ((memq (car exp) '(value decimal hex HEX ku-ten S-exp default))
531       (if (eq (car exp) 'value)
532           (www-format-eval-feature-value char feature-name
533                                          (plist-get (nth 1 exp) :format)
534                                          lang uri-char value)
535         (www-format-apply-value
536          (car exp) (nth 1 exp) value
537          uri-char (www-uri-encode-feature-name feature-name)))
538       )
539      ((eq (car exp) 'name)
540       (format "<a href=\"%s?feature=%s&char=%s\">%s</a>"
541               chise-wiki-view-url
542               (www-uri-encode-feature-name feature-name)
543               uri-char
544               (www-format-feature-name feature-name lang))
545       )
546      ((eq (car exp) 'link)
547       (format "<a
548  href=\"%s\"
549 >%s</a
550 >"
551               (www-format-eval-list (plist-get (nth 1 exp) :ref)
552                                     char feature-name lang uri-char)
553               (www-format-eval-list (nthcdr 2 exp)
554                                     char feature-name lang uri-char)))
555      (t
556       (format "<%s
557 >%s</%s
558 >"
559               (car exp)
560               (www-format-eval-list (nthcdr 2 exp) char feature-name
561                                     lang uri-char)
562               (car exp)))))))
563
564 (defun www-format-eval-list (format-list char feature-name
565                                          &optional lang uri-char)
566   (if (consp format-list)
567       (mapconcat
568        (lambda (exp)
569          (www-format-eval-unit exp char feature-name lang uri-char))
570        format-list "")
571     (www-format-eval-unit format-list char feature-name lang uri-char)))
572
573
574 ;;; @ HTML generator
575 ;;;
576
577 (defun www-html-display-text (text)
578   (princ
579    (with-temp-buffer
580      (insert text)
581      (goto-char (point-min))
582      (while (search-forward "<" nil t)
583        (replace-match "&lt;" nil t))
584      (goto-char (point-min))
585      (while (search-forward ">" nil t)
586        (replace-match "&gt;" nil t))
587      (goto-char (point-min))
588      (while (re-search-forward "\\[\\[\\([^]|[]+\\)|\\([^][]+\\)\\]\\]" nil t)
589        (replace-match
590         (format "<a href=\"%s\">%s</a>"
591                 (match-string 2)
592                 (match-string 1))
593         nil t))
594      (encode-coding-region (point-min)(point-max) 'utf-8-mcs-er)
595      (goto-char (point-min))
596      (while (search-forward "&GT-" nil t)
597        (replace-match "&amp;GT-" nil t))
598      (buffer-string))))
599
600 (defun www-html-display-paragraph (text)
601   (princ "<p>")
602   (www-html-display-text text)
603   (princ "</p>\n"))
604
605 (provide 'cwiki-common)