(www-uri-decode-char):
[chise/est.git] / cwiki-common.el
1 ;; -*- coding: utf-8-mcs-er -*-
2 (require 'char-db-util)
3
4 (defvar chise-wiki-view-url "view.cgi")
5 (defvar chise-wiki-edit-url "edit/edit.cgi")
6
7 (defvar chise-wiki-glyphs-url
8   "http://chise.zinbun.kyoto-u.ac.jp/glyphs/")
9
10 (defun decode-uri-string (string &optional coding-system)
11   (if (> (length string) 0)
12       (let ((i 0)
13             dest)
14         (setq string
15               (mapconcat (lambda (char)
16                            (if (eq char ?+)
17                                " "
18                              (char-to-string char)))
19                          string ""))
20         (while (string-match "%\\([0-9A-F][0-9A-F]\\)" string i)
21           (setq dest (concat dest
22                              (substring string i (match-beginning 0))
23                              (char-to-string
24                               (int-char
25                                (string-to-int (match-string 1 string) 16))))
26                 i (match-end 0)))
27         (decode-coding-string
28          (concat dest (substring string i))
29          coding-system))))
30
31 (defun www-feature-type (feature-name)
32   (or (char-feature-property feature-name 'type)
33       (let ((str (symbol-name feature-name)))
34         (cond
35          ((string-match "^\\(->\\|<-\\)" str)
36           'relation)
37          ((string-match "^ideographic-structure\\(@\\|$\\)" str)
38           'structure)
39          ))))
40
41 (defun www-feature-value-format (feature-name)
42   (or (char-feature-property feature-name 'value-format)
43       (let ((type (www-feature-type feature-name)))
44         (cond ((eq type 'relation)
45                'space-separated-char-list)
46               ((eq type 'structure)
47                'space-separated-ids)))
48       (if (find-charset feature-name)
49           (if (and (= (charset-dimension feature-name) 2)
50                    (= (charset-chars feature-name) 94))
51               '("0x" (HEX)
52                 " (" (decimal) ") <" (ku-ten) ">")
53             '("0x" (HEX) " (" (decimal) ")")))))
54
55
56 ;;; @ URI representation
57 ;;;
58
59 (defun www-uri-decode-feature-name (uri-feature)
60   (let (feature)
61     (cond
62      ((string-match "^from\\." uri-feature)
63       (intern (format "<-%s" (substring uri-feature (match-end 0))))
64       )
65      ((string-match "^to\\." uri-feature)
66       (intern (format "->%s" (substring uri-feature (match-end 0))))
67       )
68      ((string-match "^rep\\." uri-feature)
69       (intern (format "=%s" (substring uri-feature (match-end 0))))
70       )
71      ((string-match "^g\\." uri-feature)
72       (intern (format "=>>%s" (substring uri-feature (match-end 0))))
73       )
74      ((string-match "^gi\\." uri-feature)
75       (intern (format "=>>>%s" (substring uri-feature (match-end 0))))
76       )
77      ((string-match "^gi\\([0-9]+\\)\\." uri-feature)
78       (intern (format "=>>%s%s"
79                       (make-string (string-to-int
80                                     (match-string 1 uri-feature))
81                                    ?>)
82                       (substring uri-feature (match-end 0))))
83       )
84      ((string-match "^a\\." uri-feature)
85       (intern (format "=>%s" (substring uri-feature (match-end 0))))
86       )
87      ((string-match "^a\\([0-9]+\\)\\." uri-feature)
88       (intern (format "%s>%s"
89                       (make-string (string-to-int
90                                     (match-string 1 uri-feature))
91                                    ?=)
92                       (substring uri-feature (match-end 0))))
93       )
94      ((and (setq feature (intern (format "=>%s" uri-feature)))
95            (find-charset feature))
96       feature)
97      ((and (setq feature (intern (format "=>>%s" uri-feature)))
98            (find-charset feature))
99       feature)
100      ((and (setq feature (intern (format "=>>>%s" uri-feature)))
101            (find-charset feature))
102       feature)
103      ((and (setq feature (intern (format "=%s" uri-feature)))
104            (find-charset feature))
105       feature)
106      (t (intern uri-feature)))))
107
108 (defun www-uri-encode-feature-name (feature-name)
109   (setq feature-name (symbol-name feature-name))
110   (cond
111    ((string-match "^=\\([^=>]+\\)" feature-name)
112     (concat "rep." (substring feature-name (match-beginning 1)))
113     )
114    ((string-match "^=>>\\([^=>]+\\)" feature-name)
115     (concat "g." (substring feature-name (match-beginning 1)))
116     )
117    ((string-match "^=>>>\\([^=>]+\\)" feature-name)
118     (concat "gi." (substring feature-name (match-beginning 1)))
119     )
120    ((string-match "^=>>\\(>+\\)" feature-name)
121     (format "gi%d.%s"
122             (length (match-string 1 feature-name))
123             (substring feature-name (match-end 1)))
124     )
125    ((string-match "^=>\\([^=>]+\\)" feature-name)
126     (concat "a." (substring feature-name (match-beginning 1)))
127     )
128    ((string-match "^\\(=+\\)>" feature-name)
129     (format "a%d.%s"
130             (length (match-string 1 feature-name))
131             (substring feature-name (match-end 0)))
132     )
133    ((string-match "^->" feature-name)
134     (concat "to." (substring feature-name (match-end 0)))
135     )
136    ((string-match "^<-" feature-name)
137     (concat "from." (substring feature-name (match-end 0)))
138     )
139    (t feature-name)))
140
141 (defun www-uri-decode-char (char-rep)
142   (let (ccs cpos)
143     (cond
144      ((string-match "\\(%3A\\|:\\)" char-rep)
145       (setq ccs (substring char-rep 0 (match-beginning 0))
146             cpos (substring char-rep (match-end 0)))
147       (setq ccs (www-uri-decode-feature-name ccs))
148       (cond
149        ((string-match "^0x" cpos)
150         (setq cpos
151               (string-to-number (substring cpos (match-end 0)) 16))
152         )
153        (t
154         (setq cpos (string-to-number cpos))
155         ))
156       (if (numberp cpos)
157           (decode-char ccs cpos))
158       )
159      (t
160       (setq char-rep (decode-uri-string char-rep 'utf-8-mcs-er))
161       (when (= (length char-rep) 1)
162         (aref char-rep 0))
163       ))))
164
165 (defun www-uri-encode-char (char)
166   (if (encode-char char '=ucs)
167       (mapconcat
168        (lambda (byte)
169          (format "%%%02X" byte))
170        (encode-coding-string (char-to-string char) 'utf-8-mcs-er)
171        "")
172     (let ((ccs-list '(; =ucs
173                       =cns11643-1 =cns11643-2 =cns11643-3
174                       =cns11643-4 =cns11643-5 =cns11643-6 =cns11643-7
175                       =gb2312 =gb12345
176                       =jis-x0208 =jis-x0208@1990
177                       =jis-x0212
178                       =cbeta =jef-china3
179                       =jis-x0213-1@2000 =jis-x0213-1@2004
180                       =jis-x0208@1983 =jis-x0208@1978
181                       =zinbun-oracle
182                       =daikanwa
183                       =gt =gt-k
184                       =>>jis-x0208 =>>jis-x0213-1
185                       =>jis-x0208 =>jis-x0213-1
186                       =big5
187                       =big5-cdp))
188           ccs ret)
189       (while (and ccs-list
190                   (setq ccs (pop ccs-list))
191                   (not (setq ret (encode-char char ccs 'defined-only)))))
192       (cond (ret
193              (format "%s:0x%X"
194                      (www-uri-encode-feature-name ccs)
195                      ret))
196             ((and (setq ccs (car (split-char char)))
197                   (setq ret (encode-char char ccs)))
198              (format "%s:0x%X"
199                      (www-uri-encode-feature-name ccs)
200                      ret))
201             (t
202              (format "system-char-id:0x%X"
203                      (encode-char char 'system-char-id))
204              )))))
205
206
207 ;;; @ Feature name presentation
208 ;;;
209
210 (defun www-format-feature-name-default (feature-name)
211   (mapconcat
212    #'capitalize
213    (split-string
214     (symbol-name feature-name)
215     "-")
216    " "))
217
218 (defun www-format-feature-name-as-rel-to (feature-name)
219   (concat "\u2192" (substring (symbol-name feature-name) 2)))
220
221 (defun www-format-feature-name-as-rel-from (feature-name)
222   (concat "\u2190" (substring (symbol-name feature-name) 2)))
223
224 (defun www-format-feature-name-as-CCS (feature-name)
225   (let* ((rest
226           (split-string
227            (symbol-name feature-name)
228            "-"))
229          (dest (upcase (pop rest))))
230     (when (string-match "^=+>*" dest)
231       (setq dest (concat (substring dest 0 (match-end 0))
232                          " "
233                          (substring dest (match-end 0)))))
234     (cond
235      (rest
236       (while (cdr rest)
237         (setq dest (concat dest " " (upcase (pop rest)))))
238       (if (string-match "^[0-9]+$" (car rest))
239           (concat dest "-" (car rest))
240         (concat dest " " (upcase (car rest))))
241       )
242      (t dest))))
243
244 (defun www-format-feature-name* (feature-name &optional lang)
245   (let (name)
246     (cond
247      ((or (and lang
248                (char-feature-property
249                 feature-name
250                 (intern (format "name@%s" lang))))
251           (char-feature-property
252            feature-name 'name)))
253      ((find-charset feature-name)
254       (www-format-feature-name-as-CCS feature-name))
255      ((and (setq name (symbol-name feature-name))
256            (string-match "^\\(->\\)" name))
257       (www-format-feature-name-as-rel-to feature-name))
258      ((string-match "^\\(<-\\)" name)
259       (www-format-feature-name-as-rel-from feature-name))
260      (t
261       (www-format-feature-name-default feature-name)))))
262
263 (defun www-format-feature-name (feature-name &optional lang)
264   (www-format-encode-string
265    (www-format-feature-name* feature-name lang)))
266
267
268 ;;; @ Feature value presentation
269 ;;;
270
271 (defun www-format-value-as-kuten (value)
272   (format "%02d-%02d"
273           (- (lsh value -8) 32)
274           (- (logand value 255) 32)))
275
276 (defun www-format-value-as-char-list (value &optional without-tags)
277   (if (listp value)
278       (mapconcat
279        (if without-tags
280            (lambda (unit)
281              (www-format-encode-string
282               (format (if (characterp unit)
283                           "%c"
284                         "%s")
285                       unit)
286               'without-tags))
287          (lambda (unit)
288            (if (characterp unit)
289                (format "<a href=\"%s?char=%s\">%s</a>"
290                        chise-wiki-view-url
291                        (www-uri-encode-char unit)
292                        (www-format-encode-string (char-to-string unit)))
293              (www-format-encode-string (format "%s" unit)))))
294        value " ")
295     (www-format-encode-string (format "%s" value) without-tags)))
296
297 (defun www-format-value-as-ids (value &optional without-tags)
298   (if (listp value)
299       (mapconcat
300        (if without-tags
301            (lambda (unit)
302              (www-format-encode-string
303               (format (if (characterp unit)
304                           "%c"
305                         "%s")
306                       unit)
307               'without-tags))
308          (lambda (unit)
309            (if (characterp unit)
310                (format "<a href=\"%s?char=%s\">%s</a>"
311                        chise-wiki-view-url
312                        (www-uri-encode-char unit)
313                        (www-format-encode-string (char-to-string unit)))
314              (www-format-encode-string (format "%s" unit)))))
315        (ideographic-structure-to-ids value) " ")
316     (www-format-encode-string (format "%s" value) without-tags)))
317
318 (defun www-format-value-as-S-exp (value &optional without-tags)
319   (www-format-encode-string (format "%S" value) without-tags))
320
321 (defun www-format-value-as-HEX (value)
322   (if (integerp value)
323       (format "%X" value)
324     (www-format-value-as-S-exp value)))
325
326 (defun www-format-value-as-CCS-default (value)
327   (if (integerp value)
328       (format "0x%s (%d)"
329               (www-format-value-as-HEX value)
330               value)
331     (www-format-value-as-S-exp value)))
332
333 (defun www-format-value-as-CCS-94x94 (value)
334   (if (integerp value)
335       (format "0x%s [%s] (%d)"
336               (www-format-value-as-HEX value)
337               (www-format-value-as-kuten value)
338               value)
339     (www-format-value-as-S-exp value)))
340
341 (defun www-format-value (value &optional feature-name format without-tags)
342   ;; (cond
343   ;;  ((find-charset feature-name)
344   ;;   (cond
345   ;;    ((and (= (charset-chars feature-name) 94)
346   ;;          (= (charset-dimension feature-name) 2))
347   ;;     (www-format-value-as-CCS-94x94 value))
348   ;;    (t
349   ;;     (www-format-value-as-CCS-default value)))
350   ;;   )
351   ;;  (t
352   ;;   (www-format-value-as-S-exp value)))
353   (www-format-apply-value format nil value nil nil without-tags)
354   )
355
356
357 ;;; @ format evaluator
358 ;;;
359
360 (defun www-format-encode-string (string &optional without-tags)
361   (with-temp-buffer
362     (insert string)
363     (let (plane code)
364       (goto-char (point-min))
365       (while (search-forward "<" nil t)
366         (replace-match "&lt;" nil t))
367       (goto-char (point-min))
368       (while (search-forward ">" nil t)
369         (replace-match "&gt;" nil t))
370       (if without-tags
371           (encode-coding-region (point-min)(point-max) 'utf-8-mcs-er)
372         (let ((coded-charset-entity-reference-alist
373                (list*
374                 '(=cns11643-1           "C1-" 4 X)
375                 '(=cns11643-2           "C2-" 4 X)
376                 '(=cns11643-3           "C3-" 4 X)
377                 '(=cns11643-4           "C4-" 4 X)
378                 '(=cns11643-5           "C5-" 4 X)
379                 '(=cns11643-6           "C6-" 4 X)
380                 '(=cns11643-7           "C7-" 4 X)
381                 '(=gb2312               "G0-" 4 X)
382                 '(=gb12345              "G1-" 4 X)
383                 '(=jis-x0208@1990       "J90-" 4 X)
384                 '(=jis-x0212            "JSP-" 4 X)
385                 '(=cbeta                "CB" 5 d)
386                 '(=jef-china3           "JC3-" 4 X)
387                 '(=jis-x0208@1997       "J97-" 4 X)
388                 '(=jis-x0208@1978       "J78-" 4 X)
389                 '(=jis-x0208@1983       "J83-" 4 X)
390                 '(=zinbun-oracle        "ZOB-" 4 d)
391                 '(=daikanwa             "M-" 5 d)
392                 coded-charset-entity-reference-alist)))
393           (encode-coding-region (point-min)(point-max) 'utf-8-mcs-er)
394
395           (goto-char (point-min))
396           (while (re-search-forward "&CB\\([0-9]+\\);" nil t)
397             (setq code (string-to-int (match-string 1)))
398             (replace-match
399              (format "<img alt=\"CB%05d\" src=\"%s/cb-gaiji/%02d/CB%05d.gif\">"
400                      code
401                      chise-wiki-glyphs-url
402                      (/ code 1000) code)
403              t 'literal))
404
405           (goto-char (point-min))
406           (while (re-search-forward "&J\\(78\\|83\\|90\\|97\\|SP\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
407             (setq plane (match-string 1)
408                   code (string-to-int (match-string 2) 16))
409             (replace-match
410              (format "<img alt=\"J%s-%04X\" src=\"%s/JIS-%s/%02d-%02d.gif\">"
411                      plane code
412                      chise-wiki-glyphs-url
413                      plane
414                      (- (lsh code -8) 32)
415                      (- (logand code 255) 32))
416              t 'literal))
417
418           (goto-char (point-min))
419           (while (re-search-forward "&G\\([01]\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
420             (setq plane (string-to-int (match-string 1))
421                   code (string-to-int (match-string 2) 16))
422             (replace-match
423              (format "<img alt=\"GB%d-%04X\" src=\"%s/GB%d/%02d-%02d.gif\">"
424                      plane code
425                      chise-wiki-glyphs-url
426                      plane
427                      (- (lsh code -8) 32)
428                      (- (logand code 255) 32))
429              t 'literal))
430
431           (goto-char (point-min))
432           (while (re-search-forward "&C\\([1-7]\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
433             (setq plane (string-to-int (match-string 1))
434                   code (string-to-int (match-string 2) 16))
435             (replace-match
436              (format "<img alt=\"CNS%d-%04X\" src=\"%s/CNS%d/%04X.gif\">"
437                      plane code
438                      chise-wiki-glyphs-url
439                      plane code)
440              t 'literal))
441           ))
442       (goto-char (point-min))
443       (while (search-forward "&GT-" nil t)
444         (replace-match "&amp;GT-" t 'literal))
445
446       (buffer-string))))
447
448 (defun www-format-props-to-string (props &optional format)
449   (unless format
450     (setq format (plist-get props :format)))
451   (concat "%"
452           (plist-get props :flag)
453           (if (plist-get props :zero-padding)
454               "0")
455           (if (plist-get props :len)
456               (format "%d" (plist-get props :len)))
457           (cond
458            ((eq format 'decimal) "d")
459            ((eq format 'hex) "x")
460            ((eq format 'HEX) "X")
461            ((eq format 'S-exp) "S")
462            (t "s"))))      
463
464 (defun www-format-apply-value (format props value
465                                       &optional uri-char uri-feature
466                                       without-tags)
467   (let (ret)
468     (setq ret
469           (cond
470            ((memq format '(decimal hex HEX))
471             (if (integerp value)
472                 (format (www-format-props-to-string props format)
473                         value)
474               (www-format-encode-string
475                (format "%s" value)
476                without-tags))
477             )
478            ((eq format 'S-exp)
479             (www-format-encode-string
480              (format (www-format-props-to-string props format)
481                      value)
482              without-tags))
483            ((eq format 'ku-ten)
484             (www-format-value-as-kuten value))
485            ((eq format 'space-separated-char-list)
486             (www-format-value-as-char-list value without-tags))
487            ((eq format 'space-separated-ids)
488             (www-format-value-as-ids value without-tags))
489            (t
490             (setq format 'default)
491             (www-format-encode-string
492              (format (www-format-props-to-string props 'default)
493                      value)
494              without-tags))))
495     (if (or without-tags (eq (plist-get props :mode) 'peek))
496         ret
497       (format "%s <a href=\"%s?char=%s&feature=%s&format=%s\"
498 ><input type=\"submit\" value=\"edit\" /></a>"
499               ret
500               chise-wiki-edit-url
501               uri-char uri-feature format))))
502
503 (defun www-format-eval-feature-value (char
504                                       feature-name
505                                       &optional format lang uri-char value)
506   (unless value
507     (setq value (char-feature char feature-name)))
508   (unless format
509     (setq format (www-feature-value-format feature-name)))
510   (cond
511    ((symbolp format)
512     (www-format-apply-value
513      format nil value
514      uri-char (www-uri-encode-feature-name feature-name))
515     )
516    ((consp format)
517     (cond ((null (cdr format))
518            (setq format (car format))
519            (www-format-apply-value
520             (car format) (nth 1 format) value
521             uri-char (www-uri-encode-feature-name feature-name))
522            )
523           (t
524            (www-format-eval-list format char feature-name lang uri-char)
525            )))))
526
527 (defun www-format-eval-unit (exp char feature-name
528                                  &optional lang uri-char value)
529   (unless value
530     (setq value (char-feature char feature-name)))
531   (unless uri-char
532     (setq uri-char (www-uri-encode-char char)))
533   (cond
534    ((stringp exp) (www-format-encode-string exp))
535    ((null exp) "")
536    ((consp exp)
537     (cond
538      ((memq (car exp) '(value decimal hex HEX ku-ten S-exp default))
539       (if (eq (car exp) 'value)
540           (www-format-eval-feature-value char feature-name
541                                          (plist-get (nth 1 exp) :format)
542                                          lang uri-char value)
543         (www-format-apply-value
544          (car exp) (nth 1 exp) value
545          uri-char (www-uri-encode-feature-name feature-name)))
546       )
547      ((eq (car exp) 'name)
548       (format "<a href=\"%s?feature=%s&char=%s\">%s</a>"
549               chise-wiki-view-url
550               (www-uri-encode-feature-name feature-name)
551               uri-char
552               (www-format-feature-name feature-name lang))
553       )
554      ((eq (car exp) 'link)
555       (format "<a
556  href=\"%s\"
557 >%s</a
558 >"
559               (www-format-eval-list (plist-get (nth 1 exp) :ref)
560                                     char feature-name lang uri-char)
561               (www-format-eval-list (nthcdr 2 exp)
562                                     char feature-name lang uri-char)))
563      (t
564       (format "<%s
565 >%s</%s
566 >"
567               (car exp)
568               (www-format-eval-list (nthcdr 2 exp) char feature-name
569                                     lang uri-char)
570               (car exp)))))))
571
572 (defun www-format-eval-list (format-list char feature-name
573                                          &optional lang uri-char)
574   (if (consp format-list)
575       (mapconcat
576        (lambda (exp)
577          (www-format-eval-unit exp char feature-name lang uri-char))
578        format-list "")
579     (www-format-eval-unit format-list char feature-name lang uri-char)))
580
581
582 ;;; @ HTML generator
583 ;;;
584
585 (defun www-html-display-text (text)
586   (princ
587    (with-temp-buffer
588      (insert text)
589      (goto-char (point-min))
590      (while (search-forward "<" nil t)
591        (replace-match "&lt;" nil t))
592      (goto-char (point-min))
593      (while (search-forward ">" nil t)
594        (replace-match "&gt;" nil t))
595      (goto-char (point-min))
596      (while (re-search-forward "\\[\\[\\([^]|[]+\\)|\\([^][]+\\)\\]\\]" nil t)
597        (replace-match
598         (format "<a href=\"%s\">%s</a>"
599                 (match-string 2)
600                 (match-string 1))
601         nil t))
602      (encode-coding-region (point-min)(point-max) 'utf-8-mcs-er)
603      (goto-char (point-min))
604      (while (search-forward "&GT-" nil t)
605        (replace-match "&amp;GT-" nil t))
606      (buffer-string))))
607
608 (defun www-html-display-paragraph (text)
609   (princ "<p>")
610   (www-html-display-text text)
611   (princ "</p>\n"))
612
613 (provide 'cwiki-common)