New files.
[chise/est.git] / cwiki-common.el
1 ;; -*- coding: utf-8-mcs-er -*-
2 (require 'char-db-util)
3
4 (defvar chise-wiki-view-url "view.cgi")
5 (defvar chise-wiki-edit-url "edit/edit.cgi")
6
7 (defvar chise-wiki-glyphs-url
8   "http://chise.zinbun.kyoto-u.ac.jp/glyphs/")
9
10 (defun decode-uri-string (string &optional coding-system)
11   (if (> (length string) 0)
12       (let ((i 0)
13             dest)
14         (setq string
15               (mapconcat (lambda (char)
16                            (if (eq char ?+)
17                                " "
18                              (char-to-string char)))
19                          string ""))
20         (while (string-match "%\\([0-9A-F][0-9A-F]\\)" string i)
21           (setq dest (concat dest
22                              (substring string i (match-beginning 0))
23                              (char-to-string
24                               (int-char
25                                (string-to-int (match-string 1 string) 16))))
26                 i (match-end 0)))
27         (decode-coding-string
28          (concat dest (substring string i))
29          coding-system))))
30
31 (defun www-feature-type (feature-name)
32   (or (char-feature-property feature-name 'type)
33       (let ((str (symbol-name feature-name)))
34         (cond
35          ((string-match "^\\(->\\|<-\\)" str)
36           'relation)
37          ((string-match "^ideographic-structure\\(@\\|$\\)" str)
38           'structure)
39          ))))
40
41 (defun www-feature-value-format (feature-name)
42   (or (char-feature-property feature-name 'value-format)
43       (if (memq (www-feature-type feature-name)
44                 '(relation structure))
45           'space-separated-char-list)
46       (if (find-charset feature-name)
47           (if (and (= (charset-dimension feature-name) 2)
48                    (= (charset-chars feature-name) 94))
49               '("0x" (HEX)
50                 " (" (decimal) ") <" (ku-ten) ">")
51             '("0x" (HEX) " (" (decimal) ")")))))
52
53
54 ;;; @ URI representation
55 ;;;
56
57 (defun www-uri-decode-feature-name (uri-feature)
58   (let (feature)
59     (cond
60      ((string-match "^from\\." uri-feature)
61       (intern (format "<-%s" (substring uri-feature (match-end 0))))
62       )
63      ((string-match "^to\\." uri-feature)
64       (intern (format "->%s" (substring uri-feature (match-end 0))))
65       )
66      ((string-match "^rep\\." uri-feature)
67       (intern (format "=%s" (substring uri-feature (match-end 0))))
68       )
69      ((string-match "^g\\." uri-feature)
70       (intern (format "=>>%s" (substring uri-feature (match-end 0))))
71       )
72      ((string-match "^gi\\." uri-feature)
73       (intern (format "=>>>%s" (substring uri-feature (match-end 0))))
74       )
75      ((string-match "^gi\\([0-9]+\\)\\." uri-feature)
76       (intern (format "=>>%s%s"
77                       (make-string (string-to-int
78                                     (match-string 1 uri-feature))
79                                    ?>)
80                       (substring uri-feature (match-end 0))))
81       )
82      ((string-match "^a\\." uri-feature)
83       (intern (format "=>%s" (substring uri-feature (match-end 0))))
84       )
85      ((string-match "^a\\([0-9]+\\)\\." uri-feature)
86       (intern (format "%s>%s"
87                       (make-string (string-to-int
88                                     (match-string 1 uri-feature))
89                                    ?=)
90                       (substring uri-feature (match-end 0))))
91       )
92      ((and (setq feature (intern (format "=>%s" uri-feature)))
93            (find-charset feature))
94       feature)
95      ((and (setq feature (intern (format "=>>%s" uri-feature)))
96            (find-charset feature))
97       feature)
98      ((and (setq feature (intern (format "=>>>%s" uri-feature)))
99            (find-charset feature))
100       feature)
101      ((and (setq feature (intern (format "=%s" uri-feature)))
102            (find-charset feature))
103       feature)
104      (t (intern uri-feature)))))
105
106 (defun www-uri-encode-feature-name (feature-name)
107   (setq feature-name (symbol-name feature-name))
108   (cond
109    ((string-match "^=\\([^=>]+\\)" feature-name)
110     (concat "rep." (substring feature-name (match-beginning 1)))
111     )
112    ((string-match "^=>>\\([^=>]+\\)" feature-name)
113     (concat "g." (substring feature-name (match-beginning 1)))
114     )
115    ((string-match "^=>>>\\([^=>]+\\)" feature-name)
116     (concat "gi." (substring feature-name (match-beginning 1)))
117     )
118    ((string-match "^=>>\\(>+\\)" feature-name)
119     (format "gi%d.%s"
120             (length (match-string 1 feature-name))
121             (substring feature-name (match-end 1)))
122     )
123    ((string-match "^=>\\([^=>]+\\)" feature-name)
124     (concat "a." (substring feature-name (match-beginning 1)))
125     )
126    ((string-match "^\\(=+\\)>" feature-name)
127     (format "a%d.%s"
128             (length (match-string 1 feature-name))
129             (substring feature-name (match-end 0)))
130     )
131    ((string-match "^->" feature-name)
132     (concat "to." (substring feature-name (match-end 0)))
133     )
134    ((string-match "^<-" feature-name)
135     (concat "from." (substring feature-name (match-end 0)))
136     )
137    (t feature-name)))
138
139 (defun www-uri-decode-char (char-rep)
140   (let (ccs cpos)
141     (cond
142      ((string-match ":" char-rep)
143       (setq ccs (substring char-rep 0 (match-beginning 0))
144             cpos (substring char-rep (match-end 0)))
145       (setq ccs (www-uri-decode-feature-name ccs))
146       (cond
147        ((string-match "^0x" cpos)
148         (setq cpos
149               (string-to-number (substring cpos (match-end 0)) 16))
150         )
151        (t
152         (setq cpos (string-to-number cpos))
153         ))
154       (if (numberp cpos)
155           (decode-char ccs cpos))
156       )
157      ((= (length char-rep) 1)
158       (aref char-rep 0)
159       ))))
160
161 (defun www-uri-encode-char (char)
162   (let ((ccs-list '(=ucs
163                     =cns11643-1 =cns11643-2 =cns11643-3
164                     =cns11643-4 =cns11643-5 =cns11643-6 =cns11643-7
165                     =gb2312 =gb12345
166                     =jis-x0208 =jis-x0208@1990
167                     =jis-x0212
168                     =cbeta =jef-china3
169                     =jis-x0213-1@2000 =jis-x0213-1@2004
170                     =jis-x0208@1983 =jis-x0208@1978
171                     =zinbun-oracle
172                     =daikanwa
173                     =gt =gt-k
174                     =big5
175                     =big5-cdp
176                     =>>jis-x0208 =>>jis-x0213-1
177                     =>jis-x0208 =>jis-x0213-1))
178         ccs ret)
179     (while (and ccs-list
180                 (setq ccs (pop ccs-list))
181                 (not (setq ret (encode-char char ccs 'defined-only)))))
182     (cond (ret
183            (format "%s:0x%X"
184                    (www-uri-encode-feature-name ccs)
185                    ret))
186           ((setq ccs (car (split-char char)))
187            (format "%s:0x%X"
188                    (www-uri-encode-feature-name ccs)
189                    (encode-char char ccs))))))
190
191
192 ;;; @ Feature name presentation
193 ;;;
194
195 (defun www-format-feature-name-default (feature-name)
196   (mapconcat
197    #'capitalize
198    (split-string
199     (symbol-name feature-name)
200     "-")
201    " "))
202
203 (defun www-format-feature-name-as-rel-to (feature-name)
204   (concat "\u2192" (substring (symbol-name feature-name) 2)))
205
206 (defun www-format-feature-name-as-rel-from (feature-name)
207   (concat "\u2190" (substring (symbol-name feature-name) 2)))
208
209 (defun www-format-feature-name-as-CCS (feature-name)
210   (let* ((rest
211           (split-string
212            (symbol-name feature-name)
213            "-"))
214          (dest (upcase (pop rest))))
215     (cond
216      (rest
217       (while (cdr rest)
218         (setq dest (concat dest " " (upcase (pop rest)))))
219       (if (string-match "^[0-9]+$" (car rest))
220           (concat dest "-" (car rest))
221         (concat dest " " (upcase (car rest))))
222       )
223      (t dest))))
224
225 (defun www-format-feature-name (feature-name &optional lang)
226   (let (name)
227     (www-format-encode-string
228      (cond
229       ((or (and lang
230                 (char-feature-property
231                  feature-name
232                  (intern (format "name@%s" lang))))
233            (char-feature-property
234             feature-name 'name)))
235       ((find-charset feature-name)
236        (www-format-feature-name-as-CCS feature-name))
237       ((and (setq name (symbol-name feature-name))
238             (string-match "^\\(->\\)" name))
239        (www-format-feature-name-as-rel-to feature-name))
240       ((string-match "^\\(<-\\)" name)
241        (www-format-feature-name-as-rel-from feature-name))
242       (t
243        (www-format-feature-name-default feature-name))))))
244
245
246 ;;; @ Feature value presentation
247 ;;;
248
249 (defun www-format-value-as-kuten (value)
250   (format "%02d-%02d"
251           (- (lsh value -8) 32)
252           (- (logand value 255) 32)))
253
254 (defun www-format-value-as-char-list (value &optional without-tags)
255   (if (listp value)
256       (mapconcat
257        (if without-tags
258            (lambda (unit)
259              (www-format-encode-string
260               (format (if (characterp unit)
261                           "%c"
262                         "%s")
263                       unit)
264               'without-tags))
265          (lambda (unit)
266            (if (characterp unit)
267                (format "<a href=\"%s?char=%s\">%s</a>"
268                        chise-wiki-view-url
269                        (www-uri-encode-char unit)
270                        (www-format-encode-string (char-to-string unit)))
271              (www-format-encode-string (format "%s" unit)))))
272        value " ")
273     (www-format-encode-string (format "%s" value) without-tags)))
274
275 (defun www-format-value-as-S-exp (value &optional without-tags)
276   (www-format-encode-string (format "%S" value) without-tags))
277
278 (defun www-format-value-as-HEX (value)
279   (if (integerp value)
280       (format "%X" value)
281     (www-format-value-as-S-exp value)))
282
283 (defun www-format-value-as-CCS-default (value)
284   (if (integerp value)
285       (format "0x%s (%d)"
286               (www-format-value-as-HEX value)
287               value)
288     (www-format-value-as-S-exp value)))
289
290 (defun www-format-value-as-CCS-94x94 (value)
291   (if (integerp value)
292       (format "0x%s [%s] (%d)"
293               (www-format-value-as-HEX value)
294               (www-format-value-as-kuten value)
295               value)
296     (www-format-value-as-S-exp value)))
297
298 (defun www-format-value (value &optional feature-name format without-tags)
299   ;; (cond
300   ;;  ((find-charset feature-name)
301   ;;   (cond
302   ;;    ((and (= (charset-chars feature-name) 94)
303   ;;          (= (charset-dimension feature-name) 2))
304   ;;     (www-format-value-as-CCS-94x94 value))
305   ;;    (t
306   ;;     (www-format-value-as-CCS-default value)))
307   ;;   )
308   ;;  (t
309   ;;   (www-format-value-as-S-exp value)))
310   (www-format-apply-value format nil value nil nil without-tags)
311   )
312
313
314 ;;; @ format evaluator
315 ;;;
316
317 (defun www-format-encode-string (string &optional without-tags)
318   (with-temp-buffer
319     (insert string)
320     (let (plane code)
321       (goto-char (point-min))
322       (while (search-forward "<" nil t)
323         (replace-match "&lt;" nil t))
324       (goto-char (point-min))
325       (while (search-forward ">" nil t)
326         (replace-match "&gt;" nil t))
327       (if without-tags
328           (encode-coding-region (point-min)(point-max) 'utf-8-mcs-er)
329         (let ((coded-charset-entity-reference-alist
330                (list*
331                 '(=cns11643-1           "C1-" 4 X)
332                 '(=cns11643-2           "C2-" 4 X)
333                 '(=cns11643-3           "C3-" 4 X)
334                 '(=cns11643-4           "C4-" 4 X)
335                 '(=cns11643-5           "C5-" 4 X)
336                 '(=cns11643-6           "C6-" 4 X)
337                 '(=cns11643-7           "C7-" 4 X)
338                 '(=gb2312               "G0-" 4 X)
339                 '(=gb12345              "G1-" 4 X)
340                 '(=jis-x0208@1990       "J90-" 4 X)
341                 '(=jis-x0212            "JSP-" 4 X)
342                 '(=cbeta                "CB" 5 d)
343                 '(=jef-china3           "JC3-" 4 X)
344                 '(=jis-x0208@1997       "J97-" 4 X)
345                 '(=jis-x0208@1978       "J78-" 4 X)
346                 '(=jis-x0208@1983       "J83-" 4 X)
347                 '(=zinbun-oracle        "ZOB-" 4 d)
348                 '(=daikanwa             "M-" 5 d)
349                 coded-charset-entity-reference-alist)))
350           (encode-coding-region (point-min)(point-max) 'utf-8-mcs-er)
351
352           (goto-char (point-min))
353           (while (re-search-forward "&CB\\([0-9]+\\);" nil t)
354             (setq code (string-to-int (match-string 1)))
355             (replace-match
356              (format "<img alt=\"CB%05d\" src=\"%s/cb-gaiji/%02d/CB%05d.gif\">"
357                      code
358                      chise-wiki-glyphs-url
359                      (/ code 1000) code)
360              t 'literal))
361
362           (goto-char (point-min))
363           (while (re-search-forward "&J\\(78\\|83\\|90\\|97\\|SP\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
364             (setq plane (match-string 1)
365                   code (string-to-int (match-string 2) 16))
366             (replace-match
367              (format "<img alt=\"J%s-%04X\" src=\"%s/JIS-%s/%02d-%02d.gif\">"
368                      plane code
369                      chise-wiki-glyphs-url
370                      plane
371                      (- (lsh code -8) 32)
372                      (- (logand code 255) 32))
373              t 'literal))
374
375           (goto-char (point-min))
376           (while (re-search-forward "&G\\([01]\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
377             (setq plane (string-to-int (match-string 1))
378                   code (string-to-int (match-string 2) 16))
379             (replace-match
380              (format "<img alt=\"GB%d-%04X\" src=\"%s/GB%d/%02d-%02d.gif\">"
381                      plane code
382                      chise-wiki-glyphs-url
383                      plane
384                      (- (lsh code -8) 32)
385                      (- (logand code 255) 32))
386              t 'literal))
387
388           (goto-char (point-min))
389           (while (re-search-forward "&C\\([1-7]\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
390             (setq plane (string-to-int (match-string 1))
391                   code (string-to-int (match-string 2) 16))
392             (replace-match
393              (format "<img alt=\"CNS%d-%04X\" src=\"%s/CNS%d/%04X.gif\">"
394                      plane code
395                      chise-wiki-glyphs-url
396                      plane code)
397              t 'literal))
398           ))
399       (goto-char (point-min))
400       (while (search-forward "&GT-" nil t)
401         (replace-match "&amp;GT-" t 'literal))
402
403       (buffer-string))))
404
405 (defun www-format-props-to-string (props &optional format)
406   (unless format
407     (setq format (plist-get props :format)))
408   (concat "%"
409           (plist-get props :flag)
410           (if (plist-get props :zero-padding)
411               "0")
412           (if (plist-get props :len)
413               (format "%d" (plist-get props :len)))
414           (cond
415            ((eq format 'decimal) "d")
416            ((eq format 'hex) "x")
417            ((eq format 'HEX) "X")
418            ((eq format 'S-exp) "S")
419            (t "s"))))      
420
421 (defun www-format-apply-value (format props value
422                                       &optional uri-char uri-feature
423                                       without-tags)
424   (let (ret)
425     (setq ret
426           (cond
427            ((memq format '(decimal hex HEX))
428             (if (integerp value)
429                 (format (www-format-props-to-string props format)
430                         value)
431               (www-format-encode-string
432                (format "%s" value)
433                without-tags))
434             )
435            ((eq format 'S-exp)
436             (www-format-encode-string
437              (format (www-format-props-to-string props format)
438                      value)
439              without-tags))
440            ((eq format 'ku-ten)
441             (www-format-value-as-kuten value))
442            ((eq format 'space-separated-char-list)
443             (www-format-value-as-char-list value without-tags))
444            (t
445             (setq format 'default)
446             (www-format-encode-string
447              (format (www-format-props-to-string props 'default)
448                      value)
449              without-tags))))
450     (if (or without-tags (eq (plist-get props :mode) 'peek))
451         ret
452       (format "%s <a href=\"%s?char=%s&feature=%s&format=%s\"
453 ><input type=\"submit\" value=\"edit\" /></a>"
454               ret
455               chise-wiki-edit-url
456               uri-char uri-feature format))))
457
458 (defun www-format-eval-feature-value (char
459                                       feature-name
460                                       &optional format lang uri-char value)
461   (unless value
462     (setq value (char-feature char feature-name)))
463   (unless format
464     (setq format (www-feature-value-format feature-name)))
465   (cond
466    ((symbolp format)
467     (www-format-apply-value
468      format nil value
469      uri-char (www-uri-encode-feature-name feature-name))
470     )
471    ((consp format)
472     (cond ((null (cdr format))
473            (setq format (car format))
474            (www-format-apply-value
475             (car format) (nth 1 format) value
476             uri-char (www-uri-encode-feature-name feature-name))
477            )
478           (t
479            (www-format-eval-list format char feature-name lang uri-char)
480            )))))
481
482 (defun www-format-eval-unit (exp char feature-name
483                                  &optional lang uri-char value)
484   (unless value
485     (setq value (char-feature char feature-name)))
486   (unless uri-char
487     (setq uri-char (www-uri-encode-char char)))
488   (cond
489    ((stringp exp) (www-format-encode-string exp))
490    ((null exp) "")
491    ((consp exp)
492     (cond
493      ((memq (car exp) '(value decimal hex HEX ku-ten S-exp default))
494       (if (eq (car exp) 'value)
495           (www-format-eval-feature-value char feature-name
496                                          (plist-get (nth 1 exp) :format)
497                                          lang uri-char value)
498         (www-format-apply-value
499          (car exp) (nth 1 exp) value
500          uri-char (www-uri-encode-feature-name feature-name)))
501       )
502      ((eq (car exp) 'name)
503       (format "<a href=\"%s?feature=%s&char=%s\">%s</a>"
504               chise-wiki-view-url
505               (www-uri-encode-feature-name feature-name)
506               uri-char
507               (www-format-feature-name feature-name lang))
508       )
509      ((eq (car exp) 'link)
510       (format "<a
511  href=\"%s\"
512 >%s</a
513 >"
514               (www-format-eval-list (plist-get (nth 1 exp) :ref)
515                                     char feature-name lang uri-char)
516               (www-format-eval-list (nthcdr 2 exp)
517                                     char feature-name lang uri-char)))
518      (t
519       (format "<%s
520 >%s</%s
521 >"
522               (car exp)
523               (www-format-eval-list (nthcdr 2 exp) char feature-name
524                                     lang uri-char)
525               (car exp)))))))
526
527 (defun www-format-eval-list (format-list char feature-name
528                                          &optional lang uri-char)
529   (if (consp format-list)
530       (mapconcat
531        (lambda (exp)
532          (www-format-eval-unit exp char feature-name lang uri-char))
533        format-list "")
534     (www-format-eval-unit format-list char feature-name lang uri-char)))
535
536
537 ;;; @ HTML generator
538 ;;;
539
540 (defun www-html-display-text (text)
541   (princ
542    (with-temp-buffer
543      (insert text)
544      (goto-char (point-min))
545      (while (search-forward "<" nil t)
546        (replace-match "&lt;" nil t))
547      (goto-char (point-min))
548      (while (search-forward ">" nil t)
549        (replace-match "&gt;" nil t))
550      (goto-char (point-min))
551      (while (re-search-forward "\\[\\[\\([^]|[]+\\)|\\([^][]+\\)\\]\\]" nil t)
552        (replace-match
553         (format "<a href=\"%s\">%s</a>"
554                 (match-string 2)
555                 (match-string 1))
556         nil t))
557      (encode-coding-region (point-min)(point-max) 'utf-8-mcs-er)
558      (goto-char (point-min))
559      (while (search-forward "&" nil t)
560        (replace-match "&amp;" nil t))
561      (buffer-string))))
562
563 (defun www-html-display-paragraph (text)
564   (princ "<p>")
565   (www-html-display-text text)
566   (princ "</p>\n"))
567
568 (provide 'cwiki-common)