(www-feature-value-format): Return 'space-separated-ids for
[chise/est.git] / cwiki-common.el
1 ;; -*- coding: utf-8-mcs-er -*-
2 (require 'char-db-util)
3
4 (defvar chise-wiki-view-url "view.cgi")
5 (defvar chise-wiki-edit-url "edit/edit.cgi")
6
7 (defvar chise-wiki-glyphs-url
8   "http://chise.zinbun.kyoto-u.ac.jp/glyphs/")
9
10 (defun decode-uri-string (string &optional coding-system)
11   (if (> (length string) 0)
12       (let ((i 0)
13             dest)
14         (setq string
15               (mapconcat (lambda (char)
16                            (if (eq char ?+)
17                                " "
18                              (char-to-string char)))
19                          string ""))
20         (while (string-match "%\\([0-9A-F][0-9A-F]\\)" string i)
21           (setq dest (concat dest
22                              (substring string i (match-beginning 0))
23                              (char-to-string
24                               (int-char
25                                (string-to-int (match-string 1 string) 16))))
26                 i (match-end 0)))
27         (decode-coding-string
28          (concat dest (substring string i))
29          coding-system))))
30
31 (defun www-feature-type (feature-name)
32   (or (char-feature-property feature-name 'type)
33       (let ((str (symbol-name feature-name)))
34         (cond
35          ((string-match "^\\(->\\|<-\\)" str)
36           'relation)
37          ((string-match "^ideographic-structure\\(@\\|$\\)" str)
38           'structure)
39          ))))
40
41 (defun www-feature-value-format (feature-name)
42   (or (char-feature-property feature-name 'value-format)
43       (let ((type (www-feature-type feature-name)))
44         (cond ((eq type 'relation)
45                'space-separated-char-list)
46               ((eq type 'structure)
47                'space-separated-ids)))
48       (if (find-charset feature-name)
49           (if (and (= (charset-dimension feature-name) 2)
50                    (= (charset-chars feature-name) 94))
51               '("0x" (HEX)
52                 " (" (decimal) ") <" (ku-ten) ">")
53             '("0x" (HEX) " (" (decimal) ")")))))
54
55
56 ;;; @ URI representation
57 ;;;
58
59 (defun www-uri-decode-feature-name (uri-feature)
60   (let (feature)
61     (cond
62      ((string-match "^from\\." uri-feature)
63       (intern (format "<-%s" (substring uri-feature (match-end 0))))
64       )
65      ((string-match "^to\\." uri-feature)
66       (intern (format "->%s" (substring uri-feature (match-end 0))))
67       )
68      ((string-match "^rep\\." uri-feature)
69       (intern (format "=%s" (substring uri-feature (match-end 0))))
70       )
71      ((string-match "^g\\." uri-feature)
72       (intern (format "=>>%s" (substring uri-feature (match-end 0))))
73       )
74      ((string-match "^gi\\." uri-feature)
75       (intern (format "=>>>%s" (substring uri-feature (match-end 0))))
76       )
77      ((string-match "^gi\\([0-9]+\\)\\." uri-feature)
78       (intern (format "=>>%s%s"
79                       (make-string (string-to-int
80                                     (match-string 1 uri-feature))
81                                    ?>)
82                       (substring uri-feature (match-end 0))))
83       )
84      ((string-match "^a\\." uri-feature)
85       (intern (format "=>%s" (substring uri-feature (match-end 0))))
86       )
87      ((string-match "^a\\([0-9]+\\)\\." uri-feature)
88       (intern (format "%s>%s"
89                       (make-string (string-to-int
90                                     (match-string 1 uri-feature))
91                                    ?=)
92                       (substring uri-feature (match-end 0))))
93       )
94      ((and (setq feature (intern (format "=>%s" uri-feature)))
95            (find-charset feature))
96       feature)
97      ((and (setq feature (intern (format "=>>%s" uri-feature)))
98            (find-charset feature))
99       feature)
100      ((and (setq feature (intern (format "=>>>%s" uri-feature)))
101            (find-charset feature))
102       feature)
103      ((and (setq feature (intern (format "=%s" uri-feature)))
104            (find-charset feature))
105       feature)
106      (t (intern uri-feature)))))
107
108 (defun www-uri-encode-feature-name (feature-name)
109   (setq feature-name (symbol-name feature-name))
110   (cond
111    ((string-match "^=\\([^=>]+\\)" feature-name)
112     (concat "rep." (substring feature-name (match-beginning 1)))
113     )
114    ((string-match "^=>>\\([^=>]+\\)" feature-name)
115     (concat "g." (substring feature-name (match-beginning 1)))
116     )
117    ((string-match "^=>>>\\([^=>]+\\)" feature-name)
118     (concat "gi." (substring feature-name (match-beginning 1)))
119     )
120    ((string-match "^=>>\\(>+\\)" feature-name)
121     (format "gi%d.%s"
122             (length (match-string 1 feature-name))
123             (substring feature-name (match-end 1)))
124     )
125    ((string-match "^=>\\([^=>]+\\)" feature-name)
126     (concat "a." (substring feature-name (match-beginning 1)))
127     )
128    ((string-match "^\\(=+\\)>" feature-name)
129     (format "a%d.%s"
130             (length (match-string 1 feature-name))
131             (substring feature-name (match-end 0)))
132     )
133    ((string-match "^->" feature-name)
134     (concat "to." (substring feature-name (match-end 0)))
135     )
136    ((string-match "^<-" feature-name)
137     (concat "from." (substring feature-name (match-end 0)))
138     )
139    (t feature-name)))
140
141 (defun www-uri-decode-char (char-rep)
142   (let (ccs cpos)
143     (cond
144      ((string-match ":" char-rep)
145       (setq ccs (substring char-rep 0 (match-beginning 0))
146             cpos (substring char-rep (match-end 0)))
147       (setq ccs (www-uri-decode-feature-name ccs))
148       (cond
149        ((string-match "^0x" cpos)
150         (setq cpos
151               (string-to-number (substring cpos (match-end 0)) 16))
152         )
153        (t
154         (setq cpos (string-to-number cpos))
155         ))
156       (if (numberp cpos)
157           (decode-char ccs cpos))
158       )
159      ((= (length char-rep) 1)
160       (aref char-rep 0)
161       ))))
162
163 (defun www-uri-encode-char (char)
164   (let ((ccs-list '(=ucs
165                     =cns11643-1 =cns11643-2 =cns11643-3
166                     =cns11643-4 =cns11643-5 =cns11643-6 =cns11643-7
167                     =gb2312 =gb12345
168                     =jis-x0208 =jis-x0208@1990
169                     =jis-x0212
170                     =cbeta =jef-china3
171                     =jis-x0213-1@2000 =jis-x0213-1@2004
172                     =jis-x0208@1983 =jis-x0208@1978
173                     =zinbun-oracle
174                     =daikanwa
175                     =gt =gt-k
176                     =big5
177                     =big5-cdp
178                     =>>jis-x0208 =>>jis-x0213-1
179                     =>jis-x0208 =>jis-x0213-1))
180         ccs ret)
181     (while (and ccs-list
182                 (setq ccs (pop ccs-list))
183                 (not (setq ret (encode-char char ccs 'defined-only)))))
184     (cond (ret
185            (format "%s:0x%X"
186                    (www-uri-encode-feature-name ccs)
187                    ret))
188           ((setq ccs (car (split-char char)))
189            (format "%s:0x%X"
190                    (www-uri-encode-feature-name ccs)
191                    (encode-char char ccs))))))
192
193
194 ;;; @ Feature name presentation
195 ;;;
196
197 (defun www-format-feature-name-default (feature-name)
198   (mapconcat
199    #'capitalize
200    (split-string
201     (symbol-name feature-name)
202     "-")
203    " "))
204
205 (defun www-format-feature-name-as-rel-to (feature-name)
206   (concat "\u2192" (substring (symbol-name feature-name) 2)))
207
208 (defun www-format-feature-name-as-rel-from (feature-name)
209   (concat "\u2190" (substring (symbol-name feature-name) 2)))
210
211 (defun www-format-feature-name-as-CCS (feature-name)
212   (let* ((rest
213           (split-string
214            (symbol-name feature-name)
215            "-"))
216          (dest (upcase (pop rest))))
217     (cond
218      (rest
219       (while (cdr rest)
220         (setq dest (concat dest " " (upcase (pop rest)))))
221       (if (string-match "^[0-9]+$" (car rest))
222           (concat dest "-" (car rest))
223         (concat dest " " (upcase (car rest))))
224       )
225      (t dest))))
226
227 (defun www-format-feature-name (feature-name &optional lang)
228   (let (name)
229     (www-format-encode-string
230      (cond
231       ((or (and lang
232                 (char-feature-property
233                  feature-name
234                  (intern (format "name@%s" lang))))
235            (char-feature-property
236             feature-name 'name)))
237       ((find-charset feature-name)
238        (www-format-feature-name-as-CCS feature-name))
239       ((and (setq name (symbol-name feature-name))
240             (string-match "^\\(->\\)" name))
241        (www-format-feature-name-as-rel-to feature-name))
242       ((string-match "^\\(<-\\)" name)
243        (www-format-feature-name-as-rel-from feature-name))
244       (t
245        (www-format-feature-name-default feature-name))))))
246
247
248 ;;; @ Feature value presentation
249 ;;;
250
251 (defun www-format-value-as-kuten (value)
252   (format "%02d-%02d"
253           (- (lsh value -8) 32)
254           (- (logand value 255) 32)))
255
256 (defun www-format-value-as-char-list (value &optional without-tags)
257   (if (listp value)
258       (mapconcat
259        (if without-tags
260            (lambda (unit)
261              (www-format-encode-string
262               (format (if (characterp unit)
263                           "%c"
264                         "%s")
265                       unit)
266               'without-tags))
267          (lambda (unit)
268            (if (characterp unit)
269                (format "<a href=\"%s?char=%s\">%s</a>"
270                        chise-wiki-view-url
271                        (www-uri-encode-char unit)
272                        (www-format-encode-string (char-to-string unit)))
273              (www-format-encode-string (format "%s" unit)))))
274        value " ")
275     (www-format-encode-string (format "%s" value) without-tags)))
276
277 (defun www-format-value-as-ids (value &optional without-tags)
278   (if (listp value)
279       (mapconcat
280        (if without-tags
281            (lambda (unit)
282              (www-format-encode-string
283               (format (if (characterp unit)
284                           "%c"
285                         "%s")
286                       unit)
287               'without-tags))
288          (lambda (unit)
289            (if (characterp unit)
290                (format "<a href=\"%s?char=%s\">%s</a>"
291                        chise-wiki-view-url
292                        (www-uri-encode-char unit)
293                        (www-format-encode-string (char-to-string unit)))
294              (www-format-encode-string (format "%s" unit)))))
295        (ideographic-structure-to-ids value) " ")
296     (www-format-encode-string (format "%s" value) without-tags)))
297
298 (defun www-format-value-as-S-exp (value &optional without-tags)
299   (www-format-encode-string (format "%S" value) without-tags))
300
301 (defun www-format-value-as-HEX (value)
302   (if (integerp value)
303       (format "%X" value)
304     (www-format-value-as-S-exp value)))
305
306 (defun www-format-value-as-CCS-default (value)
307   (if (integerp value)
308       (format "0x%s (%d)"
309               (www-format-value-as-HEX value)
310               value)
311     (www-format-value-as-S-exp value)))
312
313 (defun www-format-value-as-CCS-94x94 (value)
314   (if (integerp value)
315       (format "0x%s [%s] (%d)"
316               (www-format-value-as-HEX value)
317               (www-format-value-as-kuten value)
318               value)
319     (www-format-value-as-S-exp value)))
320
321 (defun www-format-value (value &optional feature-name format without-tags)
322   ;; (cond
323   ;;  ((find-charset feature-name)
324   ;;   (cond
325   ;;    ((and (= (charset-chars feature-name) 94)
326   ;;          (= (charset-dimension feature-name) 2))
327   ;;     (www-format-value-as-CCS-94x94 value))
328   ;;    (t
329   ;;     (www-format-value-as-CCS-default value)))
330   ;;   )
331   ;;  (t
332   ;;   (www-format-value-as-S-exp value)))
333   (www-format-apply-value format nil value nil nil without-tags)
334   )
335
336
337 ;;; @ format evaluator
338 ;;;
339
340 (defun www-format-encode-string (string &optional without-tags)
341   (with-temp-buffer
342     (insert string)
343     (let (plane code)
344       (goto-char (point-min))
345       (while (search-forward "<" nil t)
346         (replace-match "&lt;" nil t))
347       (goto-char (point-min))
348       (while (search-forward ">" nil t)
349         (replace-match "&gt;" nil t))
350       (if without-tags
351           (encode-coding-region (point-min)(point-max) 'utf-8-mcs-er)
352         (let ((coded-charset-entity-reference-alist
353                (list*
354                 '(=cns11643-1           "C1-" 4 X)
355                 '(=cns11643-2           "C2-" 4 X)
356                 '(=cns11643-3           "C3-" 4 X)
357                 '(=cns11643-4           "C4-" 4 X)
358                 '(=cns11643-5           "C5-" 4 X)
359                 '(=cns11643-6           "C6-" 4 X)
360                 '(=cns11643-7           "C7-" 4 X)
361                 '(=gb2312               "G0-" 4 X)
362                 '(=gb12345              "G1-" 4 X)
363                 '(=jis-x0208@1990       "J90-" 4 X)
364                 '(=jis-x0212            "JSP-" 4 X)
365                 '(=cbeta                "CB" 5 d)
366                 '(=jef-china3           "JC3-" 4 X)
367                 '(=jis-x0208@1997       "J97-" 4 X)
368                 '(=jis-x0208@1978       "J78-" 4 X)
369                 '(=jis-x0208@1983       "J83-" 4 X)
370                 '(=zinbun-oracle        "ZOB-" 4 d)
371                 '(=daikanwa             "M-" 5 d)
372                 coded-charset-entity-reference-alist)))
373           (encode-coding-region (point-min)(point-max) 'utf-8-mcs-er)
374
375           (goto-char (point-min))
376           (while (re-search-forward "&CB\\([0-9]+\\);" nil t)
377             (setq code (string-to-int (match-string 1)))
378             (replace-match
379              (format "<img alt=\"CB%05d\" src=\"%s/cb-gaiji/%02d/CB%05d.gif\">"
380                      code
381                      chise-wiki-glyphs-url
382                      (/ code 1000) code)
383              t 'literal))
384
385           (goto-char (point-min))
386           (while (re-search-forward "&J\\(78\\|83\\|90\\|97\\|SP\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
387             (setq plane (match-string 1)
388                   code (string-to-int (match-string 2) 16))
389             (replace-match
390              (format "<img alt=\"J%s-%04X\" src=\"%s/JIS-%s/%02d-%02d.gif\">"
391                      plane code
392                      chise-wiki-glyphs-url
393                      plane
394                      (- (lsh code -8) 32)
395                      (- (logand code 255) 32))
396              t 'literal))
397
398           (goto-char (point-min))
399           (while (re-search-forward "&G\\([01]\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
400             (setq plane (string-to-int (match-string 1))
401                   code (string-to-int (match-string 2) 16))
402             (replace-match
403              (format "<img alt=\"GB%d-%04X\" src=\"%s/GB%d/%02d-%02d.gif\">"
404                      plane code
405                      chise-wiki-glyphs-url
406                      plane
407                      (- (lsh code -8) 32)
408                      (- (logand code 255) 32))
409              t 'literal))
410
411           (goto-char (point-min))
412           (while (re-search-forward "&C\\([1-7]\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
413             (setq plane (string-to-int (match-string 1))
414                   code (string-to-int (match-string 2) 16))
415             (replace-match
416              (format "<img alt=\"CNS%d-%04X\" src=\"%s/CNS%d/%04X.gif\">"
417                      plane code
418                      chise-wiki-glyphs-url
419                      plane code)
420              t 'literal))
421           ))
422       (goto-char (point-min))
423       (while (search-forward "&GT-" nil t)
424         (replace-match "&amp;GT-" t 'literal))
425
426       (buffer-string))))
427
428 (defun www-format-props-to-string (props &optional format)
429   (unless format
430     (setq format (plist-get props :format)))
431   (concat "%"
432           (plist-get props :flag)
433           (if (plist-get props :zero-padding)
434               "0")
435           (if (plist-get props :len)
436               (format "%d" (plist-get props :len)))
437           (cond
438            ((eq format 'decimal) "d")
439            ((eq format 'hex) "x")
440            ((eq format 'HEX) "X")
441            ((eq format 'S-exp) "S")
442            (t "s"))))      
443
444 (defun www-format-apply-value (format props value
445                                       &optional uri-char uri-feature
446                                       without-tags)
447   (let (ret)
448     (setq ret
449           (cond
450            ((memq format '(decimal hex HEX))
451             (if (integerp value)
452                 (format (www-format-props-to-string props format)
453                         value)
454               (www-format-encode-string
455                (format "%s" value)
456                without-tags))
457             )
458            ((eq format 'S-exp)
459             (www-format-encode-string
460              (format (www-format-props-to-string props format)
461                      value)
462              without-tags))
463            ((eq format 'ku-ten)
464             (www-format-value-as-kuten value))
465            ((eq format 'space-separated-char-list)
466             (www-format-value-as-char-list value without-tags))
467            ((eq format 'space-separated-ids)
468             (www-format-value-as-ids value without-tags))
469            (t
470             (setq format 'default)
471             (www-format-encode-string
472              (format (www-format-props-to-string props 'default)
473                      value)
474              without-tags))))
475     (if (or without-tags (eq (plist-get props :mode) 'peek))
476         ret
477       (format "%s <a href=\"%s?char=%s&feature=%s&format=%s\"
478 ><input type=\"submit\" value=\"edit\" /></a>"
479               ret
480               chise-wiki-edit-url
481               uri-char uri-feature format))))
482
483 (defun www-format-eval-feature-value (char
484                                       feature-name
485                                       &optional format lang uri-char value)
486   (unless value
487     (setq value (char-feature char feature-name)))
488   (unless format
489     (setq format (www-feature-value-format feature-name)))
490   (cond
491    ((symbolp format)
492     (www-format-apply-value
493      format nil value
494      uri-char (www-uri-encode-feature-name feature-name))
495     )
496    ((consp format)
497     (cond ((null (cdr format))
498            (setq format (car format))
499            (www-format-apply-value
500             (car format) (nth 1 format) value
501             uri-char (www-uri-encode-feature-name feature-name))
502            )
503           (t
504            (www-format-eval-list format char feature-name lang uri-char)
505            )))))
506
507 (defun www-format-eval-unit (exp char feature-name
508                                  &optional lang uri-char value)
509   (unless value
510     (setq value (char-feature char feature-name)))
511   (unless uri-char
512     (setq uri-char (www-uri-encode-char char)))
513   (cond
514    ((stringp exp) (www-format-encode-string exp))
515    ((null exp) "")
516    ((consp exp)
517     (cond
518      ((memq (car exp) '(value decimal hex HEX ku-ten S-exp default))
519       (if (eq (car exp) 'value)
520           (www-format-eval-feature-value char feature-name
521                                          (plist-get (nth 1 exp) :format)
522                                          lang uri-char value)
523         (www-format-apply-value
524          (car exp) (nth 1 exp) value
525          uri-char (www-uri-encode-feature-name feature-name)))
526       )
527      ((eq (car exp) 'name)
528       (format "<a href=\"%s?feature=%s&char=%s\">%s</a>"
529               chise-wiki-view-url
530               (www-uri-encode-feature-name feature-name)
531               uri-char
532               (www-format-feature-name feature-name lang))
533       )
534      ((eq (car exp) 'link)
535       (format "<a
536  href=\"%s\"
537 >%s</a
538 >"
539               (www-format-eval-list (plist-get (nth 1 exp) :ref)
540                                     char feature-name lang uri-char)
541               (www-format-eval-list (nthcdr 2 exp)
542                                     char feature-name lang uri-char)))
543      (t
544       (format "<%s
545 >%s</%s
546 >"
547               (car exp)
548               (www-format-eval-list (nthcdr 2 exp) char feature-name
549                                     lang uri-char)
550               (car exp)))))))
551
552 (defun www-format-eval-list (format-list char feature-name
553                                          &optional lang uri-char)
554   (if (consp format-list)
555       (mapconcat
556        (lambda (exp)
557          (www-format-eval-unit exp char feature-name lang uri-char))
558        format-list "")
559     (www-format-eval-unit format-list char feature-name lang uri-char)))
560
561
562 ;;; @ HTML generator
563 ;;;
564
565 (defun www-html-display-text (text)
566   (princ
567    (with-temp-buffer
568      (insert text)
569      (goto-char (point-min))
570      (while (search-forward "<" nil t)
571        (replace-match "&lt;" nil t))
572      (goto-char (point-min))
573      (while (search-forward ">" nil t)
574        (replace-match "&gt;" nil t))
575      (goto-char (point-min))
576      (while (re-search-forward "\\[\\[\\([^]|[]+\\)|\\([^][]+\\)\\]\\]" nil t)
577        (replace-match
578         (format "<a href=\"%s\">%s</a>"
579                 (match-string 2)
580                 (match-string 1))
581         nil t))
582      (encode-coding-region (point-min)(point-max) 'utf-8-mcs-er)
583      (goto-char (point-min))
584      (while (search-forward "&" nil t)
585        (replace-match "&amp;" nil t))
586      (buffer-string))))
587
588 (defun www-html-display-paragraph (text)
589   (princ "<p>")
590   (www-html-display-text text)
591   (princ "</p>\n"))
592
593 (provide 'cwiki-common)