(www-get-genre): New function.
[chise/est.git] / cwiki-common.el
1 ;; -*- coding: utf-8-mcs-er -*-
2 (require 'char-db-util)
3
(defvar chise-wiki-view-url "view.cgi"
  "URL of the view CGI.
Used as the base of the generated `?char=' and `?feature=' links.")

(defvar chise-wiki-edit-url "edit.cgi"
  "URL of the edit CGI.
NOTE(review): not referenced in this part of the file; confirm usage.")

(defvar chise-wiki-bitmap-glyphs-url
  "http://chise.zinbun.kyoto-u.ac.jp/glyphs"
  "Base URL prepended to the paths of bitmap glyph images.")

(defvar chise-wiki-glyph-cgi-url
  "http://chise.zinbun.kyoto-u.ac.jp/chisewiki/glyph.cgi"
  "URL of the glyph CGI, used as `<img src>' with a `?char=' query.")

;; NOTE(review): initial value is nil; no reader or writer of this
;; variable is visible in this part of the file.
(defvar chise-wiki-displayed-features nil)
14
(defun decode-uri-string (string &optional coding-system)
  "Decode the URI-escaped STRING and return the resulting text.
Every `+' is turned into a space, every `%XX' escape (two hex digits)
is turned into the character with that code, and the result is passed
through `decode-coding-string' with CODING-SYSTEM.
Return nil when STRING has length zero."
  (when (> (length string) 0)
    (let ((pos 0)
          (decoded nil))
      ;; First pass: `+' stands for a space.
      (setq string
            (mapconcat (lambda (ch)
                         (if (eq ch ?+) " " (char-to-string ch)))
                       string ""))
      ;; Second pass: copy the text between escapes verbatim and replace
      ;; each escape by the single character it denotes.
      (while (string-match "%\\([0-9A-F][0-9A-F]\\)" string pos)
        (let ((octet (string-to-int (match-string 1 string) 16)))
          (setq decoded (concat decoded
                                (substring string pos (match-beginning 0))
                                (char-to-string (int-char octet)))
                pos (match-end 0))))
      ;; Append whatever follows the last escape, then decode.
      (decode-coding-string (concat decoded (substring string pos))
                            coding-system))))
35
(defun www-get-genre (object)
  "Return the genre symbol of OBJECT.
The result is `character' when OBJECT satisfies `characterp' and
`default' otherwise."
  (cond ((characterp object) 'character)
        (t 'default)))
40
(defun www-feature-type (feature-name)
  "Return the type symbol of the feature FEATURE-NAME, or nil.
The `type' property of the feature wins when it is set.  Otherwise the
type is guessed from the spelling of the name:
  ...*note[@DOMAIN]         -> `stext'
  ...*sources[@DOMAIN]      -> `domain-list'
  any other name with `*'   -> nil
  names starting -> or <-   -> `relation'
  ideographic-structure[@…] -> `structure'"
  (cond
   ;; An explicitly declared type is returned as is.
   ((char-feature-property feature-name 'type))
   (t
    (let ((spelling (symbol-name feature-name)))
      (cond
       ((string-match "\\*note\\(@[^*]+\\)?$" spelling) 'stext)
       ((string-match "\\*sources\\(@[^*]+\\)?$" spelling) 'domain-list)
       ;; Any other metadata feature has no guessed type.
       ((string-match "\\*" spelling) nil)
       ((string-match "^\\(->\\|<-\\)" spelling) 'relation)
       ((string-match "^ideographic-structure\\(@\\|$\\)" spelling)
        'structure))))))
56
(defun www-feature-format (feature-name)
  "Return the display format of the feature FEATURE-NAME.
Look for a `format' property on the feature itself, then on each
ancestor given by `char-feature-name-parent' in turn.  When none is
found, fall back to the list ((name) \" : \" (value))."
  (or (char-feature-property feature-name 'format)
      (let ((current feature-name)
            ancestor inherited)
        ;; Climb one parent at a time until a format shows up or the
        ;; chain of parents ends.
        (while (and (null inherited)
                    (setq ancestor (char-feature-name-parent current)))
          (setq inherited (char-feature-property ancestor 'format)
                current ancestor))
        inherited)
      '((name) " : " (value))))
68
(defun www-feature-value-format (feature-name)
  "Return the value format of the feature FEATURE-NAME, or nil.
The sources are tried in this order:
 1. the `value-format' property of the feature itself;
 2. the `value-format' property of the nearest ancestor that has one;
 3. a format derived from `www-feature-type' (relation, structure
    or stext);
 4. when FEATURE-NAME names a charset, a code-point format, with an
    extra (ku-ten) element for 2-dimensional 94-character sets."
  (or (char-feature-property feature-name 'value-format)
      ;; Inherited from an ancestor feature.
      (let ((current feature-name)
            ancestor inherited)
        (while (and (null inherited)
                    (setq ancestor (char-feature-name-parent current)))
          (setq inherited (char-feature-property ancestor 'value-format)
                current ancestor))
        inherited)
      ;; Derived from the feature type.
      (cdr (assq (www-feature-type feature-name)
                 '((relation . space-separated-char-list)
                   (structure . space-separated-ids)
                   (stext . wiki-text))))
      ;; Coded character set: show the code point.
      (when (find-charset feature-name)
        (if (and (= (charset-dimension feature-name) 2)
                 (= (charset-chars feature-name) 94))
            '("0x" (HEX)
              " (" (decimal) ") <" (ku-ten) "> " (prev-char) (next-char))
          '("0x" (HEX) " (" (decimal) ") " (prev-char) (next-char))))))
93
(defun char-feature-name-at-domain (feature-name domain)
  "Return the symbol naming FEATURE-NAME qualified by DOMAIN.
When DOMAIN is nil, FEATURE-NAME is returned unchanged.  When the name
already ends in an `@...' part, DOMAIN is appended after a `/';
otherwise it is appended after an `@'."
  (if (null domain)
      feature-name
    (let* ((base (symbol-name feature-name))
           (separator (if (string-match "@[^*]+$" base) "/" "@")))
      (intern (format "%s%s%s" base separator domain)))))
105
(defun char-feature-name-parent (feature-name)
  "Return the parent feature of FEATURE-NAME, or nil if it has none.
The parent is the name with its last domain component removed: the
last `/PART' when there is one, otherwise the whole `@DOMAIN' suffix."
  (let ((spelling (symbol-name feature-name)))
    (when (string-match "@[^@/*]+\\(/[^@/*]+\\)*$" spelling)
      ;; Group 1 holds the last `/PART' when at least one is present;
      ;; without one, cut at the `@' itself.
      (intern (substring spelling 0 (or (match-beginning 1)
                                        (match-beginning 0)))))))
110
(defun char-feature-name-domain (feature-name)
  "Return the domain part of FEATURE-NAME as a symbol, or nil.
The domain is everything after the `@' of a trailing `@DOMAIN[/PART...]'
suffix."
  (let ((spelling (symbol-name feature-name)))
    (when (string-match "@[^@/*]+\\(/[^@/*]+\\)*$" spelling)
      (let ((after-at (1+ (match-beginning 0))))
        (intern (substring spelling after-at))))))
115
(defun char-feature-name-sans-versions (feature)
  "Return FEATURE without a trailing `$rev=latest' domain component.
The component is removed together with the `@' or `/' in front of it.
FEATURE itself is returned when its name has no such suffix."
  (let ((spelling (symbol-name feature)))
    (cond
     ((string-match "[@/]\\$rev=latest$" spelling)
      (intern (substring spelling 0 (match-beginning 0))))
     (t feature))))
121
(defun www-get-feature-value (object feature)
  "Return the value of FEATURE for OBJECT.
The `$rev=latest' variant of FEATURE is mounted and consulted first;
FEATURE itself is used only when that variant yields nil."
  (let ((latest (char-feature-name-at-domain feature '$rev=latest)))
    (mount-char-attribute-table latest)
    (let ((latest-value (char-feature object latest)))
      (if latest-value
          latest-value
        (char-feature object feature)))))
127
(defun get-previous-code-point (ccs code)
  "Return the code point that precedes CODE in the charset CCS.
CODE is split into one byte per charset dimension (lowest byte first),
the lowest byte is decremented, and a byte that falls below the
minimum wraps to the maximum while the borrow moves to the next byte.
Return nil when the borrow runs off the highest byte."
  (let* ((chars (charset-chars ccs))
         (dim (charset-dimension ccs))
         ;; (MASK BYTE-MIN BYTE-MAX) for each supported charset size.
         (layout (cond ((= chars 94)  '(#x7F 33 126))
                       ((= chars 96)  '(#x7F 32 127))
                       ((= chars 128) '(#x7F 0 #xFF))
                       (t             '(#xFF 0 #xFF)))) ; (= chars 256)
         (mask (car layout))
         (byte-min (nth 1 layout))
         (byte-max (nth 2 layout))
         (bytes (make-vector dim 0))
         (pos 0)
         (borrow t)
         result)
    ;; Split CODE into its bytes.
    (while (< pos dim)
      (aset bytes pos (logand (lsh code (* pos -8)) mask))
      (setq pos (1+ pos)))
    ;; Decrement, propagating the borrow towards the high bytes.
    (setq pos 0)
    (while (and borrow (< pos dim))
      (let ((lowered (1- (aref bytes pos))))
        (if (< lowered byte-min)
            (progn
              (aset bytes pos byte-max)
              (setq pos (1+ pos)))
          (aset bytes pos lowered)
          (setq borrow nil))))
    ;; Reassemble the bytes unless the borrow was never absorbed.
    (unless borrow
      (setq result 0
            pos 0)
      (while (< pos dim)
        (setq result (logior result (lsh (aref bytes pos) (* pos 8)))
              pos (1+ pos)))
      result)))
173
(defun get-next-code-point (ccs code)
  "Return the code point that follows CODE in the charset CCS.
CODE is split into one byte per charset dimension (lowest byte first),
the lowest byte is incremented, and a byte that exceeds the maximum
wraps to the minimum while the carry moves to the next byte.
Return nil when the carry runs off the highest byte."
  (let* ((chars (charset-chars ccs))
         (dim (charset-dimension ccs))
         ;; (MASK BYTE-MIN BYTE-MAX) for each supported charset size.
         (layout (cond ((= chars 94)  '(#x7F 33 126))
                       ((= chars 96)  '(#x7F 32 127))
                       ((= chars 128) '(#x7F 0 #xFF))
                       (t             '(#xFF 0 #xFF)))) ; (= chars 256)
         (mask (car layout))
         (byte-min (nth 1 layout))
         (byte-max (nth 2 layout))
         (bytes (make-vector dim 0))
         (pos 0)
         (carry t)
         result)
    ;; Split CODE into its bytes.
    (while (< pos dim)
      (aset bytes pos (logand (lsh code (* pos -8)) mask))
      (setq pos (1+ pos)))
    ;; Increment, propagating the carry towards the high bytes.
    (setq pos 0)
    (while (and carry (< pos dim))
      (let ((raised (1+ (aref bytes pos))))
        (if (> raised byte-max)
            (progn
              (aset bytes pos byte-min)
              (setq pos (1+ pos)))
          (aset bytes pos raised)
          (setq carry nil))))
    ;; Reassemble the bytes unless the carry was never absorbed.
    (unless carry
      (setq result 0
            pos 0)
      (while (< pos dim)
        (setq result (logior result (lsh (aref bytes pos) (* pos 8)))
              pos (1+ pos)))
      result)))
219
(defun find-previous-defined-code-point (ccs code)
  "Return the nearest character of CCS defined below code point CODE.
Candidates are produced by `get-previous-code-point' and tested with
`decode-char'; the search stops at the first candidate that decodes, or
when the candidates run out or become negative, in which case nil is
returned.  `=jis-x0208' and `=jis-x0213-1' are searched through their
`@1990' and `@2004' variants respectively."
  (let ((candidate (get-previous-code-point ccs code))
        found)
    (setq ccs (cond ((eq ccs '=jis-x0208) '=jis-x0208@1990)
                    ((eq ccs '=jis-x0213-1) '=jis-x0213-1@2004)
                    (t ccs)))
    (while (and candidate
                (>= candidate 0)
                (null found))
      (setq found (decode-char ccs candidate
                               (unless (eq ccs '=ucs)
                                 'defined-only)))
      (unless found
        (setq candidate (get-previous-code-point ccs candidate))))
    found))
235
(defun find-next-defined-code-point (ccs code)
  "Return the nearest character of CCS defined above code point CODE.
Candidates are produced by `get-next-code-point' and tested with
`decode-char'; the search stops at the first candidate that decodes, or
when the candidates run out or exceed CODE + 1000, in which case nil is
returned.  `=jis-x0208' and `=jis-x0213-1' are searched through their
`@1990' and `@2004' variants respectively."
  (let ((candidate (get-next-code-point ccs code))
        (limit (+ code 1000))
        found)
    (setq ccs (cond ((eq ccs '=jis-x0208) '=jis-x0208@1990)
                    ((eq ccs '=jis-x0213-1) '=jis-x0213-1@2004)
                    (t ccs)))
    (while (and candidate
                (<= candidate limit)
                (null found))
      (setq found (decode-char ccs candidate
                               (unless (eq ccs '=ucs)
                                 'defined-only)))
      (unless found
        (setq candidate (get-next-code-point ccs candidate))))
    found))
252
253
254 ;;; @ URI representation
255 ;;;
256
(defun www-uri-decode-feature-name (uri-feature)
  "Return the feature symbol denoted by the URI spelling URI-FEATURE.
Recognized prefixes:
  from.NAME -> <-NAME        to.NAME -> ->NAME
  rep.NAME  -> =NAME         g.NAME  -> =>>NAME
  gi.NAME   -> =>>>NAME      giN.NAME -> =>> plus N `>' plus NAME
  a.NAME    -> =>NAME        aN.NAME  -> N `=' plus `>' plus NAME
Without a prefix, the spellings =>NAME, =>>NAME, =>>>NAME and =NAME are
tried in that order and the first one that names a charset is used.
Otherwise URI-FEATURE is interned unchanged."
  (let ((fixed-prefixes '(("^from\\." . "<-")
                          ("^to\\." . "->")
                          ("^rep\\." . "=")
                          ("^g\\." . "=>>")
                          ("^gi\\." . "=>>>")))
        (ccs-prefixes '("=>" "=>>" "=>>>" "="))
        decoded candidate)
    ;; Fixed textual prefixes map one-to-one onto a feature-name prefix.
    (while (and fixed-prefixes (null decoded))
      (if (string-match (car (car fixed-prefixes)) uri-feature)
          (setq decoded
                (intern (concat (cdr (car fixed-prefixes))
                                (substring uri-feature (match-end 0))))))
      (setq fixed-prefixes (cdr fixed-prefixes)))
    (cond
     (decoded)
     ;; Counted forms: the number gives the repetition of `>' or `='.
     ((string-match "^gi\\([0-9]+\\)\\." uri-feature)
      (intern (concat "=>>"
                      (make-string (string-to-int
                                    (match-string 1 uri-feature))
                                   ?>)
                      (substring uri-feature (match-end 0)))))
     ((string-match "^a\\." uri-feature)
      (intern (concat "=>" (substring uri-feature (match-end 0)))))
     ((string-match "^a\\([0-9]+\\)\\." uri-feature)
      (intern (concat (make-string (string-to-int
                                    (match-string 1 uri-feature))
                                   ?=)
                      ">"
                      (substring uri-feature (match-end 0)))))
     (t
      ;; Bare name: prefer a spelling that is a known charset.
      (while (and ccs-prefixes (null decoded))
        (setq candidate (intern (concat (car ccs-prefixes) uri-feature)))
        (if (find-charset candidate)
            (setq decoded candidate))
        (setq ccs-prefixes (cdr ccs-prefixes)))
      (or decoded
          (intern uri-feature))))))
305
(defun www-uri-encode-feature-name (feature-name)
  "Return the URI spelling, a string, of the symbol FEATURE-NAME.
Leading markers are replaced by textual prefixes:
  =NAME -> rep.NAME     =>>NAME -> g.NAME     =>>>NAME -> gi.NAME
  =>> plus N more `>' -> giN.NAME
  =>NAME -> a.NAME      N `=' followed by `>' -> aN.NAME
  ->NAME -> to.NAME     <-NAME -> from.NAME
Any other name is returned as is."
  (let ((spelling (symbol-name feature-name)))
    (cond
     ((string-match "^=\\([^=>]+\\)" spelling)
      (format "rep.%s" (substring spelling (match-beginning 1))))
     ((string-match "^=>>\\([^=>]+\\)" spelling)
      (format "g.%s" (substring spelling (match-beginning 1))))
     ((string-match "^=>>>\\([^=>]+\\)" spelling)
      (format "gi.%s" (substring spelling (match-beginning 1))))
     ;; Three or more `>': encode how many follow the first two.
     ((string-match "^=>>\\(>+\\)" spelling)
      (let ((extra (length (match-string 1 spelling)))
            (tail (substring spelling (match-end 1))))
        (format "gi%d.%s" extra tail)))
     ((string-match "^=>\\([^=>]+\\)" spelling)
      (format "a.%s" (substring spelling (match-beginning 1))))
     ;; Several `=' before the `>': encode how many.
     ((string-match "^\\(=+\\)>" spelling)
      (let ((count (length (match-string 1 spelling)))
            (tail (substring spelling (match-end 0))))
        (format "a%d.%s" count tail)))
     ((string-match "^->" spelling)
      (format "to.%s" (substring spelling (match-end 0))))
     ((string-match "^<-" spelling)
      (format "from.%s" (substring spelling (match-end 0))))
     (t spelling))))
338
(defun www-uri-make-feature-name-url (uri-feature-name uri-char)
  "Return the view URL for URI-FEATURE-NAME of the object URI-CHAR.
Both arguments are inserted as given; the result has the form
VIEW-URL?feature=URI-FEATURE-NAME&char=URI-CHAR where VIEW-URL is
`chise-wiki-view-url'."
  (let ((query (format "feature=%s&char=%s" uri-feature-name uri-char)))
    (format "%s?%s" chise-wiki-view-url query)))
342
(defun www-uri-decode-object (genre char-rep)
  "Return the object of GENRE denoted by the URI text CHAR-REP.
Two spellings are understood:
 - FEATURE:CODE (the colon may also be written %3A).  FEATURE is
   decoded with `www-uri-decode-feature-name'; CODE is hexadecimal
   when it starts with 0x and is otherwise read as a Lisp object.  A
   numeric CODE for genre `character' goes through `decode-char';
   everything else goes through `concord-decode-object'.
 - anything else is URI-decoded as utf-8-mcs-er text.  For genre
   `character' the result is its only character (nil unless the text
   is exactly one character long); for other genres the text is looked
   up as an `=id'."
  (if (string-match "\\(%3A\\|:\\)" char-rep)
      (let* ((feature-part (substring char-rep 0 (match-beginning 0)))
             (code-part (substring char-rep (match-end 0)))
             (ccs (www-uri-decode-feature-name feature-part))
             (cpos (if (string-match "^0x" code-part)
                       (string-to-number
                        (substring code-part (match-end 0)) 16)
                     (car (read-from-string code-part)))))
        (if (and (eq genre 'character)
                 (numberp cpos))
            (decode-char ccs cpos)
          (concord-decode-object ccs cpos genre)))
    (let ((text (decode-uri-string char-rep 'utf-8-mcs-er)))
      (if (eq genre 'character)
          (when (= (length text) 1)
            (aref text 0))
        (concord-decode-object '=id text genre)))))
369
(defun www-uri-encode-char (char)
  "Return the URI spelling, a string, of the character CHAR.
A character that has a `=ucs' code is written as the %XX escapes of
its utf-8-mcs-er encoding.  Any other character is written as
CCS:0xCODE, where CCS is the first charset of a fixed preference list
in which CHAR is defined, then the charset reported by `split-char',
and finally `system-char-id'."
  (if (encode-char char '=ucs)
      (mapconcat (lambda (octet)
                   (format "%%%02X" octet))
                 (encode-coding-string (char-to-string char) 'utf-8-mcs-er)
                 "")
    (let ((candidates '(; =ucs
                        =cns11643-1 =cns11643-2 =cns11643-3
                        =cns11643-4 =cns11643-5 =cns11643-6 =cns11643-7
                        =gb2312 =gb12345
                        =jis-x0208 =jis-x0208@1990
                        =jis-x0212
                        =cbeta =jef-china3
                        =jis-x0213-1@2000 =jis-x0213-1@2004
                        =jis-x0208@1983 =jis-x0208@1978
                        =zinbun-oracle =>zinbun-oracle
                        =daikanwa
                        =gt =gt-k
                        =>>jis-x0208 =>>jis-x0213-1
                        =>jis-x0208 =>jis-x0213-1
                        =>>gt
                        =ruimoku-v6
                        =big5
                        =big5-cdp))
          ccs code)
      ;; Take the first charset of the list that defines CHAR.
      (while (and candidates (null code))
        (setq ccs (car candidates)
              candidates (cdr candidates)
              code (encode-char char ccs 'defined-only)))
      (cond
       (code
        (format "%s:0x%X" (www-uri-encode-feature-name ccs) code))
       ;; Otherwise fall back to the charset of CHAR itself.
       ((and (setq ccs (car (split-char char)))
             (setq code (encode-char char ccs)))
        (format "%s:0x%X" (www-uri-encode-feature-name ccs) code))
       (t
        (format "system-char-id:0x%X"
                (encode-char char 'system-char-id)))))))
412
413
414 ;;; @ Feature name presentation
415 ;;;
416
(defun www-format-feature-name-default (feature-name)
  "Return a display name for the symbol FEATURE-NAME.
The name is split at every `-', each piece is capitalized, and the
pieces are joined with single spaces."
  (let ((words (split-string (symbol-name feature-name) "-")))
    (mapconcat (function capitalize) words " ")))
424
(defun www-format-feature-name-as-metadata (feature-name &optional lang)
  "Return a display name for the metadata feature FEATURE-NAME.
When the name ends in a `*META' part, the part before it is formatted
with `www-format-feature-name*' (passing LANG along) and `*META' is
appended unchanged.  Otherwise the default formatting is used."
  (let ((spelling (symbol-name feature-name)))
    (if (string-match "\\*[^*]+$" spelling)
        (let* ((split (match-beginning 0))
               (base (substring spelling 0 split))
               (meta (substring spelling split)))
          (concat (www-format-feature-name* (intern base) lang)
                  meta))
      (www-format-feature-name-default feature-name))))
437
(defun www-format-feature-name-as-rel-to (feature-name)
  "Return FEATURE-NAME with its first two characters replaced by an arrow.
The replacement is the string written \\u2192 in the source."
  (let ((spelling (symbol-name feature-name)))
    (concat "\u2192" (substring spelling 2))))
440
(defun www-format-feature-name-as-rel-from (feature-name)
  "Return FEATURE-NAME with its first two characters replaced by an arrow.
The replacement is the string written \\u2190 in the source."
  (let ((spelling (symbol-name feature-name)))
    (concat "\u2190" (substring spelling 2))))
443
(defun www-format-feature-name-as-CCS (feature-name)
  "Return a display name for the charset feature FEATURE-NAME.
The name is split at `-' and the pieces are upcased and joined with
spaces.  A leading run of `=' and `>' in the first piece is set off by
a space, and a last piece made only of digits is attached with `-'
instead of a space."
  (let* ((pieces (split-string (symbol-name feature-name) "-"))
         (label (upcase (car pieces)))
         (remaining (cdr pieces)))
    ;; Separate the `=', `=>', `=>>' ... marker from the charset name.
    (if (string-match "^=+>*" label)
        (setq label (concat (substring label 0 (match-end 0))
                            " "
                            (substring label (match-end 0)))))
    (if (null remaining)
        label
      ;; Every piece but the last is simply upcased and appended.
      (while (cdr remaining)
        (setq label (concat label " " (upcase (car remaining)))
              remaining (cdr remaining)))
      (let ((final (car remaining)))
        (if (string-match "^[0-9]+$" final)
            (concat label "-" final)
          (concat label " " (upcase final)))))))
463
(defun www-format-feature-name* (feature-name &optional lang)
  "Return the unencoded display name of the feature FEATURE-NAME.
The sources are tried in this order:
 1. the `name@LANG' property (when LANG is given), then the `name'
    property of the feature itself;
 2. metadata formatting when the name contains a `*';
 3. the name of the nearest ancestor that has one, followed by the
    part of FEATURE-NAME that extends the ancestor's name;
 4. charset formatting when FEATURE-NAME names a charset;
 5. arrow formatting for names starting with -> or <-;
 6. the default formatting."
  (let ((spelling (symbol-name feature-name))
        (lang-key (and lang (intern (format "name@%s" lang)))))
    (cond
     ((and lang-key
           (char-feature-property feature-name lang-key)))
     ((char-feature-property feature-name 'name))
     ((string-match "\\*" spelling)
      (www-format-feature-name-as-metadata feature-name lang))
     (t
      (let ((current feature-name)
            ancestor inherited)
        ;; Climb the parents until one of them provides a name.
        (while (and (null inherited)
                    (setq ancestor (char-feature-name-parent current)))
          (setq inherited (or (and lang-key
                                   (char-feature-property ancestor lang-key))
                              (char-feature-property ancestor 'name))
                current ancestor))
        (cond
         (inherited
          ;; Keep the domain suffix that the ancestor's name lacks.
          (concat inherited
                  (substring spelling (length (symbol-name ancestor)))))
         ((find-charset feature-name)
          (www-format-feature-name-as-CCS feature-name))
         ((string-match "^\\(->\\)" spelling)
          (www-format-feature-name-as-rel-to feature-name))
         ((string-match "^\\(<-\\)" spelling)
          (www-format-feature-name-as-rel-from feature-name))
         (t
          (www-format-feature-name-default feature-name))))))))
501
(defun www-format-feature-name (feature-name &optional lang)
  "Return the display name of FEATURE-NAME, encoded for output.
The name comes from `www-format-feature-name*' (with LANG passed
along) and is then run through `www-format-encode-string'."
  (let ((plain (www-format-feature-name* feature-name lang)))
    (www-format-encode-string plain)))
505
506
507 ;;; @ Feature value presentation
508 ;;;
509
(defun www-format-value-as-kuten (value)
  "Return the integer VALUE formatted as two zero-padded numbers NN-NN.
The first number is VALUE shifted right by 8 bits minus 32, the second
is the low 8 bits of VALUE minus 32."
  (let ((high (lsh value -8))
        (low (logand value 255)))
    (format "%02d-%02d" (- high 32) (- low 32))))
514
(defun www-format-value-default (value &optional without-tags)
  "Return VALUE printed as Lisp syntax and encoded for output.
A list is printed element by element, the results being joined with
spaces; anything else is printed as a whole.  Each printed piece goes
through `www-format-encode-string' together with WITHOUT-TAGS."
  (cond
   ((listp value)
    (mapconcat (lambda (item)
                 (www-format-encode-string (format "%S" item)
                                           without-tags))
               value " "))
   (t
    (www-format-encode-string (format "%S" value) without-tags))))
524   
(defun www-format-value-as-char-list (value &optional without-tags)
  "Return VALUE formatted as a space-separated list of characters.
When VALUE is a list, each character element becomes a link to its
view page, or just the encoded character when WITHOUT-TAGS is non-nil;
other elements are shown as encoded text.  A VALUE that is not a list
is shown as encoded text."
  (cond
   ((not (listp value))
    (www-format-encode-string (format "%s" value) without-tags))
   (without-tags
    (mapconcat (lambda (item)
                 (www-format-encode-string
                  (format (if (characterp item) "%c" "%s") item)
                  'without-tags))
               value " "))
   (t
    (mapconcat (lambda (item)
                 (if (characterp item)
                     (format "<a href=\"%s?char=%s\">%s</a>"
                             chise-wiki-view-url
                             (www-uri-encode-char item)
                             (www-format-encode-string
                              (char-to-string item)))
                   (www-format-encode-string (format "%s" item))))
               value " "))))
545
(defun www-format-value-as-domain-list (value &optional without-tags)
  "Return VALUE, a list of source designators, formatted for output.
A VALUE that is not a list is shown as encoded text.  With
WITHOUT-TAGS non-nil the elements are printed and joined with spaces.
Otherwise each element is handled by its printed name:
 - zob1968=N or zob1968=N-M becomes links to the rubbings N through
   M; the collection link is emitted only when the previous element
   came from a different source;
 - any other element is shown as a space followed by its name, and is
   skipped when it is the same object as the previous source.
Return nil for an empty list when WITHOUT-TAGS is nil."
  (cond
   ((not (listp value))
    (www-format-encode-string (format "%s" value) without-tags))
   (without-tags
    (mapconcat (lambda (unit)
                 (format "%s" unit))
               value " "))
   (t
    (let ((rest value)
          name source0 source num dest unit start end ddest)
      (while rest
        (setq unit (car rest)
              rest (cdr rest))
        ;; Derive the name from the current unit every time.  Setting
        ;; it only for symbols let a non-symbol unit reuse the previous
        ;; unit's name, or reach `string-match' with nil when it came
        ;; first in the list.
        (setq name (if (symbolp unit)
                       (symbol-name unit)
                     (format "%s" unit)))
        (setq dest
              (concat
               dest
               (cond
                ((string-match "^zob1968=" name)
                 (setq source (intern (substring name 0 (match-end 0)))
                       num (substring name (match-end 0)))
                 ;; NUM is either a single number or a START-END range.
                 (if (string-match "^\\([0-9]+\\)-\\([0-9]+\\)$" num)
                     (setq start (string-to-number
                                  (match-string 1 num))
                           end (string-to-number
                                (match-string 2 num)))
                   (setq start (string-to-number num)
                         end start))
                 (setq ddest
                       (if (eq source source0)
                           (format
                            ", <a href=\"http://chise.zinbun.kyoto-u.ac.jp/koukotsu/rubbings/%04d\">%04d</a>"
                            start start)
                         (setq source0 source)
                         (format
                          " <a href=\"http://chise.zinbun.kyoto-u.ac.jp/koukotsu/\">%s</a>=<a href=\"http://chise.zinbun.kyoto-u.ac.jp/koukotsu/rubbings/%04d\">%04d</a>"
                          (www-format-encode-string "\u4EAC大人\u6587研甲\u9AA8")
                          start start)))
                 ;; Remaining members of the range, if any.
                 (setq start (1+ start))
                 (while (<= start end)
                   (setq ddest
                         (concat
                          ddest
                          (format
                           ", <a href=\"http://chise.zinbun.kyoto-u.ac.jp/koukotsu/rubbings/%04d\">%04d</a>"
                           start start)))
                   (setq start (1+ start)))
                 ddest)
                (t
                 (setq source unit)
                 (if (eq source source0)
                     ""
                   (setq source0 source)
                   (concat " " name)))))))
      dest))))
602
(defun www-format-value-as-ids (value &optional without-tags)
  "Return the structure VALUE formatted as a space-separated sequence.
When VALUE is a list it is first converted with
`ideographic-structure-to-ids'.  Each character element becomes a link
to its view page, or just the encoded character when WITHOUT-TAGS is
non-nil; other elements are shown as encoded text.  A VALUE that is
not a list is shown as encoded text."
  (cond
   ((not (listp value))
    (www-format-encode-string (format "%s" value) without-tags))
   (without-tags
    (mapconcat (lambda (item)
                 (www-format-encode-string
                  (format (if (characterp item) "%c" "%s") item)
                  'without-tags))
               (ideographic-structure-to-ids value) " "))
   (t
    (mapconcat (lambda (item)
                 (if (characterp item)
                     (format "<a href=\"%s?char=%s\">%s</a>"
                             chise-wiki-view-url
                             (www-uri-encode-char item)
                             (www-format-encode-string
                              (char-to-string item)))
                   (www-format-encode-string (format "%s" item))))
               (ideographic-structure-to-ids value) " "))))
623
(defun www-format-value-as-S-exp (value &optional without-tags)
  "Return VALUE printed as Lisp syntax and encoded for output.
WITHOUT-TAGS is handed to `www-format-encode-string'."
  (let ((printed (format "%S" value)))
    (www-format-encode-string printed without-tags)))
626
(defun www-format-value-as-HEX (value)
  "Return the integer VALUE as upper-case hexadecimal digits.
A VALUE that is not an integer is formatted as an S-expression."
  (cond ((integerp value) (format "%X" value))
        (t (www-format-value-as-S-exp value))))
631
(defun www-format-value-as-CCS-default (value)
  "Return the code point VALUE as hexadecimal followed by decimal.
The result looks like 0xHEX (DEC).  A VALUE that is not an integer is
formatted as an S-expression."
  (cond
   ((integerp value)
    (let ((hex (www-format-value-as-HEX value)))
      (format "0x%s (%d)" hex value)))
   (t
    (www-format-value-as-S-exp value))))
638
(defun www-format-value-as-CCS-94x94 (value)
  "Return the code point VALUE as hexadecimal, ku-ten and decimal.
The result looks like 0xHEX [KU-TEN] (DEC).  A VALUE that is not an
integer is formatted as an S-expression."
  (cond
   ((integerp value)
    (let ((hex (www-format-value-as-HEX value))
          (kuten (www-format-value-as-kuten value)))
      (format "0x%s [%s] (%d)" hex kuten value)))
   (t
    (www-format-value-as-S-exp value))))
646
(defun www-format-value-as-kangxi-radical (value)
  "Return the radical character for the radical number VALUE, encoded.
This applies when VALUE is an integer from 0 to 214 inclusive; the
character comes from `ideographic-radical'.  Any other VALUE is
formatted as an S-expression."
  (let ((in-range (and (integerp value)
                       (<= 0 value)
                       (<= value 214))))
    (if in-range
        (www-format-encode-string
         (format "%c" (ideographic-radical value)))
      (www-format-value-as-S-exp value))))
654
(defun www-format-value (object feature-name
                                &optional value format
                                without-tags without-edit)
  "Return the formatted value of FEATURE-NAME for OBJECT.
When VALUE is nil it is fetched with `www-get-feature-value'.  The
work is done by `www-format-apply-value', which receives OBJECT,
FEATURE-NAME, FORMAT, the value, WITHOUT-TAGS and WITHOUT-EDIT; its
remaining arguments are passed as nil."
  (let ((effective-value (or value
                             (www-get-feature-value object feature-name))))
    (www-format-apply-value object feature-name
                            format nil effective-value nil nil
                            without-tags without-edit)))
664
665
666 ;;; @ format evaluator
667 ;;;
668
669 (defun www-format-encode-string (string &optional without-tags)
670   (with-temp-buffer
671     (insert string)
672     (let (plane code start end char variants ret rret)
673       (goto-char (point-min))
674       (while (search-forward "<" nil t)
675         (replace-match "&lt;" nil t))
676       (goto-char (point-min))
677       (while (search-forward ">" nil t)
678         (replace-match "&gt;" nil t))
679       (if without-tags
680           (encode-coding-region (point-min)(point-max) 'utf-8-mcs-er)
681         (let ((coded-charset-entity-reference-alist
682                (list*
683                 '(=gt                   "GT-" 5 d)
684                 '(=cns11643-1           "C1-" 4 X)
685                 '(=cns11643-2           "C2-" 4 X)
686                 '(=cns11643-3           "C3-" 4 X)
687                 '(=cns11643-4           "C4-" 4 X)
688                 '(=cns11643-5           "C5-" 4 X)
689                 '(=cns11643-6           "C6-" 4 X)
690                 '(=cns11643-7           "C7-" 4 X)
691                 '(=gb2312               "G0-" 4 X)
692                 '(=gb12345              "G1-" 4 X)
693                 '(=jis-x0208@1990       "J90-" 4 X)
694                 '(=jis-x0212            "JSP-" 4 X)
695                 '(=cbeta                "CB" 5 d)
696                 '(=jis-x0208@1997       "J97-" 4 X)
697                 '(=jis-x0208@1978       "J78-" 4 X)
698                 '(=jis-x0208@1983       "J83-" 4 X)
699                 '(=ruimoku-v6           "RUI6-" 4 X)
700                 '(=zinbun-oracle        "ZOB-" 4 d)
701                 '(=jef-china3           "JC3-" 4 X)
702                 '(=daikanwa             "M-" 5 d)
703                 coded-charset-entity-reference-alist)))
704           (encode-coding-region (point-min)(point-max) 'utf-8-mcs-er)
705
706           (goto-char (point-min))
707           (while (re-search-forward "&CB\\([0-9]+\\);" nil t)
708             (setq code (string-to-int (match-string 1)))
709             (replace-match
710              (format "<img alt=\"CB%05d\" src=\"%s/cb-gaiji/%02d/CB%05d.gif\">"
711                      code
712                      chise-wiki-bitmap-glyphs-url
713                      (/ code 1000) code)
714              t 'literal))
715
716           (goto-char (point-min))
717           (while (re-search-forward "&J\\(78\\|83\\|90\\|97\\|SP\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
718             (setq plane (match-string 1)
719                   code (string-to-int (match-string 2) 16))
720             (replace-match
721              (format "<img alt=\"J%s-%04X\" src=\"%s/JIS-%s/%02d-%02d.gif\">"
722                      plane code
723                      chise-wiki-bitmap-glyphs-url
724                      plane
725                      (- (lsh code -8) 32)
726                      (- (logand code 255) 32))
727              t 'literal))
728
729           (goto-char (point-min))
730           (while (re-search-forward "&G\\([01]\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
731             (setq plane (string-to-int (match-string 1))
732                   code (string-to-int (match-string 2) 16))
733             (replace-match
734              (format "<img alt=\"GB%d-%04X\" src=\"%s/GB%d/%02d-%02d.gif\">"
735                      plane code
736                      chise-wiki-bitmap-glyphs-url
737                      plane
738                      (- (lsh code -8) 32)
739                      (- (logand code 255) 32))
740              t 'literal))
741
742           (goto-char (point-min))
743           (while (re-search-forward "&C\\([1-7]\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
744             (setq plane (string-to-int (match-string 1))
745                   code (string-to-int (match-string 2) 16))
746             (replace-match
747              (format "<img alt=\"CNS%d-%04X\" src=\"%s/CNS%d/%04X.gif\">"
748                      plane code
749                      chise-wiki-bitmap-glyphs-url
750                      plane code)
751              t 'literal))
752
753           (goto-char (point-min))
754           (while (re-search-forward "&JC3-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
755             (setq code (string-to-int (match-string 1) 16))
756             (replace-match
757              (format "<img alt=\"JC3-%04X\" src=\"http://kanji.zinbun.kyoto-u.ac.jp/db/CHINA3/Gaiji/%04x.gif\">"
758                      code code)
759              t 'literal))
760
761           (goto-char (point-min))
762           (while (re-search-forward "&\\(A-\\)?ZOB-\\([0-9]+\\);" nil t)
763             (setq code (string-to-int (match-string 2)))
764             (replace-match
765              (format "<img alt=\"ZOB-%04d\" src=\"%s/ZOB-1968/%04d.png\">"
766                      code
767                      chise-wiki-bitmap-glyphs-url
768                      code)
769              t 'literal))
770
771           (goto-char (point-min))
772           (while (re-search-forward "&\\(G-\\)?GT-\\([0-9]+\\);" nil t)
773             (setq code (string-to-int (match-string 2)))
774             (replace-match
775              (format "<img alt=\"GT-%05d\" src=\"%s?char=GT-%05d\">"
776                      code
777                      chise-wiki-glyph-cgi-url
778                      code)
779              t 'literal))
780
781           (goto-char (point-min))
782           (while (re-search-forward "&\\(G-\\)?GT-K\\([0-9]+\\);" nil t)
783             (setq code (string-to-int (match-string 2)))
784             (replace-match
785              (format "<img alt=\"GT-K%05d\" src=\"%s?char=GT-K%05d\">"
786                      code
787                      chise-wiki-glyph-cgi-url
788                      code)
789              t 'literal))
790
791           (goto-char (point-min))
792           (while (re-search-forward "&B-\\([0-9A-F]+\\);" nil t)
793             (setq code (string-to-int (match-string 1) 16))
794             (replace-match
795              (format "<img alt=\"B-%04X\" src=\"%s?char=B-%04X\">"
796                      code
797                      chise-wiki-glyph-cgi-url
798                      code)
799              t 'literal))
800
801           (goto-char (point-min))
802           (while (re-search-forward "&CDP-\\([0-9A-F]+\\);" nil t)
803             (setq code (string-to-int (match-string 1) 16))
804             (replace-match
805              (format "<img alt=\"CDP-%04X\" src=\"%s?char=CDP-%04X\">"
806                      code
807                      chise-wiki-glyph-cgi-url
808                      code)
809              t 'literal))
810
811           (goto-char (point-min))
812           (while (re-search-forward "&RUI6-\\([0-9A-F]+\\);" nil t)
813             (setq code (string-to-int (match-string 1) 16))
814             (replace-match
815              (format "<img alt=\"RUI6-%04X\" src=\"%s?char=RUI6-%04X\">"
816                      code
817                      chise-wiki-glyph-cgi-url
818                      code)
819              t 'literal))
820
821           (goto-char (point-min))
822           (while (re-search-forward "&\\(UU\\+\\|U-\\)\\([0-9A-F]+\\);" nil t)
823             (setq code (string-to-int (match-string 2) 16))
824             (replace-match
825              (format "<img alt=\"UU+%04X\" src=\"http://www.unicode.org/cgi-bin/refglyph?24-%04X\">"
826                      code
827                      code)
828              t 'literal))
829
830           (goto-char (point-min))
831           (while (re-search-forward "&MCS-\\([0-9A-F]+\\);" nil t)
832             (setq code (string-to-int (match-string 1) 16))
833             (setq start (match-beginning 0)
834                   end (match-end 0))
835             (setq char (decode-char 'system-char-id code))
836             (cond
837              ((and (setq variants
838                          (or (www-get-feature-value char '->subsumptive)
839                              (www-get-feature-value char '->denotational)))
840                    (progn
841                      (while (and variants
842                                  (setq ret (www-format-encode-string
843                                             (char-to-string (car variants))))
844                                  (string-match "&MCS-\\([0-9A-F]+\\);" ret))
845                        (setq variants (cdr variants)))
846                      ret))
847               (unless (string-match "&MCS-\\([0-9A-F]+\\);" ret)
848                 (goto-char start)
849                 (delete-region start end)
850                 (insert ret))
851               )
852              ((setq ret (or (www-get-feature-value char 'ideographic-combination)
853                             (www-get-feature-value char 'ideographic-structure)))
854               (setq ret
855                     (mapconcat
856                      (lambda (ch)
857                        (if (listp ch)
858                            (if (characterp (setq rret (find-char ch)))
859                                (setq ch rret)))
860                        (if (characterp ch)
861                            (www-format-encode-string
862                             (char-to-string ch) without-tags)
863                          (www-format-encode-string
864                           (format "%S" ch) without-tags)))
865                      ret ""))
866               (when ret
867                 (goto-char start)
868                 (delete-region start end)
869                 (insert ret))
870               )))
871           ))
872       ;; (goto-char (point-min))
873       ;; (while (search-forward "&GT-" nil t)
874       ;;   (replace-match "&amp;GT-" t 'literal))
875       (buffer-string))))
876
(defun www-format-props-to-string (props &optional format)
  "Build a `format' control string (such as \"%04X\") from PROPS.
PROPS is a property list; the keys read here are :flag, :len and,
when FORMAT is nil, :format.
The result is \"%\", then the :flag value (if any), then a
zero-padded width \"0N\" when :len is present (an integer or a
string of digits), then a conversion character chosen by FORMAT:
`decimal' -> d, `hex' -> x, `HEX' -> X, `S-exp' -> S, anything
else -> s."
  (let* ((kind (or format (plist-get props :format)))
         (width (plist-get props :len))
         (conversion (cdr (assq kind '((decimal . "d")
                                       (hex . "x")
                                       (HEX . "X")
                                       (S-exp . "S"))))))
    (concat "%"
            (plist-get props :flag)
            ;; :len may arrive as a string; normalize it to a number.
            (and width
                 (format "0%d"
                         (if (stringp width)
                             (string-to-int width)
                           width)))
            ;; Unknown or missing kinds are rendered with %s.
            (or conversion "s"))))
896
(defun www-format-apply-value (object feature-name
                                      format props value
                                      &optional uri-char uri-feature
                                      without-tags without-edit)
  "Render VALUE according to the format symbol FORMAT and return a string.
PROPS is the property list attached to the format unit.  OBJECT and
FEATURE-NAME are only used when FORMAT is `wiki-text' and tags are
allowed, in which case VALUE is evaluated as a format list.
Unless WITHOUT-TAGS or WITHOUT-EDIT is non-nil, or PROPS has
:mode `peek', an \"edit\" link to `chise-wiki-edit-url' built from
URI-CHAR, URI-FEATURE and FORMAT is appended to the result."
  (let ((rendered
         (cond
          ;; Numeric formats: only integers go through the numeric
          ;; control string; anything else is shown as plain text.
          ((memq format '(decimal hex HEX))
           (if (integerp value)
               (format (www-format-props-to-string props format)
                       value)
             (www-format-encode-string (format "%s" value)
                                       without-tags)))
          ((eq format 'string)
           (www-format-encode-string (format "%s" value) without-tags))
          ((eq format 'S-exp)
           (www-format-encode-string
            (format (www-format-props-to-string props format)
                    value)
            without-tags))
          ((eq format 'wiki-text)
           (if without-tags
               (www-xml-format-list value)
             (www-format-eval-list value object feature-name nil uri-char
                                   without-tags without-edit)))
          ((eq format 'ku-ten)
           (www-format-value-as-kuten value))
          ((eq format 'kangxi-radical)
           (www-format-value-as-kangxi-radical value))
          ((eq format 'space-separated-char-list)
           (www-format-value-as-char-list value without-tags))
          ((eq format 'space-separated-ids)
           (www-format-value-as-ids value without-tags))
          ((eq format 'space-separated-domain-list)
           (www-format-value-as-domain-list value without-tags))
          (t
           (www-format-value-default value without-tags)))))
    (cond
     ;; No edit control wanted: hand back the rendered value as is.
     ((or without-tags
          without-edit
          (eq (plist-get props :mode) 'peek))
      rendered)
     (t
      (format "%s <a href=\"%s?char=%s&feature=%s&format=%s\"
><input type=\"submit\" value=\"edit\" /></a>"
              rendered
              chise-wiki-edit-url
              uri-char uri-feature format)))))
949
(defun www-format-eval-feature-value (char
                                      feature-name
                                      &optional format lang uri-char value
                                      without-tags without-edit)
  "Render the value of FEATURE-NAME of CHAR and return it as a string.
VALUE defaults to the stored value of the feature, FORMAT to the
result of `www-feature-value-format' for FEATURE-NAME, and URI-CHAR
to the URI encoding of CHAR.
FORMAT is either a format symbol, which is applied directly to
VALUE, or a list of format units.  A list holding exactly one
cons unit (KIND PROPS) is applied directly as well; any other
list is evaluated unit by unit with `www-format-eval-list', to
which LANG is forwarded.
WITHOUT-TAGS and WITHOUT-EDIT are passed on to the renderers."
  (unless value
    (setq value (www-get-feature-value char feature-name)))
  (unless format
    (setq format (www-feature-value-format feature-name)))
  ;; Same default as `www-format-eval-unit'; without it the edit link
  ;; produced by `www-format-apply-value' would read "char=nil".
  (unless uri-char
    (setq uri-char (www-uri-encode-char char)))
  (cond
   ((symbolp format)
    (www-format-apply-value
     char feature-name
     format nil value
     uri-char (www-uri-encode-feature-name feature-name)
     without-tags without-edit))
   ((consp format)
    (if (and (null (cdr format))
             (consp (car format)))
        ;; Exactly one unit of the form (KIND PROPS): apply it to VALUE.
        ;; The `consp' guard keeps a lone string or symbol unit from
        ;; reaching `car' and signalling an error; such lists are
        ;; handled by the general evaluator below instead.
        (let ((unit (car format)))
          (www-format-apply-value
           char feature-name
           (car unit) (nth 1 unit) value
           uri-char (www-uri-encode-feature-name feature-name)
           without-tags without-edit))
      (www-format-eval-list format char feature-name lang uri-char
                            without-tags without-edit)))))
979
(defun www-format-eval-unit (exp char feature-name
                                 &optional lang uri-char value
                                 without-tags without-edit)
  "Evaluate one format unit EXP for FEATURE-NAME of CHAR; return a string.
VALUE defaults to the stored value of FEATURE-NAME and URI-CHAR to
the URI encoding of CHAR.
A string EXP is passed through `www-format-encode-string'; nil
yields the empty string.  A cons EXP has the shape
\(KIND PROPS CHILD...) and is dispatched on KIND:
  value, decimal, hex, HEX, ku-ten, kangxi-radical, S-exp, string,
  default -- render a feature value; a :feature entry in PROPS
    selects another feature (see the comments in the body).
  name -- the feature name, linked unless WITHOUT-TAGS.
  name-url -- only the URL of the feature name.
  domain-name -- \"@DOMAIN\", or nil when the feature has no domain.
  prev-char, next-char -- a button linking to the neighbouring
    defined code point; empty when WITHOUT-TAGS or when none exists.
  link -- an anchor whose target is the evaluated :ref property.
  anything else -- a generic element named KIND around the children.
Any other kind of EXP falls through and yields nil."
  (unless value
    (setq value (www-get-feature-value char feature-name)))
  (unless uri-char
    (setq uri-char (www-uri-encode-char char)))
  (cond
   ((stringp exp) (www-format-encode-string exp))
   ((null exp) "")
   ((consp exp)
    (cond
     ((memq (car exp) '(value decimal hex HEX ku-ten kangxi-radical
                              S-exp string default))
      ;; An explicit :feature redirects this unit to another feature.
      ;; FEATURE-NAME and VALUE (the parameters) are overwritten here so
      ;; that the rendering below works on the redirected feature.
      (let ((fn (plist-get (nth 1 exp) :feature))
            domain domain-fn ret)
        (when fn
          (when (stringp fn)
            (setq fn (intern fn)))
          ;; Try the requested feature in the domain of the current
          ;; feature first; fall back to the plain feature name when the
          ;; domain-qualified one has no value.
          (setq domain (char-feature-name-domain feature-name))
          (setq domain-fn (char-feature-name-at-domain fn domain))
          (if (setq ret (www-get-feature-value char domain-fn))
              (setq feature-name domain-fn
                    value ret)
            (setq feature-name fn
                  value (www-get-feature-value char fn)))
          ;; Record the feature that was actually rendered.
          (push feature-name chise-wiki-displayed-features)
          ))
      ;; `value' defers to the feature's own format (or the :format
      ;; property); every other kind is itself the format symbol.
      (if (eq (car exp) 'value)
          (www-format-eval-feature-value char feature-name
                                         (plist-get (nth 1 exp) :format)
                                         lang uri-char value
                                         without-tags without-edit)
        (www-format-apply-value
         char feature-name
         (car exp) (nth 1 exp) value
         uri-char (www-uri-encode-feature-name feature-name)
         without-tags without-edit))
      )
     ((eq (car exp) 'name)
      ;; A :feature property switches to that feature, qualified with
      ;; the domain of the current one.
      (let ((fn (plist-get (nth 1 exp) :feature))
            domain domain-fn)
        (when fn
          (setq domain (char-feature-name-domain feature-name))
          (when (stringp fn)
            (setq fn (intern fn)))
          (setq domain-fn (char-feature-name-at-domain fn domain))
          (setq feature-name domain-fn)))
      (if without-tags
          (www-format-feature-name feature-name lang)
        (format "<a href=\"%s\">%s</a>"
                (www-uri-make-feature-name-url
                 (www-uri-encode-feature-name feature-name)
                 uri-char)
                (www-format-feature-name feature-name lang))
        )
      )
     ((eq (car exp) 'name-url)
      ;; Same feature resolution as `name', but only the URL is returned.
      (let ((fn (plist-get (nth 1 exp) :feature))
            domain domain-fn)
        (when fn
          (setq domain (char-feature-name-domain feature-name))
          (when (stringp fn)
            (setq fn (intern fn)))
          (setq domain-fn (char-feature-name-at-domain fn domain))
          (setq feature-name domain-fn)))
      (www-uri-make-feature-name-url
       (www-uri-encode-feature-name feature-name)
       uri-char)
      )
     ((eq (car exp) 'domain-name)
      ;; Returns nil (not "") when the feature name has no domain.
      (let ((domain (char-feature-name-domain feature-name)))
        (if domain
            (format "@%s" domain))))
     ((eq (car exp) 'prev-char)
      ;; Navigation buttons are markup, so they vanish in tag-less output.
      (if without-tags
          ""
        (let ((prev-char (find-previous-defined-code-point
                          feature-name value)))
          (if prev-char
              (format "\n<a href=\"%s?char=%s\">%s</a>"
                      chise-wiki-view-url
                      (www-uri-encode-char prev-char)
                      "<input type=\"submit\" value=\"-\" />"
                      ;; (www-format-encode-string
                      ;;  (char-to-string prev-char))
                      )
            "")))
      )
     ((eq (car exp) 'next-char)
      (if without-tags
          ""
        (let ((next-char (find-next-defined-code-point
                          feature-name value)))
          (if next-char
              (format "<a href=\"%s?char=%s\">%s</a>"
                      chise-wiki-view-url
                      (www-uri-encode-char next-char)
                      "<input type=\"submit\" value=\"+\" />"
                      ;; (www-format-encode-string
                      ;;  (char-to-string next-char))
                      )
            "")))
      )
     ((eq (car exp) 'link)
      ;; Without tags only the link text (the children) is produced.
      (if without-tags
          (www-format-eval-list (nthcdr 2 exp)
                                char feature-name lang uri-char
                                without-tags without-edit)
        (format "<a
 href=\"%s\"
>%s</a
>"
                ;; The target is evaluated with tags and edit controls
                ;; suppressed, since it ends up inside an attribute.
                (www-format-eval-list (plist-get (nth 1 exp) :ref)
                                      char feature-name lang uri-char
                                      'without-tags 'without-edit)
                (www-format-eval-list (nthcdr 2 exp)
                                      char feature-name lang uri-char
                                      without-tags without-edit)))
      )
     (t
      ;; Unknown kinds become an element of the same name wrapped
      ;; around the evaluated children; PROPS is not emitted.
      (format "<%s
>%s</%s
>"
              (car exp)
              (www-format-eval-list (nthcdr 2 exp) char feature-name
                                    lang uri-char
                                    without-tags without-edit)
              (car exp)))))))
1109
(defun www-format-eval-list (format-list char feature-name
                                         &optional lang uri-char
                                         without-tags without-edit)
  "Evaluate FORMAT-LIST for FEATURE-NAME of CHAR and return a string.
When FORMAT-LIST is a cons, each element is evaluated with
`www-format-eval-unit' and the results are concatenated; otherwise
FORMAT-LIST itself is evaluated as a single unit.  No explicit
value is handed to the units, so each one looks the value up itself.
LANG, URI-CHAR, WITHOUT-TAGS and WITHOUT-EDIT are forwarded unchanged."
  (let ((render-unit
         (lambda (unit)
           (www-format-eval-unit unit char feature-name lang uri-char
                                 nil without-tags without-edit))))
    (if (atom format-list)
        (funcall render-unit format-list)
      (mapconcat render-unit format-list ""))))
1121
1122
1123 ;;; @ XML generator
1124 ;;;
1125
(defun www-xml-format-props (props)
  "Format the property list PROPS as XML attributes and return a string.
Each key/value pair becomes \" KEY=\\\"VALUE\\\"\" (note the leading
space).  Symbol keys are converted to their names and a leading
colon is dropped; values are printed with %s and then passed
through `www-format-encode-string' without tags.  An empty PROPS
yields the empty string."
  (let ((attrs "")
        (tail props)
        attr-name attr-value)
    (while tail
      (setq attr-name (car tail)
            attr-value (car (cdr tail))
            tail (cdr (cdr tail)))
      (when (symbolp attr-name)
        (setq attr-name (symbol-name attr-name)))
      ;; Keyword keys such as :ref are written without the colon.
      (when (eq (aref attr-name 0) ?:)
        (setq attr-name (substring attr-name 1)))
      (setq attrs
            (format "%s %s=\"%s\""
                    attrs attr-name
                    (www-format-encode-string
                     (format "%s" attr-value) 'without-tags))))
    attrs))
1142
(defun www-xml-format-unit (format-unit)
  "Serialize one wiki-text unit FORMAT-UNIT as XML text; return a string.
A string is encoded with `www-format-encode-string' (without tags)
and every \"&\" of the result is then written as \"&amp;\".
A cons (NAME PROPS CHILD...) becomes an element NAME with PROPS as
attributes; it is self-closing when there are no children.  For a
`link' unit whose :ref property is not a string, the property is
removed and its value is emitted as a leading <ref> child instead.
Any other object is printed with %s."
  (cond
   ((stringp format-unit)
    ;; Only the ampersand is escaped a second time here.
    (mapconcat (lambda (c)
                 (if (eq c ?&)
                     "&amp;"
                   (char-to-string c)))
               (www-format-encode-string format-unit 'without-tags)
               ""))
   ((consp format-unit)
    (let ((tag (car format-unit))
          (attrs (nth 1 format-unit))
          (body (nthcdr 2 format-unit))
          target attr-text)
      (when (eq tag 'link)
        (setq target (plist-get attrs :ref))
        (unless (stringp target)
          ;; Work on a copy so the caller's property list is untouched.
          (setq attrs (plist-remprop (copy-list attrs) :ref)
                body (cons (list* 'ref nil target) body))))
      (setq attr-text (if attrs
                          (www-xml-format-props attrs)
                        ""))
      (if body
          (format "<%s%s>%s</%s>"
                  tag attr-text (www-xml-format-list body) tag)
        (format "<%s%s/>" tag attr-text))))
   (t
    (format "%s" format-unit))))
1181
(defun www-xml-format-list (format-list)
  "Serialize FORMAT-LIST as XML text and return a string.
A cons is treated as a sequence of units whose serializations are
concatenated; any atom (including nil) is serialized as a single
unit by `www-xml-format-unit'."
  (cond
   ((consp format-list)
    (mapconcat #'www-xml-format-unit format-list ""))
   (t
    (www-xml-format-unit format-list))))
1187
1188
1189 ;;; @ HTML generator
1190 ;;;
1191
(defun www-html-display-text (text)
  "Print TEXT to `standard-output' as HTML and return the printed string.
\"<\" and \">\" are replaced by character references, wiki links
of the form [[LABEL|URL]] become anchors, the text is encoded with
the `utf-8-mcs-er' coding system, and finally every \"&GT-\" is
written as \"&amp;GT-\" so that it is shown literally.
\"&\" is otherwise left untouched."
  (princ
   (with-temp-buffer
     (insert text)
     ;; All replacements below pass FIXEDCASE = t.  With nil,
     ;; `replace-match' adapts the case of the replacement to the
     ;; matched text, which turned \"&amp;GT-\" into \"&AMP;GT-\" and
     ;; upcased the whole anchor for an all-caps [[LABEL|URL]].
     (goto-char (point-min))
     (while (search-forward "<" nil t)
       (replace-match "&lt;" t t))
     (goto-char (point-min))
     (while (search-forward ">" nil t)
       (replace-match "&gt;" t t))
     (goto-char (point-min))
     (while (re-search-forward "\\[\\[\\([^]|[]+\\)|\\([^][]+\\)\\]\\]" nil t)
       (let ((label (match-string 1))
             ;; The URL lands inside a double-quoted attribute, so a
             ;; literal quote must not be able to terminate it.
             (href (mapconcat (lambda (c)
                                (if (eq c ?\")
                                    "&quot;"
                                  (char-to-string c)))
                              (match-string 2)
                              "")))
         (replace-match
          (format "<a href=\"%s\">%s</a>" href label)
          t t)))
     (encode-coding-region (point-min)(point-max) 'utf-8-mcs-er)
     (goto-char (point-min))
     (while (search-forward "&GT-" nil t)
       (replace-match "&amp;GT-" t t))
     (buffer-string))))
1214
(defun www-html-display-paragraph (text)
  "Print TEXT as one HTML paragraph.
The opening tag, TEXT rendered by `www-html-display-text', and the
closing tag followed by a newline are printed in that order."
  (let ((open-tag "<p>")
        (close-tag "</p>\n"))
    (princ open-tag)
    (www-html-display-text text)
    (princ close-tag)))
1219
1220
1221 ;;; @ for GlyphWiki
1222 ;;;
1223
;; Entries are tried in order by `char-GlyphWiki-id'; the first feature
;; for which the character has a value decides the identifier.
(defvar coded-charset-GlyphWiki-id-alist
  '((=ucs               "u"     4 x nil)
    (=ucs@JP            "u"     4 x nil)
    (=ucs@jis           "u"     4 x nil)
    (=ucs@gb            "u"     4 x "-g")
    (=ucs@cns           "u"     4 x "-t")
    (=ucs@ks            "u"     4 x "-k")
    (=ucs@iso           "u"     4 x "-u")
    (=ucs@unicode       "u"     4 x "-us")
    (=adobe-japan1-6    "aj1-"  5 d nil)
    (=gt                "gt-"   5 d nil)
    (=big5-cdp          "cdp-"  4 x nil)
    (=cbeta             "cb"    5 d nil)
    (=jis-x0208@1978/1pr "j78-" 4 x nil)
    (=jis-x0208@1978/-4pr "j78-" 4 x nil)
    (=jis-x0208@1978    "j78-"  4 x nil)
    (=jis-x0208@1983    "j83-"  4 x nil)
    (=jis-x0208@1990    "j90-"  4 x nil)
    (=jis-x0212         "jsp-"  4 x nil)
    (=jis-x0213-1@2000  "jx1-2000-" 4 x nil)
    (=jis-x0213-1@2004  "jx1-2004-" 4 x nil)
    (=jis-x0213-2       "jx2-"  4 x nil)
    (=cns11643-1        "c1-"   4 x nil)
    (=cns11643-2        "c2-"   4 x nil)
    (=cns11643-3        "c3-"   4 x nil)
    (=cns11643-4        "c4-"   4 x nil)
    (=cns11643-5        "c5-"   4 x nil)
    (=cns11643-6        "c6-"   4 x nil)
    (=cns11643-7        "c7-"   4 x nil)
    (=daikanwa          "dkw-"  5 d nil)
    (=gt-k              "gt-k"  5 d nil)
    (=jef-china3        "jc3-"  4 x nil)
    (=big5              "b-"    4 x nil)
    (=ks-x1001          "k0-"   4 x nil)
    )
  "Alist used by `char-GlyphWiki-id' to build identifiers.
Each entry has the form (FEATURE PREFIX WIDTH CONVERSION SUFFIX):
FEATURE is the character feature whose value supplies the code,
PREFIX is the text placed before the code, WIDTH is the
zero-padded field width, CONVERSION is the `format' conversion
character for the code (x for lower-case hexadecimal, d for
decimal), and SUFFIX is text appended after the code, or nil for
none.")
1259
(defun char-GlyphWiki-id (char)
  "Return an identifier string for CHAR built from its coded-charset features.
The entries of `coded-charset-GlyphWiki-id-alist' are tried in
order and the first feature for which CHAR has a value is used;
nil is returned when none applies.
When that feature is `=ucs@unicode' or `=ucs@iso', a
source-specific UCS form is preferred if CHAR is also encodable
in a JIS, GB, CNS or KS charset and has a code in the matching
=ucs@... feature.  Otherwise the identifier is assembled from the
prefix, width, conversion and suffix of the matching alist entry."
  (let ((rest coded-charset-GlyphWiki-id-alist)
        spec ret code)
    ;; Walk the table until an entry yields a value; afterwards SPEC is
    ;; the entry last tried and RET its value (nil if nothing matched).
    (while (and rest
                (setq spec (pop rest))
                (null (setq ret (char-feature char (car spec))))))
    (when ret
      (or
       ;; The list must hold plain symbols: the former nested quote made
       ;; the second element the list (quote =ucs@iso), so `memq' never
       ;; matched `=ucs@iso' and that case skipped the refinement below.
       (and (memq (car spec) '(=ucs@unicode =ucs@iso))
            (cond
             ((and (or (encode-char char '=jis-x0208@1990)
                       (encode-char char '=jis-x0212)
                       (encode-char char '=jis-x0213-1))
                   (setq code (encode-char char '=ucs@jis)))
              (format "u%04x" code)
              )
             ((and (or (encode-char char '=gb2312)
                       (encode-char char '=gb12345))
                   (setq code (encode-char char '=ucs@gb)))
              (format "u%04x-g" code)
              )
             ((and (or (encode-char char '=cns11643-1)
                       (encode-char char '=cns11643-2)
                       (encode-char char '=cns11643-3)
                       (encode-char char '=cns11643-4)
                       (encode-char char '=cns11643-5)
                       (encode-char char '=cns11643-6)
                       (encode-char char '=cns11643-7))
                   (setq code (encode-char char '=ucs@cns)))
              (format "u%04x-t" code)
              )
             ((and (encode-char char '=ks-x1001)
                   (setq code (encode-char char '=ucs@ks)))
              (format "u%04x-k" code)
              )))
       ;; Generic case: first build a control string such as "u%04x-g"
       ;; from the table entry, then apply it to the code.
       (format (format "%s%%0%d%s%s"
                       (nth 1 spec)
                       (nth 2 spec)
                       (nth 3 spec)
                       (or (nth 4 spec) ""))
               ret)))))
1301
1302
1303 ;;; @ End.
1304 ;;;
1305
1306 (provide 'cwiki-common)
1307
1308 ;;; cwiki-common.el ends here