(www-format-encode-string): Support `=hanyo-denshi/*',
[chise/est.git] / cwiki-common.el
1 ;; -*- coding: utf-8-mcs-er -*-
2 (require 'char-db-util)
3
;; Use the same coding system for file names as this file declares for itself.
(setq file-name-coding-system 'utf-8-mcs-er)


;; Genres stored in the Ruimoku database.
(dolist (genre '(creator@ruimoku
                 person-name@ruimoku

                 journal-volume@ruimoku
                 article@ruimoku
                 book@ruimoku

                 classification@ruimoku
                 region@ruimoku
                 era@ruimoku
                 period@ruimoku
                 journal@ruimoku
                 journal-name@ruimoku
                 publisher@ruimoku
                 publisher-name@ruimoku))
  (concord-assign-genre genre "/usr/local/var/ruimoku/db"))

(mount-char-attribute-table '*instance@ruimoku/bibliography/title)
;; (mount-char-attribute-table '*instance@ruimoku/bibliography/content*note)

;; Genres stored in the kanbun database.  The two `*-entry' genres are
;; kept here, disabled, exactly as they were in the original list.
(dolist (genre '(entry@zh-classical
                 ;; morpheme-entry@zh-classical
                 word-class@zh-classical
                 morpheme@zh-classical
                 sentence@zh-classical
                 ;; sentence-entry@zh-classical
                 ))
  (concord-assign-genre genre "/usr/local/var/kanbun/db"))

(mount-char-attribute-table '*instance@morpheme-entry/zh-classical)
34
35
(defvar chise-wiki-view-url "view.cgi"
  "URL of the viewer CGI.
It is used as the base of the object and feature links generated in
this file.")

(defvar chise-wiki-edit-url "edit.cgi"
  "URL of the editing CGI.")

(defvar chise-wiki-bitmap-glyphs-url
  "http://www.chise.org/glyphs"
  "Base URL under which bitmap glyph images are referenced.")

(defvar chise-wiki-glyph-cgi-url
  "http://www.chise.org/chisewiki/glyph.cgi"
  "URL of the glyph CGI, queried with a `char' parameter for glyph images.")

(defvar chise-wiki-displayed-features nil
  "Initially nil.
NOTE(review): presumably records features already displayed; it is not
referenced in the visible part of this file, so confirm with its users.")
46
(defun decode-uri-string (string &optional coding-system)
  "Decode the URI-escaped STRING and return the result.
Every `+' is turned into a space, then every `%XX' escape is replaced
by the octet it names, and the resulting octet string is decoded with
CODING-SYSTEM.  Return nil when STRING is empty or nil.

Hexadecimal digits are accepted in either case regardless of the
current value of `case-fold-search'."
  (if (> (length string) 0)
      (let ((i 0)
            pieces)
        ;; `+' must be translated before the escapes are expanded, so
        ;; that an escaped plus sign (`%2B') survives as a literal `+'.
        (setq string
              (mapconcat (lambda (char)
                           (if (eq char ?+)
                               " "
                             (char-to-string char)))
                         string ""))
        ;; Collect the pieces in reverse order and join them once at
        ;; the end instead of re-copying the result for every escape.
        (while (string-match "%\\([0-9A-Fa-f][0-9A-Fa-f]\\)" string i)
          (setq pieces (cons (char-to-string
                              (int-char
                               (string-to-int (match-string 1 string) 16)))
                             (cons (substring string i (match-beginning 0))
                                   pieces))
                i (match-end 0)))
        (decode-coding-string
         (apply #'concat (nreverse (cons (substring string i) pieces)))
         coding-system))))
67
(defun www-feature-type (feature-name)
  "Return the type of the feature FEATURE-NAME.
The `type' property of the feature wins when it is set.  Otherwise the
type is guessed from the name: `stext' for `*note' metadata,
`domain-list' for `*sources' metadata, nil for any other name that
contains `*', `relation' for names starting with `->' or `<-', and
`structure' for `ideographic-structure'.  Return nil when nothing
matches."
  (let ((declared (char-feature-property feature-name 'type))
        name)
    (cond
     (declared)
     ((progn
        (setq name (symbol-name feature-name))
        (string-match "\\*note\\(@[^*]+\\)?$" name))
      'stext)
     ((string-match "\\*sources\\(@[^*]+\\)?$" name)
      'domain-list)
     ;; Any other metadata feature has no guessed type.
     ((string-match "\\*" name)
      nil)
     ((string-match "^\\(->\\|<-\\)" name)
      'relation)
     ((string-match "^ideographic-structure\\(@\\|$\\)" name)
      'structure))))
83
(defun www-feature-format (feature-name)
  "Return the display format of the feature FEATURE-NAME.
Look at the `format' property of FEATURE-NAME itself, then at that of
each successive parent feature name.  When none of them defines one,
return the default format `((name) \" : \" (value))'."
  (let ((fmt (char-feature-property feature-name 'format))
        (ancestor feature-name))
    ;; Climb towards the root until a format is found or no parent is left.
    (while (and (null fmt)
                (setq ancestor (char-feature-name-parent ancestor)))
      (setq fmt (char-feature-property ancestor 'format)))
    (or fmt
        '((name) " : " (value)))))
95
(defun www-feature-value-format (feature-name)
  "Return the format used to present values of the feature FEATURE-NAME.
The `value-presentation-format' and `value-format' properties are
consulted, in that order, first on FEATURE-NAME and then on each
successive parent feature name.  Failing that, the format is derived
from the feature type, then from the charset of the same name if there
is one, and finally defaults to `space-separated'."
  (let ((fmt (or (char-feature-property feature-name
                                        'value-presentation-format)
                 (char-feature-property feature-name 'value-format)))
        (ancestor feature-name)
        type)
    (while (and (null fmt)
                (setq ancestor (char-feature-name-parent ancestor)))
      (setq fmt (or (char-feature-property ancestor
                                           'value-presentation-format)
                    (char-feature-property ancestor 'value-format))))
    (cond
     (fmt)
     ((eq (setq type (www-feature-type feature-name)) 'relation)
      'space-separated)
     ((eq type 'structure)
      'space-separated-ids)
     ((eq type 'stext)
      'wiki-text)
     ((find-charset feature-name)
      ;; Only 94x94 charsets additionally get a ku-ten column.
      (if (and (= (charset-dimension feature-name) 2)
               (= (charset-chars feature-name) 94))
          '("0x" (HEX)
            " (" (decimal) ") <" (ku-ten) "> " (prev-char) (next-char))
        '("0x" (HEX) " (" (decimal) ") " (prev-char) (next-char))))
     (t
      'space-separated))))
124
(defun char-feature-name-at-domain (feature-name domain)
  "Return the feature name FEATURE-NAME qualified with DOMAIN.
When FEATURE-NAME already ends in a domain part (an `@' followed by
text without `*'), DOMAIN is appended after `/'; otherwise it is
appended after `@'.  When DOMAIN is nil, return FEATURE-NAME as is."
  (cond
   ((null domain)
    feature-name)
   (t
    (let ((name (symbol-name feature-name)))
      (intern (format (if (string-match "@[^*]+$" name)
                          "%s/%s"
                        "%s@%s")
                      name domain))))))
136
(defun char-feature-name-parent (feature-name)
  "Return the parent of the feature name FEATURE-NAME, or nil.
The parent is FEATURE-NAME without the last `/'-separated component of
its domain part, or without the whole `@' domain part when that part
has a single component.  Return nil when there is no domain part."
  (let ((name (symbol-name feature-name)))
    (when (string-match "@[^@/*]+\\(/[^@/*]+\\)*$" name)
      ;; Group 1 holds the last `/component' when there is one;
      ;; otherwise cut at the `@' itself.
      (intern (substring name 0 (or (match-beginning 1)
                                    (match-beginning 0)))))))
141
(defun char-feature-name-domain (feature-name)
  "Return the domain part of the feature name FEATURE-NAME as a symbol.
The domain part is what follows the `@' at the end of the name.
Return nil when the name has no such part."
  (let* ((name (symbol-name feature-name))
         (at (string-match "@[^@/*]+\\(/[^@/*]+\\)*$" name)))
    (and at
         (intern (substring name (1+ at))))))
146
(defun char-feature-name-sans-versions (feature)
  "Return FEATURE without a trailing `$rev=latest' domain component.
The component is removed together with the `@' or `/' in front of it.
FEATURE itself is returned when its name does not end that way."
  (let* ((name (symbol-name feature))
         (cut (string-match "[@/]\\$rev=latest$" name)))
    (if cut
        (intern (substring name 0 cut))
      feature)))
152
(defun est-object-genre (object)
  "Return the genre of OBJECT.
This is the symbol `character' for a character, and whatever
`concord-object-genre' returns for anything else."
  (cond
   ((characterp object) 'character)
   (t (concord-object-genre object))))
157
(defun www-get-feature-value (object feature)
  "Return the value of FEATURE for OBJECT, preferring the latest revision.
The `$rev=latest' variant of FEATURE is tried first and FEATURE itself
second.  For a character, the attribute table of the latest variant is
mounted before the lookup.  For any other object, an error raised by
`concord-object-get' counts as a missing value."
  (let ((latest-feature (char-feature-name-at-domain feature '$rev=latest)))
    (if (characterp object)
        (progn
          (mount-char-attribute-table latest-feature)
          (or (char-feature object latest-feature)
              (char-feature object feature)))
      (let ((candidates (list latest-feature feature))
            value)
        (while (and candidates (null value))
          (setq value (condition-case nil
                          (concord-object-get object (car candidates))
                        (error nil))
                candidates (cdr candidates)))
        value))))
174
(defun get-adjacent-code-point (ccs code forward)
  "Return the code point next to CODE in the coded character set CCS.
Step to the following code point when FORWARD is non-nil and to the
preceding one otherwise.  Return nil when stepping runs off the end of
the code space.  This is the shared implementation of
`get-previous-code-point' and `get-next-code-point'."
  (let* ((chars (charset-chars ccs))
         (dim (charset-dimension ccs))
         ;; (MASK BYTE-MIN BYTE-MAX) of one byte position.
         ;; NOTE(review): for 128-character sets the maximum (#xFF) lies
         ;; outside the 7-bit mask; this is kept as it was - confirm
         ;; whether #x7F was intended.
         (range (cond ((= chars 94) '(#x7F 33 126))
                      ((= chars 96) '(#x7F 32 127))
                      ((= chars 128) '(#x7F 0 #xFF))
                      (t '(#xFF 0 #xFF)))) ; (= chars 256)
         (mask (nth 0 range))
         (byte-min (nth 1 range))
         (byte-max (nth 2 range))
         (delta (if forward 1 -1))
         (wrap-value (if forward byte-min byte-max))
         (bytes (make-vector dim 0))
         (i 0)
         dest)
    ;; Split CODE into bytes, least significant byte at index 0.
    (while (< i dim)
      (aset bytes i (logand (lsh code (* i -8)) mask))
      (setq i (1+ i)))
    ;; Step the lowest byte; while a byte leaves its valid range, wrap
    ;; it around and carry the step into the next higher byte.
    (setq i 0)
    (while (and (< i dim)
                (let ((byte (+ (aref bytes i) delta)))
                  (aset bytes i byte)
                  (if forward
                      (> byte byte-max)
                    (< byte byte-min))))
      (aset bytes i wrap-value)
      (setq i (1+ i)))
    ;; I equals DIM only if the carry fell off the highest byte.
    (when (< i dim)
      (setq dest (aref bytes 0)
            i 1)
      (while (< i dim)
        (setq dest (logior dest (lsh (aref bytes i) (* i 8)))
              i (1+ i)))
      dest)))

(defun get-previous-code-point (ccs code)
  "Return the code point preceding CODE in the coded character set CCS.
Return nil when CODE is the first code point of the code space."
  (get-adjacent-code-point ccs code nil))

(defun get-next-code-point (ccs code)
  "Return the code point following CODE in the coded character set CCS.
Return nil when CODE is the last code point of the code space."
  (get-adjacent-code-point ccs code t))
266
(defun find-previous-defined-code-point (ccs code)
  "Return the nearest character defined before CODE in CCS, or nil.
Code points are scanned downwards from the one before CODE.  The
lookup uses `=jis-x0208@1990' for `=jis-x0208' and `=jis-x0213-1@2004'
for `=jis-x0213-1'.  Except for `=ucs', only defined code points are
accepted."
  (let* ((point (get-previous-code-point ccs code))
         (lookup-ccs (cond ((eq ccs '=jis-x0208) '=jis-x0208@1990)
                           ((eq ccs '=jis-x0213-1) '=jis-x0213-1@2004)
                           (t ccs)))
         (defined-only (if (eq lookup-ccs '=ucs)
                           nil
                         'defined-only))
         found)
    (while (and point
                (>= point 0)
                (null (setq found
                            (decode-char lookup-ccs point defined-only))))
      (setq point (get-previous-code-point lookup-ccs point)))
    found))
282
(defun find-next-defined-code-point (ccs code)
  "Return the nearest character defined after CODE in CCS, or nil.
Code points are scanned upwards from the one after CODE, giving up
above CODE + 1000.  The lookup uses `=jis-x0208@1990' for `=jis-x0208'
and `=jis-x0213-1@2004' for `=jis-x0213-1'.  Except for `=ucs', only
defined code points are accepted."
  (let* ((point (get-next-code-point ccs code))
         (limit (+ code 1000))
         (lookup-ccs (cond ((eq ccs '=jis-x0208) '=jis-x0208@1990)
                           ((eq ccs '=jis-x0213-1) '=jis-x0213-1@2004)
                           (t ccs)))
         (defined-only (if (eq lookup-ccs '=ucs)
                           nil
                         'defined-only))
         found)
    (while (and point
                (<= point limit)
                (null (setq found
                            (decode-char lookup-ccs point defined-only))))
      (setq point (get-next-code-point lookup-ccs point)))
    found))
299
300
301 ;;; @ URI representation
302 ;;;
303
(defun www-uri-decode-feature-name (uri-feature)
  "Return the feature name symbol denoted by the URI form URI-FEATURE.
URI-FEATURE is URI-decoded first.  A leading `from.', `to.', `rep.',
`g.', `gi.', `giN.', `o.', `a.' or `aN.' is translated back into the
feature-name prefix it stands for.  A name without such a prefix is
tried with `=>', `=>>', `=>>>' and `=' in front, and the first
candidate that names a charset is returned; failing that, the name is
interned unchanged."
  (let ((name (decode-uri-string uri-feature 'utf-8-mcs-er))
        ;; Fixed URI prefixes; the patterns cannot match the same name.
        (fixed-prefixes '(("^from\\." . "<-")
                          ("^to\\."   . "->")
                          ("^rep\\."  . "=")
                          ("^g\\."    . "=>>")
                          ("^gi\\."   . "=>>>")
                          ("^o\\."    . "=+>")
                          ("^a\\."    . "=>")))
        (ccs-prefixes '("=>" "=>>" "=>>>" "="))
        feature candidate)
    (while (and fixed-prefixes (null feature))
      (if (string-match (car (car fixed-prefixes)) name)
          (setq feature (intern (concat (cdr (car fixed-prefixes))
                                        (substring name (match-end 0))))))
      (setq fixed-prefixes (cdr fixed-prefixes)))
    (cond
     (feature)
     ;; giN. : `=>>' followed by N more `>'.
     ((string-match "^gi\\([0-9]+\\)\\." name)
      (intern (format "=>>%s%s"
                      (make-string (string-to-int (match-string 1 name))
                                   ?>)
                      (substring name (match-end 0)))))
     ;; aN. : N `=' followed by a single `>'.
     ((string-match "^a\\([0-9]+\\)\\." name)
      (intern (format "%s>%s"
                      (make-string (string-to-int (match-string 1 name))
                                   ?=)
                      (substring name (match-end 0)))))
     (t
      (while (and ccs-prefixes (null feature))
        (setq candidate (intern (concat (car ccs-prefixes) name))
              ccs-prefixes (cdr ccs-prefixes))
        (if (find-charset candidate)
            (setq feature candidate)))
      (or feature
          (intern name))))))
356
(defun www-uri-encode-feature-name (feature-name)
  "Return the URI form (a string) of the feature name FEATURE-NAME.
The leading marker of the name is spelled out as a dotted prefix:
`=+>' as `o.', `=' as `rep.', `=>>' as `g.', `=>>>' as `gi.', `=>>'
plus N further `>' as `giN.', `=>' as `a.', N `=' plus `>' as `aN.',
`->' as `to.' and `<-' as `from.'.  Any other name is returned
unchanged."
  (let ((name (symbol-name feature-name)))
    ;; The clauses are ordered: `=+>' has to be tested before the plain
    ;; `=' pattern, which would also accept it.
    (cond
     ((string-match "^=\\+>\\([^=>]+\\)" name)
      (concat "o." (substring name (match-beginning 1))))
     ((string-match "^=\\([^=>]+\\)" name)
      (concat "rep." (substring name (match-beginning 1))))
     ((string-match "^=>>\\([^=>]+\\)" name)
      (concat "g." (substring name (match-beginning 1))))
     ((string-match "^=>>>\\([^=>]+\\)" name)
      (concat "gi." (substring name (match-beginning 1))))
     ((string-match "^=>>\\(>+\\)" name)
      (let ((extra (length (match-string 1 name)))
            (tail (substring name (match-end 1))))
        (format "gi%d.%s" extra tail)))
     ((string-match "^=>\\([^=>]+\\)" name)
      (concat "a." (substring name (match-beginning 1))))
     ((string-match "^\\(=+\\)>" name)
      (let ((depth (length (match-string 1 name)))
            (tail (substring name (match-end 0))))
        (format "a%d.%s" depth tail)))
     ((string-match "^->" name)
      (concat "to." (substring name (match-end 0))))
     ((string-match "^<-" name)
      (concat "from." (substring name (match-end 0))))
     (t
      name))))
392
(defun www-uri-make-feature-name-url (uri-genre uri-feature-name uri-object)
  "Return the viewer URL showing URI-FEATURE-NAME of URI-OBJECT.
The result has the form VIEW-URL?feature=URI-FEATURE-NAME&URI-GENRE=URI-OBJECT,
where VIEW-URL is the value of `chise-wiki-view-url'.  The arguments are
inserted as they are."
  (let ((base chise-wiki-view-url))
    (format "%s?feature=%s&%s=%s"
            base uri-feature-name uri-genre uri-object)))
396
(defun www-uri-decode-object (genre char-rep)
  "Return the object of GENRE denoted by the URI representation CHAR-REP.
When CHAR-REP has the form FEATURE:CODE (the colon may be escaped as
`%3A'), FEATURE is decoded with `www-uri-decode-feature-name' and CODE
is either `0x' followed by hexadecimal digits or a URI-escaped Lisp
expression; a character is decoded with `decode-char' and any other
object with `concord-decode-object'.  Without a colon, CHAR-REP is
URI-decoded and taken as a one-character string for the genre
`character', as a feature name for the genre `feature', and as the
printed form of an `=id' value otherwise."
  (if (string-match "\\(%3A\\|:\\)" char-rep)
      ;; Remember both ends of the separator first: decoding the
      ;; feature name below overwrites the match data.
      (let* ((sep-start (match-beginning 0))
             (sep-end (match-end 0))
             (ccs (www-uri-decode-feature-name
                   (substring char-rep 0 sep-start)))
             (code-text (substring char-rep sep-end))
             (cpos (if (string-match "^0x" code-text)
                       (string-to-number
                        (substring code-text (match-end 0)) 16)
                     (car (read-from-string
                           (decode-uri-string
                            code-text file-name-coding-system))))))
        (if (and (eq genre 'character)
                 (numberp cpos))
            (decode-char ccs cpos)
          (concord-decode-object ccs cpos genre)))
    (let ((text (decode-uri-string char-rep 'utf-8-mcs-er)))
      (cond
       ((eq genre 'character)
        (when (= (length text) 1)
          (aref text 0)))
       ((eq genre 'feature)
        (concord-decode-object
         '=id (www-uri-decode-feature-name text) 'feature))
       (t
        (concord-decode-object
         '=id (car (read-from-string text)) genre))))))
434
(defun www-uri-encode-object (object)
  "Return the URI representation (a string) of OBJECT.
An object that is not a character becomes `rep.id:ID'.  A character
that has a `=ucs' code becomes its `utf-8-mcs-er' encoding with every
byte written as `%XX'.  Any other character becomes `CCS:0xCODE',
where CCS is the first charset of a fixed preference list in which the
character is defined, else the charset reported by `split-char', else
`system-char-id'."
  (cond
   ((not (characterp object))
    (format "rep.id:%s" (concord-object-id object)))
   ((encode-char object '=ucs)
    (mapconcat
     (lambda (byte)
       (format "%%%02X" byte))
     (encode-coding-string (char-to-string object) 'utf-8-mcs-er)
     ""))
   (t
    (let ((candidates '(; =ucs
                        =cns11643-1 =cns11643-2 =cns11643-3
                        =cns11643-4 =cns11643-5 =cns11643-6 =cns11643-7
                        =gb2312 =gb12345
                        =jis-x0208 =jis-x0208@1990
                        =jis-x0213-2
                        =jis-x0212
                        =adobe-japan1
                        =cbeta =jef-china3
                        =jis-x0213-1@2000 =jis-x0213-1@2004
                        =jis-x0208@1983 =jis-x0208@1978
                        =zinbun-oracle =>zinbun-oracle
                        =daikanwa
                        =gt =gt-k
                        =>>>adobe-japan1
                        =>>>jis-x0208 =>>>jis-x0213-1 =>>>jis-x0213-2
                        =>>jis-x0208 =>>jis-x0213-1 =>>jis-x0213-2
                        =>>adobe-japan1
                        =+>jis-x0208 =+>jis-x0213-1 =+>jis-x0213-2
                        =+>jis-x0208@1978
                        =+>adobe-japan1
                        =>jis-x0208 =>jis-x0213-1
                        =>>gt
                        =>ucs@iso =>ucs@unicode
                        =>ucs@jis =>ucs@cns =>ucs@ks
                        =>>ucs@iso =>>ucs@unicode
                        =>>ucs@jis =>>ucs@cns =>>ucs@ks
                        =>>>ucs@iso =>>>ucs@unicode
                        =>>>ucs@jis =>>>ucs@cns =>>>ucs@ks
                        =ruimoku-v6
                        =big5
                        =big5-cdp))
          ccs code)
      ;; Take the first preferred charset that defines the character.
      (while (and candidates (null code))
        (setq ccs (car candidates)
              candidates (cdr candidates)
              code (encode-char object ccs 'defined-only)))
      ;; Otherwise fall back to the charset the character is split into.
      (or code
          (and (setq ccs (car (split-char object)))
               (setq code (encode-char object ccs))))
      (if code
          (format "%s:0x%X"
                  (www-uri-encode-feature-name ccs)
                  code)
        (format "system-char-id:0x%X"
                (encode-char object 'system-char-id)))))))
493
(defun est-format-object (object &optional readable)
  "Return a string representing OBJECT.
A character is returned as a one-character string.  For any other
object the result is its `name', `=name' or `title' feature when
READABLE is non-nil and one of them is set, and its object id
otherwise, printed as with the `%s' format."
  (cond
   ((characterp object)
    (char-to-string object))
   (t
    (format "%s"
            (or (and readable
                     (or (concord-object-get object 'name)
                         (concord-object-get object '=name)
                         (concord-object-get object 'title)))
                (concord-object-id object))))))
503
(defun www-uri-make-object-url (object &optional uri-object)
  "Return the viewer URL of OBJECT.
The result has the form VIEW-URL?GENRE=REP, where VIEW-URL is the value
of `chise-wiki-view-url', GENRE is the genre of OBJECT, and REP is
URI-OBJECT when given and the URI representation of OBJECT otherwise."
  (let ((genre (est-object-genre object))
        (rep (or uri-object
                 (www-uri-encode-object object))))
    (format "%s?%s=%s" chise-wiki-view-url genre rep)))
510
511
512 ;;; @ Feature name presentation
513 ;;;
514
(defun www-format-feature-name-default (feature-name)
  "Return the default display form of the feature name FEATURE-NAME.
The name is split at every `-' and the parts are capitalized and
joined with single spaces."
  (let ((words (split-string (symbol-name feature-name) "-")))
    (mapconcat (function capitalize) words " ")))
522
(defun www-format-feature-name-as-metadata (feature-name &optional lang)
  "Return the display form of the metadata feature name FEATURE-NAME.
When the name ends in a `*' component, the part in front of it is
formatted with `www-format-feature-name*' for LANG and the `*'
component is appended unchanged.  Otherwise the default formatting is
used."
  (let* ((str (symbol-name feature-name))
         (meta-start (string-match "\\*[^*]+$" str)))
    (if meta-start
        (concat (www-format-feature-name*
                 (intern (substring str 0 meta-start)) lang)
                (substring str meta-start))
      (www-format-feature-name-default feature-name))))
535
(defun www-format-feature-name-as-rel-to (feature-name)
  "Return FEATURE-NAME with its first two characters replaced by U+2192.
It is meant for relation features whose name starts with `->'."
  (let ((target (substring (symbol-name feature-name) 2)))
    (concat "\u2192" target)))
538
(defun www-format-feature-name-as-rel-from (feature-name)
  "Return FEATURE-NAME with its first two characters replaced by U+2190.
It is meant for relation features whose name starts with `<-'."
  (let ((source (substring (symbol-name feature-name) 2)))
    (concat "\u2190" source)))
541
(defun www-format-feature-name-as-CCS (feature-name)
  "Return the display form of the charset feature name FEATURE-NAME.
The name is split at every `-' and the parts are upcased and joined
with spaces.  A leading run of `=' and `>' is set off from the first
part by a space.  A last part that consists only of digits is kept
attached with `-' instead."
  (let* ((parts (split-string (symbol-name feature-name) "-"))
         (head (upcase (car parts)))
         (tail (cdr parts))
         (label (if (string-match "^=+>*" head)
                    (let ((marker-end (match-end 0)))
                      (concat (substring head 0 marker-end)
                              " "
                              (substring head marker-end)))
                  head))
         last-part)
    (if (null tail)
        label
      ;; Every part but the last is simply upcased and appended.
      (while (cdr tail)
        (setq label (concat label " " (upcase (car tail)))
              tail (cdr tail)))
      (setq last-part (car tail))
      (if (string-match "^[0-9]+$" last-part)
          (concat label "-" last-part)
        (concat label " " (upcase last-part))))))
561
(defun www-format-feature-name* (feature-name &optional lang)
  "Return the display form of the feature name FEATURE-NAME, unencoded.
The `name@LANG' property (when LANG is given) or the `name' property of
the feature is used when set.  Otherwise a name containing `*' is
formatted as metadata; otherwise the first parent feature name carrying
such a property supplies the text, to which the remaining suffix of
FEATURE-NAME is appended; otherwise the name is formatted as a charset
name, as a `->' or `<-' relation, or in the default way."
  (let* ((name-key (and lang
                        (intern (format "name@%s" lang))))
         (own-name (or (and name-key
                            (char-feature-property feature-name name-key))
                       (char-feature-property feature-name 'name)))
         (name (symbol-name feature-name)))
    (cond
     (own-name)
     ((string-match "\\*" name)
      (www-format-feature-name-as-metadata feature-name lang))
     (t
      (let ((ancestor feature-name)
            inherited)
        ;; Find the nearest parent that provides a display name.
        (while (and (null inherited)
                    (setq ancestor (char-feature-name-parent ancestor)))
          (setq inherited
                (or (and name-key
                         (char-feature-property ancestor name-key))
                    (char-feature-property ancestor 'name))))
        (cond
         (inherited
          ;; Keep whatever FEATURE-NAME has beyond that parent's name.
          (concat inherited
                  (substring name (length (symbol-name ancestor)))))
         ((find-charset feature-name)
          (www-format-feature-name-as-CCS feature-name))
         ((string-match "^\\(->\\)" name)
          (www-format-feature-name-as-rel-to feature-name))
         ((string-match "^\\(<-\\)" name)
          (www-format-feature-name-as-rel-from feature-name))
         (t
          (www-format-feature-name-default feature-name))))))))
599
(defun www-format-feature-name (feature-name &optional lang)
  "Return the display form of FEATURE-NAME for LANG, encoded for output.
This is the result of `www-format-feature-name*' passed through
`www-format-encode-string'."
  (let ((plain (www-format-feature-name* feature-name lang)))
    (www-format-encode-string plain)))
603
604
605 ;;; @ HTML generator
606 ;;;
607
(defvar www-format-char-img-style "vertical-align:bottom;"
  "Value of the `style' attribute of generated glyph <img> elements.")
609
610 (defun www-format-encode-string (string &optional without-tags as-body)
611   (with-temp-buffer
612     (insert string)
613     (let (plane code start end char variants ret rret)
614       (when as-body
615         (goto-char (point-min))
616         (while (search-forward "&" nil t)
617           (replace-match "&amp;" nil t)))
618       (goto-char (point-min))
619       (while (search-forward "<" nil t)
620         (replace-match "&lt;" nil t))
621       (goto-char (point-min))
622       (while (search-forward ">" nil t)
623         (replace-match "&gt;" nil t))
624       (if without-tags
625           (encode-coding-region (point-min)(point-max) 'utf-8-mcs-er)
626         (let ((coded-charset-entity-reference-alist
627                (list*
628                 '(=gt                   "GT-" 5 d)
629                 '(=hanyo-denshi/ja   "HD-JA-" 4 X)
630                 '(=hanyo-denshi/jb   "HD-JB-" 4 X)
631                 '(=hanyo-denshi/jc   "HD-JC-" 4 X)
632                 '(=hanyo-denshi/jd   "HD-JD-" 4 X)
633                 '(=hanyo-denshi/ft   "HD-FT-" 4 X)
634                 '(=hanyo-denshi/ia   "HD-IA-" 4 X)
635                 '(=hanyo-denshi/ib   "HD-IB-" 4 X)
636                 '(=hanyo-denshi/hg   "HD-HG-" 4 X)
637                 '(=hanyo-denshi/ip   "HD-IP-" 4 X)
638                 '(=hanyo-denshi/jt   "HD-JT-" 4 X)
639                 '(=hanyo-denshi/ks   "HD-KS-" 6 d)
640                 '(=cns11643-1           "C1-" 4 X)
641                 '(=cns11643-2           "C2-" 4 X)
642                 '(=cns11643-3           "C3-" 4 X)
643                 '(=cns11643-4           "C4-" 4 X)
644                 '(=cns11643-5           "C5-" 4 X)
645                 '(=cns11643-6           "C6-" 4 X)
646                 '(=cns11643-7           "C7-" 4 X)
647                 '(=adobe-japan1-6       "AJ1-" 5 d)
648                 '(=big5-cdp             "CDP-" 4 X)
649                 '(=gb2312               "G0-" 4 X)
650                 '(=gb12345              "G1-" 4 X)
651                 '(=jis-x0208@1990       "J90-" 4 X)
652                 '(=jis-x0212            "JSP-" 4 X)
653                 '(=cbeta                "CB" 5 d)
654                 '(=jis-x0208@1997       "J97-" 4 X)
655                 '(=jis-x0208@1978       "J78-" 4 X)
656                 '(=jis-x0208@1983       "J83-" 4 X)
657                 '(=ruimoku-v6           "RUI6-" 4 X)
658                 '(=zinbun-oracle        "ZOB-" 4 d)
659                 '(=jef-china3           "JC3-" 4 X)
660                 '(=ucs@unicode          "UU+" 4 X)
661                 '(=ucs@JP/hanazono  "hanaJU+" 4 X)
662                 '(=daikanwa             "M-" 5 d)
663                 coded-charset-entity-reference-alist)))
664           (encode-coding-region (point-min)(point-max) 'utf-8-mcs-er)
665
666           (goto-char (point-min))
667           (while (re-search-forward "&CB\\([0-9]+\\);" nil t)
668             (setq code (string-to-int (match-string 1)))
669             (replace-match
670              (format "<img alt=\"CB%05d\" src=\"%s/cb-gaiji/%02d/CB%05d.gif\"
671 style=\"%s\">"
672                      code
673                      chise-wiki-bitmap-glyphs-url
674                      (/ code 1000) code
675                      www-format-char-img-style)
676              t 'literal))
677
678           (goto-char (point-min))
679           (while (re-search-forward "&\\(o-\\|G-\\|g2-\\)?J\\(78\\|83\\|90\\|97\\|SP\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
680             (setq plane (match-string 2)
681                   code (string-to-int (match-string 3) 16))
682             (replace-match
683              (format "<img alt=\"J%s-%04X\" src=\"%s/JIS-%s/%02d-%02d.gif\"
684 style=\"%s\">"
685                      plane code
686                      chise-wiki-bitmap-glyphs-url
687                      plane
688                      (- (lsh code -8) 32)
689                      (- (logand code 255) 32)
690                      www-format-char-img-style)
691              t 'literal))
692
693           (goto-char (point-min))
694           (while (re-search-forward "&\\(o-\\)?HD-\\(JA\\|JB\\|JC\\|JD\\|FT\\|IA\\|IB\\|HG\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
695             (setq plane (match-string 2)
696                   code (string-to-int (match-string 3) 16))
697             (replace-match
698              (format "<img alt=\"HD-%s-%04X\" src=\"%s/IVD/HanyoDenshi/%s%02d%02d.png\"
699 style=\"%s\">"
700                      plane code
701                      chise-wiki-bitmap-glyphs-url
702                      plane
703                      (- (lsh code -8) 32)
704                      (- (logand code 255) 32)
705                      www-format-char-img-style)
706              t 'literal))
707
708           (goto-char (point-min))
709           (while (re-search-forward "&\\(o-\\|G-\\|g2-\\)?HD-\\(IP\\|JT\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
710             (setq plane (match-string 2)
711                   code (string-to-int (match-string 3) 16))
712             (replace-match
713              (format "<img alt=\"HD-%s-%04X\" src=\"%s/IVD/HanyoDenshi/%s%04X.png\"
714 style=\"%s\">"
715                      plane code
716                      chise-wiki-bitmap-glyphs-url
717                      plane code
718                      www-format-char-img-style)
719              t 'literal))
720
721           (goto-char (point-min))
722           (while (re-search-forward "&\\(o-\\|G-\\|g2-\\)?HD-KS-\\([0-9]+\\);" nil t)
723             (setq code (string-to-int (match-string 2)))
724             (replace-match
725              (format "<img alt=\"HD-KS%06d\" src=\"%s/IVD/HanyoDenshi/KS%06d.png\"
726 style=\"vertical-align:middle\">"
727                      code
728                      chise-wiki-bitmap-glyphs-url
729                      code
730                      www-format-char-img-style)
731              t 'literal))
732
733           (goto-char (point-min))
734           (while (re-search-forward "&G\\([01]\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
735             (setq plane (string-to-int (match-string 1))
736                   code (string-to-int (match-string 2) 16))
737             (replace-match
738              (format "<img alt=\"GB%d-%04X\" src=\"%s/GB%d/%02d-%02d.gif\"
739 style=\"%s\">"
740                      plane code
741                      chise-wiki-bitmap-glyphs-url
742                      plane
743                      (- (lsh code -8) 32)
744                      (- (logand code 255) 32)
745                      www-format-char-img-style)
746              t 'literal))
747
748           (goto-char (point-min))
749           (while (re-search-forward "&C\\([1-7]\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
750             (setq plane (string-to-int (match-string 1))
751                   code (string-to-int (match-string 2) 16))
752             (replace-match
753              (format "<img alt=\"CNS%d-%04X\" src=\"%s/CNS%d/%04X.gif\"
754 style=\"%s\">"
755                      plane code
756                      chise-wiki-bitmap-glyphs-url
757                      plane code
758                      www-format-char-img-style)
759              t 'literal))
760
761           (goto-char (point-min))
762           (while (re-search-forward "&JC3-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
763             (setq code (string-to-int (match-string 1) 16))
764             (replace-match
765              (format "<img alt=\"JC3-%04X\" src=\"http://kanji.zinbun.kyoto-u.ac.jp/db/CHINA3/Gaiji/%04x.gif\">"
766                      code code)
767              t 'literal))
768
769           (goto-char (point-min))
770           (while (re-search-forward "&\\(A-\\)?ZOB-\\([0-9]+\\);" nil t)
771             (setq code (string-to-int (match-string 2)))
772             (replace-match
773              (format "<img alt=\"ZOB-%04d\" src=\"%s/ZOB-1968/%04d.png\"
774 style=\"vertical-align:middle\">"
775                      code
776                      chise-wiki-bitmap-glyphs-url
777                      code
778                      www-format-char-img-style)
779              t 'literal))
780
781           (goto-char (point-min))
782           (while (re-search-forward "&\\(o-\\|G-\\|g2-\\)?AJ1-\\([0-9]+\\);" nil t)
783             (setq code (string-to-int (match-string 2)))
784             (replace-match
785              (format "<img alt=\"AJ1-%05d\" src=\"%s/IVD/AdobeJapan1/CID+%d.png\"
786 style=\"vertical-align:middle\">"
787                      code
788                      chise-wiki-bitmap-glyphs-url
789                      code
790                      www-format-char-img-style)
791              t 'literal))
792
793           (goto-char (point-min))
794           (while (re-search-forward "&\\(G-\\|g2-\\)?GT-\\([0-9]+\\);" nil t)
795             (setq code (string-to-int (match-string 2)))
796             (replace-match
797              (format "<img alt=\"GT-%05d\" src=\"%s?char=GT-%05d\"
798 style=\"%s\">"
799                      code
800                      chise-wiki-glyph-cgi-url
801                      code
802                      www-format-char-img-style)
803              t 'literal))
804
805           (goto-char (point-min))
806           (while (re-search-forward "&\\(G-\\)?GT-K\\([0-9]+\\);" nil t)
807             (setq code (string-to-int (match-string 2)))
808             (replace-match
809              (format "<img alt=\"GT-K%05d\" src=\"%s?char=GT-K%05d\"
810 style=\"%s\">"
811                      code
812                      chise-wiki-glyph-cgi-url
813                      code
814                      www-format-char-img-style)
815              t 'literal))
816
817           (goto-char (point-min))
818           (while (re-search-forward "&B-\\([0-9A-F]+\\);" nil t)
819             (setq code (string-to-int (match-string 1) 16))
820             (replace-match
821              (format "<img alt=\"B-%04X\" src=\"%s?char=B-%04X\"
822 style=\"%s\">"
823                      code
824                      chise-wiki-glyph-cgi-url
825                      code
826                      www-format-char-img-style)
827              t 'literal))
828
829           (goto-char (point-min))
830           (while (re-search-forward "&CDP-\\([0-9A-F]+\\);" nil t)
831             (setq code (string-to-int (match-string 1) 16))
832             (replace-match
833              (format "<img alt=\"CDP-%04X\" src=\"%s?char=CDP-%04X\"
834 style=\"%s\">"
835                      code
836                      chise-wiki-glyph-cgi-url
837                      code
838                      www-format-char-img-style)
839              t 'literal))
840
841           (goto-char (point-min))
842           (while (re-search-forward "&RUI6-\\([0-9A-F]+\\);" nil t)
843             (setq code (string-to-int (match-string 1) 16))
844             (replace-match
845              (format "<img alt=\"RUI6-%04X\" src=\"%s?char=RUI6-%04X\"
846 style=\"vertical-align:middle\">"
847                      code
848                      chise-wiki-glyph-cgi-url
849                      code
850                      www-format-char-img-style)
851              t 'literal))
852
853           (goto-char (point-min))
854           (while (re-search-forward "&hanaJU\\+\\([0-9A-F]+\\);" nil t)
855             (setq code (string-to-int (match-string 1) 16))
856             (replace-match
857              (format "<img alt=\"hanaJU+%04X\" src=\"%s?char=hana-JU+%04X\"
858 style=\"vertical-align:middle\">"
859                      code
860                      chise-wiki-glyph-cgi-url
861                      code
862                      www-format-char-img-style)
863              t 'literal))
864
865           (goto-char (point-min))
866           (while (re-search-forward "&\\(A-\\)?\\(UU\\+\\|U-\\)\\([0-9A-F]+\\);" nil t)
867             (setq code (string-to-int (match-string 3) 16))
868             (replace-match
869              (format "<img alt=\"UU+%04X\" src=\"http://www.unicode.org/cgi-bin/refglyph?24-%04X\"
870 style=\"vertical-align:middle\">"
871                      code
872                      code
873                      www-format-char-img-style)
874              t 'literal))
875
876           (goto-char (point-min))
877           (while (re-search-forward "&MCS-\\([0-9A-F]+\\);" nil t)
878             (setq code (string-to-int (match-string 1) 16))
879             (setq start (match-beginning 0)
880                   end (match-end 0))
881             (setq char (decode-char 'system-char-id code))
882             (cond
883              ((and (setq variants
884                          (or (www-get-feature-value char '->subsumptive)
885                              (www-get-feature-value char '->denotational)))
886                    (progn
887                      (while (and variants
888                                  (setq ret (www-format-encode-string
889                                             (char-to-string (car variants))))
890                                  (string-match "&MCS-\\([0-9A-F]+\\);" ret))
891                        (setq variants (cdr variants)))
892                      ret))
893               (unless (string-match "&MCS-\\([0-9A-F]+\\);" ret)
894                 (goto-char start)
895                 (delete-region start end)
896                 (insert ret))
897               )
898              ((setq ret (or (www-get-feature-value char 'ideographic-combination)
899                             (www-get-feature-value char 'ideographic-structure)))
900               (setq ret
901                     (mapconcat
902                      (lambda (ch)
903                        (if (listp ch)
904                            (if (characterp (setq rret (find-char ch)))
905                                (setq ch rret)))
906                        (if (characterp ch)
907                            (www-format-encode-string
908                             (char-to-string ch) without-tags)
909                          (www-format-encode-string
910                           (format "%S" ch) without-tags)))
911                      ret ""))
912               (when ret
913                 (goto-char start)
914                 (delete-region start end)
915                 (insert ret))
916               )))
917           ))
918       ;; (goto-char (point-min))
919       ;; (while (search-forward "&GT-" nil t)
920       ;;   (replace-match "&amp;GT-" t 'literal))
921       (buffer-string))))
922
(defun www-html-display-text (text)
  "Print TEXT to `standard-output' as an HTML fragment.

The following rewrites are applied, in this order, to a copy of TEXT:
  1. every \"<\" becomes \"&lt;\" and every \">\" becomes \"&gt;\";
  2. wiki links written as [[LABEL|URL]] become
     <a href=\"URL\">LABEL</a>;
  3. the text is encoded with the `utf-8-mcs-er' coding system;
  4. every \"&GT-\" present after encoding becomes \"&amp;GT-\".

The rewritten string is handed to `princ'; the value of that call
is the return value."
  (princ
   (with-temp-buffer
     (insert text)
     ;; All `replace-match' calls below pass FIXEDCASE = t.  With
     ;; FIXEDCASE = nil the replacement is case-converted to mimic the
     ;; matched text, so \"&GT-\" would be replaced by \"&AMP;GT-\" and
     ;; an all-uppercase link such as [[FOO|BAR]] would produce
     ;; <A HREF=...> instead of <a href=...>.
     ;;
     ;; Angle brackets are escaped before links are expanded so that
     ;; the generated <a> tags are not escaped themselves.
     (goto-char (point-min))
     (while (search-forward "<" nil t)
       (replace-match "&lt;" t t))
     (goto-char (point-min))
     (while (search-forward ">" nil t)
       (replace-match "&gt;" t t))
     ;; [[LABEL|URL]] -> anchor; group 1 is the label, group 2 the URL.
     (goto-char (point-min))
     (while (re-search-forward "\\[\\[\\([^]|[]+\\)|\\([^][]+\\)\\]\\]" nil t)
       (replace-match
        (format "<a href=\"%s\">%s</a>"
                (match-string 2)
                (match-string 1))
        t t))
     (encode-coding-region (point-min) (point-max) 'utf-8-mcs-er)
     ;; Escape the ampersand of any \"&GT-\" sequence left in the
     ;; encoded text.
     (goto-char (point-min))
     (while (search-forward "&GT-" nil t)
       (replace-match "&amp;GT-" t t))
     (buffer-string))))
945
(defun www-html-display-paragraph (text)
  "Print TEXT as one HTML paragraph.
Emits \"<p>\", then TEXT through `www-html-display-text', then
\"</p>\" followed by a newline."
  (princ "<p>")
  (www-html-display-text text)
  (princ "</p>\n"))
950
951
952 ;;; @ for GlyphWiki
953 ;;;
954
(defvar coded-charset-GlyphWiki-id-alist
  '(
    ;; Each full entry is (FEATURE PREFIX WIDTH CONVERSION SUFFIX).
    (=ucs "u" 4 x nil)
    (=adobe-japan1-0 "aj1-" 5 d nil)
    (=adobe-japan1-1 "aj1-" 5 d nil)
    (=adobe-japan1-2 "aj1-" 5 d nil)
    (=adobe-japan1-3 "aj1-" 5 d nil)
    (=adobe-japan1-4 "aj1-" 5 d nil)
    (=adobe-japan1-5 "aj1-" 5 d nil)
    (=adobe-japan1-6 "aj1-" 5 d nil)
    ;; Feature-only entries: no formatting fields are given.
    (=decomposition@cid)
    (=decomposition@hanyo-denshi)
    (=hanyo-denshi/ks "koseki-" 6 d nil)
    (=>>hanyo-denshi/ks "koseki-" 6 d nil)
    (=ucs@jis "u" 4 x nil)
    (=ucs@cns "u" 4 x "-t")
    (=jis-x0212 "jsp-" 4 x nil)
    (=jis-x0213-1@2000 "jx1-2000-" 4 x nil)
    (=jis-x0213-1@2004 "jx1-2004-" 4 x nil)
    (=jis-x0213-2 "jx2-" 4 x nil)
    (=gt "gt-" 5 d nil)
    (=daikanwa "dkw-" 5 d nil)
    (=gt-k "gt-k" 5 d nil)
    (=jef-china3 "jc3-" 4 x nil)
    (=big5 "b-" 4 x nil)
    (=ucs@ks "u" 4 x "-k")
    (=ks-x1001 "k0-" 4 x nil)
    (=jis-x0208@1978/1pr "j78-" 4 x nil)
    (=jis-x0208@1978/-4pr "j78-" 4 x nil)
    (=jis-x0208@1978 "j78-" 4 x nil)
    (=>>>jis-x0208@1978 "j78-" 4 x nil)
    (=>>jis-x0208@1978 "j78-" 4 x nil)
    (=+>jis-x0208@1978 "j78-" 4 x nil)
    (=ucs@JP "u" 4 x nil)
    (=ucs@gb "u" 4 x "-g")
    (=ucs@iso "u" 4 x "-u")
    (=ucs@unicode "u" 4 x "-us")
    (=big5-cdp "cdp-" 4 x nil)
    (=>>big5-cdp "cdp-" 4 x nil)
    (=cns11643-1 "c1-" 4 x nil)
    (=cns11643-2 "c2-" 4 x nil)
    (=cns11643-3 "c3-" 4 x nil)
    (=cns11643-4 "c4-" 4 x nil)
    (=cns11643-5 "c5-" 4 x nil)
    (=cns11643-6 "c6-" 4 x nil)
    (=cns11643-7 "c7-" 4 x nil)
    (=jis-x0208 "j90-" 4 x nil)
    (=>>>jis-x0208 "j90-" 4 x nil)
    (=>>jis-x0208 "j90-" 4 x nil)
    (=+>jis-x0208 "j90-" 4 x nil)
    (=jis-x0208@1990 "j90-" 4 x nil)
    (=>>>jis-x0208@1990 "j90-" 4 x nil)
    (=>>jis-x0208@1990 "j90-" 4 x nil)
    (=+>jis-x0208@1990 "j90-" 4 x nil)
    (=jis-x0208@1983 "j83-" 4 x nil)
    (=>>>jis-x0208@1983 "j83-" 4 x nil)
    (=>>jis-x0208@1983 "j83-" 4 x nil)
    (=+>jis-x0208@1983 "j83-" 4 x nil)
    (=cbeta "cb" 5 d nil)
    (=>>daikanwa "dkw-" 5 d nil)
    )
  "Ordered table used by `char-GlyphWiki-id' to build glyph names.

Each element has the form (FEATURE PREFIX WIDTH CONVERSION SUFFIX):
  FEATURE     character feature looked up with `char-feature';
  PREFIX      string placed before the formatted code;
  WIDTH       zero-padded field width of the code;
  CONVERSION  symbol `x' or `d', used as the `format' conversion
              character for the code;
  SUFFIX      string appended after the code, or nil for none.

Elements consisting of FEATURE alone carry no formatting fields.
`char-GlyphWiki-id' scans the table from the front and uses the
first element whose FEATURE has a value, so the order of the
elements is significant.")
1015
(defun char-GlyphWiki-id (char)
  "Return a GlyphWiki glyph name for CHAR, or nil if none can be built.

The entries of `coded-charset-GlyphWiki-id-alist' are tried in order
and the first one whose feature has a non-nil value for CHAR (as
reported by `char-feature') is used:

- If the value is a list, the ids of its elements are computed
  recursively and joined with \"-\".
- If the feature is `=ucs@unicode' or `=ucs@iso', a regional name
  is preferred when CHAR is also encodable in a regional charset
  and has the matching regional UCS code: \"uXXXX\" (JIS),
  \"uXXXX-g\" (GB), \"uXXXX-t\" (CNS) or \"uXXXX-k\" (KS).
- Otherwise the value is formatted with the prefix, width,
  conversion and suffix given by the table entry."
  (let ((rest coded-charset-GlyphWiki-id-alist)
        spec ret code)
    ;; Find the first entry whose feature is defined for CHAR; on exit
    ;; SPEC is that entry and RET the feature value (nil if none).
    (while (and rest
                (setq spec (pop rest))
                (null (setq ret (char-feature char (car spec))))))
    (when ret
      (or
       (and (listp ret)
            (mapconcat #'char-GlyphWiki-id ret "-"))
       ;; The list must hold bare symbols: writing '=ucs@iso inside the
       ;; quoted list yields the element (quote =ucs@iso), which `memq'
       ;; can never match.
       (and (memq (car spec) '(=ucs@unicode =ucs@iso))
            (cond
             ((and (or (encode-char char '=jis-x0208@1990)
                       (encode-char char '=jis-x0212)
                       (encode-char char '=jis-x0213-1))
                   (setq code (encode-char char '=ucs@jis)))
              (format "u%04x" code)
              )
             ((and (or (encode-char char '=gb2312)
                       (encode-char char '=gb12345))
                   (setq code (encode-char char '=ucs@gb)))
              (format "u%04x-g" code)
              )
             ((and (or (encode-char char '=cns11643-1)
                       (encode-char char '=cns11643-2)
                       (encode-char char '=cns11643-3)
                       (encode-char char '=cns11643-4)
                       (encode-char char '=cns11643-5)
                       (encode-char char '=cns11643-6)
                       (encode-char char '=cns11643-7))
                   (setq code (encode-char char '=ucs@cns)))
              (format "u%04x-t" code)
              )
             ((and (encode-char char '=ks-x1001)
                   (setq code (encode-char char '=ucs@ks)))
              (format "u%04x-k" code)
              )))
       ;; Generic case: build a format string such as \"u%04x-t\" from
       ;; the table entry and apply it to the feature value.
       (format (format "%s%%0%d%s%s"
                       (nth 1 spec)
                       (nth 2 spec)
                       (nth 3 spec)
                       (or (nth 4 spec) ""))
               ret)))))
1059
1060
1061 ;;; @ End.
1062 ;;;
1063
1064 (provide 'cwiki-common)
1065
1066 ;;; cwiki-common.el ends here