(www-ids-find-format-line): Display glyphs of JIS X0208, 0212, GB2312,
[chise/ids.git] / www / www-ids-find.el
1 (require 'ids-find)
2
3 (defun decode-url-string (string &optional coding-system)
4   (if (> (length string) 0)
5       (let ((i 0)
6             dest)
7         (while (string-match "%\\([0-9A-F][0-9A-F]\\)" string i)
8           (setq dest (concat dest
9                              (substring string i (match-beginning 0))
10                              (char-to-string
11                               (int-char
12                                (string-to-int (match-string 1 string) 16))))
13                 i (match-end 0)))
14         (decode-coding-string
15          (concat dest (substring string i))
16          coding-system))))
17
18 (defvar www-ids-find-tang-chars-file-name
19   "~tomo/projects/chise/ids/www/tang-chars.udd")
20
21 (defun www-ids-find-format-line (c is)
22   (let ((str (encode-coding-string (format "%c" c) 'utf-8-er))
23         plane code ucs)
24     (princ
25      (with-temp-buffer
26        (cond
27         ((string-match "&CB\\([0-9]+\\);" str)
28          (setq code (string-to-int (match-string 1 str)))
29          (insert "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=")
30          (insert str)
31          (insert (format "\"><img alt=\"CB%05d\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/cb-gaiji/%02d/CB%05d.gif\">\n"
32                          code (/ code 1000) code))
33          (insert (format "CB%05d</a>" code))
34          )
35         ((string-match "&JC3-\\([0-9A-F]+\\);" str)
36          (setq code (string-to-int (match-string 1 str) 16))
37          (insert "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=")
38          (insert str)
39          (insert (format "\"><img alt=\"JC3-%04X\" src=\"http://kanji.zinbun.kyoto-u.ac.jp/db/CHINA3/Gaiji/%04x.gif\">\n"
40                          code code))
41          (insert (format "JC3-%04X</a>" code))
42          )
43         ((string-match "&J\\(78\\|83\\|90\\|SP\\)-\\([0-9A-F]+\\);" str)
44          (setq plane (match-string 1 str)
45                code (string-to-int (match-string 2 str) 16))
46          (insert "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=")
47          (insert str)
48          (insert (format "\"><img alt=\"J%s-%04X\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/JIS-%s/%02d-%02d.gif\">\n"
49                          plane code plane
50                          (- (lsh code -8) 32)
51                          (- (logand code 255) 32)))
52          (insert (format "J%s-%04X</a>" plane code))
53          )
54         ((string-match "&G\\([01]\\)-\\([0-9A-F]+\\);" str)
55          (setq plane (string-to-int (match-string 1 str))
56                code (string-to-int (match-string 2 str) 16))
57          (insert "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=")
58          (insert str)
59          (insert (format "\"><img alt=\"G%d-%04X\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/GB%d/%02d-%02d.gif\">\n"
60                          plane code plane
61                          (- (lsh code -8) 32)
62                          (- (logand code 255) 32)))
63          (insert (format "G%d-%04X</a>" plane code))
64          )
65         ((string-match "&C\\([1-7]\\)-\\([0-9A-F]+\\);" str)
66          (setq plane (string-to-int (match-string 1 str))
67                code (string-to-int (match-string 2 str) 16))
68          (insert "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=")
69          (insert str)
70          (insert (format "\"><img alt=\"C%d-%04X\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/CNS%d/%04X.gif\">\n"
71                          plane code plane code))
72          (insert (format "C%d-%04X</a>" plane code))
73          )
74         (t
75          (insert "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=")
76          (insert str)
77          (insert "\">")
78          (insert str)
79          (insert "</a>")
80          ))
81        (goto-char (point-min))
82        (while (search-forward "&" nil t)
83          (replace-match "&amp;" t 'literal))
84        (buffer-string)
85        ))
86     (princ
87      (or (if (setq ucs (or (char-ucs c)
88                            (encode-char c 'ucs)))
89              (format " <a href=\"http://www.unicode.org/cgi-bin/GetUnihanData.pl?codepoint=%X\">%s</a>"
90                      ucs
91                      (cond ((<= ucs #xFFFF)
92                             (format "U+%04X" ucs))
93                            ((<= ucs #x10FFFF)
94                             (format "U-%08X" ucs))))
95            "          ")))
96     (princ " ")
97     (when is
98       (princ
99        (with-temp-buffer
100          (insert
101           (encode-coding-string
102            (ideographic-structure-to-ids is)
103            'utf-8-jp-er))
104          (goto-char (point-min))
105          (while (re-search-forward "&CB\\([0-9]+\\);" nil t)
106            (setq code (string-to-int (match-string 1)))
107            (replace-match
108             (format "<img alt=\"CB%05d\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/cb-gaiji/%02d/CB%05d.gif\">"
109                     code (/ code 1000) code)
110             t 'literal))
111          (buffer-string))))
112     (when (and ucs
113                (with-current-buffer
114                    (find-file-noselect
115                     www-ids-find-tang-chars-file-name)
116                  (goto-char (point-min))
117                  (re-search-forward (format "^%d$" ucs) nil t)))
118       (princ
119        (format " <a href=\"http://coe21.zinbun.kyoto-u.ac.jp/djvuchar?query=%s\">"
120                (mapconcat
121                 (lambda (c)
122                   (format "%%%02X" (char-int c)))
123                 (encode-coding-string (char-to-string c)
124                                       'utf-8-jp)
125                 "")))
126       (princ (encode-coding-string "\e$B"M\e(B[\e$BEbBeBsK\\e(B]</a>" 'utf-8-jp-er)))
127     (princ "<br>\n")))
128
129 (defun www-batch-ids-find ()
130   (let ((components (car command-line-args-left))
131         (coded-charset-entity-reference-alist
132          (list*
133           '(=cns11643-1         "C1-" 4 X)
134           '(=cns11643-2         "C2-" 4 X)
135           '(=cns11643-3         "C3-" 4 X)
136           '(=cns11643-4         "C4-" 4 X)
137           '(=cns11643-5         "C5-" 4 X)
138           '(=cns11643-6         "C6-" 4 X)
139           '(=cns11643-7         "C7-" 4 X)
140           '(=gb2312             "G0-" 4 X)
141           '(=gb12345            "G1-" 4 X)
142           '(=jis-x0208@1990     "J90-" 4 X)
143           '(=jis-x0212          "JSP-" 4 X)
144           '(=cbeta              "CB" 5 d)
145           '(=jef-china3         "JC3-" 4 X)
146           '(=jis-x0208@1978     "J78-" 4 X)
147           '(=jis-x0208@1983     "J83-" 4 X)
148           '(=daikanwa           "M-" 5 d)
149           coded-charset-entity-reference-alist))
150         is)
151     (setq command-line-args-left (cdr command-line-args-left))
152     (cond
153      ((stringp components)
154       (if (string-match "^components=" components)
155           (setq components (substring components (match-end 0))))
156       (setq components
157             (if (> (length components) 0)
158                 (decode-url-string components 'utf-8-jp-er)
159               nil))
160       )
161      (t
162       (setq components nil)
163       ))
164     (princ "Content-Type: text/html; charset=\"UTF-8\"
165
166 <!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"
167             \"http://www.w3.org/TR/html4/loose.dtd\">
168 <html lang=\"ja\">
169 <head>
170 <title>CHISE IDS Find</title>
171 </head>
172
173 <body>
174
175 <h1>")
176     (princ (encode-coding-string "CHISE IDS \e$B4A;z8!:w\e(B" 'utf-8-jp-er))
177     (princ "</h1>
178 <p>
179 <form action=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/ids-find\" method=\"GET\">
180 ")
181     (princ (encode-coding-string "\e$BItIJJ8;zNs\e(B" 'utf-8-jp-er))
182     (princ " <input type=\"text\" name=\"components\" size=\"30\" maxlength=\"30\" value=\"")
183     (if (> (length components) 0)
184         (princ (encode-coding-string components 'utf-8-jp-er)))
185     (princ "\">
186 <input type=\"submit\" value=\"")
187     (princ (encode-coding-string "\e$B8!:w3+;O\e(B" 'utf-8-jp-er))
188     (princ "\">
189 </form>
190
191 ")
192     (cond
193      (components
194       ;; (map-char-attribute
195       ;;  (lambda (c v)
196       ;;    (when (every (lambda (p)
197       ;;                   (ideographic-structure-member p v))
198       ;;                 components)
199       ;;      (princ (encode-coding-string
200       ;;              (ids-find-format-line c v)
201       ;;              'utf-8-jp-er))
202       ;;      (princ "<br>\n")
203       ;;      )
204       ;;    nil)
205       ;;  'ideographic-structure)
206       (when (= (length components) 1)
207         (www-ids-find-format-line (aref components 0)
208                                   (char-feature (aref components 0)
209                                                 'ideographic-structure)))
210       (dolist (c (ideographic-products-find components))
211         (setq is (char-feature c 'ideographic-structure))
212         ;; to avoid problems caused by wrong indexes
213         (when (every (lambda (c)
214                        (ideographic-structure-member c is))
215                      components)
216           (www-ids-find-format-line c is)))
217       )
218      (t
219       (princ (encode-coding-string "<hr>
220 <p>
221 \e$B;XDj$7$?ItIJ$rA4$F4^$`4A;z$N0lMw$rI=<($7$^$9!#\e(B
222 <p>
223 CHISE \e$B$GMQ$$$i$l$k<BBV;2>H7A<0!JNc!'\e(B&amp;M-00256;\e$B!K$GItIJ$r;XDj$9$k;v$b$G$-$^$9!#\e(B" 'utf-8-jp-er))
224       ))
225     (princ "<hr>")
226     (princ
227      (format
228       "Powered by <a
229 href=\"http://kanji.zinbun.kyoto-u.ac.jp/projects/chise/xemacs/\"
230 >XEmacs CHISE</a> %s."
231       xemacs-chise-version))
232     (princ "
233 </body>
234 </html>
235 ")))