(www-ids-find-format-line): Add setting for "&ZOB-dddd;".
[chise/ids.git] / www / www-ids-find.el
1 (require 'ids-find)
2
3 (defun decode-url-string (string &optional coding-system)
4   (if (> (length string) 0)
5       (let ((i 0)
6             dest)
7         (while (string-match "%\\([0-9A-F][0-9A-F]\\)" string i)
8           (setq dest (concat dest
9                              (substring string i (match-beginning 0))
10                              (char-to-string
11                               (int-char
12                                (string-to-int (match-string 1 string) 16))))
13                 i (match-end 0)))
14         (decode-coding-string
15          (concat dest (substring string i))
16          coding-system))))
17
18 (defvar www-ids-find-tang-chars-file-name
19   "~tomo/projects/chise/ids/www/tang-chars.udd")
20
21 (defun www-ids-find-format-line (c is)
22   (let ((str (encode-coding-string (format "%c" c) 'utf-8-er))
23         plane code ucs)
24     (princ
25      (with-temp-buffer
26        (cond
27         ((string-match "&CB\\([0-9]+\\);" str)
28          (setq code (string-to-int (match-string 1 str)))
29          (insert "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=")
30          (insert str)
31          (insert (format "\"><img alt=\"CB%05d\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/cb-gaiji/%02d/CB%05d.gif\">\n"
32                          code (/ code 1000) code))
33          (insert (format "CB%05d</a>" code))
34          )
35         ((string-match "&JC3-\\([0-9A-F]+\\);" str)
36          (setq code (string-to-int (match-string 1 str) 16))
37          (insert "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=")
38          (insert str)
39          (insert (format "\"><img alt=\"JC3-%04X\" src=\"http://kanji.zinbun.kyoto-u.ac.jp/db/CHINA3/Gaiji/%04x.gif\">\n"
40                          code code))
41          (insert (format "JC3-%04X</a>" code))
42          )
43         ((string-match "&J\\(78\\|83\\|90\\|SP\\)-\\([0-9A-F]+\\);" str)
44          (setq plane (match-string 1 str)
45                code (string-to-int (match-string 2 str) 16))
46          (insert "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=")
47          (insert str)
48          (insert (format "\"><img alt=\"J%s-%04X\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/JIS-%s/%02d-%02d.gif\">\n"
49                          plane code plane
50                          (- (lsh code -8) 32)
51                          (- (logand code 255) 32)))
52          (insert (format "J%s-%04X</a>" plane code))
53          )
54         ((string-match "&G\\([01]\\)-\\([0-9A-F]+\\);" str)
55          (setq plane (string-to-int (match-string 1 str))
56                code (string-to-int (match-string 2 str) 16))
57          (insert "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=")
58          (insert str)
59          (insert (format "\"><img alt=\"G%d-%04X\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/GB%d/%02d-%02d.gif\">\n"
60                          plane code plane
61                          (- (lsh code -8) 32)
62                          (- (logand code 255) 32)))
63          (insert (format "G%d-%04X</a>" plane code))
64          )
65         ((string-match "&C\\([1-7]\\)-\\([0-9A-F]+\\);" str)
66          (setq plane (string-to-int (match-string 1 str))
67                code (string-to-int (match-string 2 str) 16))
68          (insert "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=")
69          (insert str)
70          (insert (format "\"><img alt=\"C%d-%04X\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/CNS%d/%04X.gif\">\n"
71                          plane code plane code))
72          (insert (format "C%d-%04X</a>" plane code))
73          )
74         ((string-match "&ZOB-\\([0-9]+\\);" str)
75          (setq code (string-to-int (match-string 1 str)))
76          (insert "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=")
77          (insert str)
78          (insert (format "\"><img alt=\"ZOB-%04d\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/ZOB-1968/%04d.png\">\n"
79                          code code))
80          (insert (format "ZOB-%04d</a>" code))
81          )
82         (t
83          (insert "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=")
84          (insert str)
85          (insert "\">")
86          (insert str)
87          (insert "</a>")
88          ))
89        (goto-char (point-min))
90        (while (search-forward "&" nil t)
91          (replace-match "&amp;" t 'literal))
92        (buffer-string)
93        ))
94     (princ
95      (or (if (setq ucs (or (char-ucs c)
96                            (encode-char c 'ucs)))
97              (format " <a href=\"http://www.unicode.org/cgi-bin/GetUnihanData.pl?codepoint=%X\">%s</a>"
98                      ucs
99                      (cond ((<= ucs #xFFFF)
100                             (format "U+%04X" ucs))
101                            ((<= ucs #x10FFFF)
102                             (format "U-%08X" ucs))))
103            "          ")))
104     (princ " ")
105     (when is
106       (princ
107        (with-temp-buffer
108          (insert
109           (encode-coding-string
110            (ideographic-structure-to-ids is)
111            'utf-8-jp-er))
112          (goto-char (point-min))
113          (while (re-search-forward "&CB\\([0-9]+\\);" nil t)
114            (setq code (string-to-int (match-string 1)))
115            (replace-match
116             (format "<img alt=\"CB%05d\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/cb-gaiji/%02d/CB%05d.gif\">"
117                     code (/ code 1000) code)
118             t 'literal))
119          (buffer-string))))
120     (when (and ucs
121                (with-current-buffer
122                    (find-file-noselect
123                     www-ids-find-tang-chars-file-name)
124                  (goto-char (point-min))
125                  (re-search-forward (format "^%d$" ucs) nil t)))
126       (princ
127        (format " <a href=\"http://coe21.zinbun.kyoto-u.ac.jp/djvuchar?query=%s\">"
128                (mapconcat
129                 (lambda (c)
130                   (format "%%%02X" (char-int c)))
131                 (encode-coding-string (char-to-string c)
132                                       'utf-8-jp)
133                 "")))
134       (princ (encode-coding-string "\e$B"M\e(B[\e$BEbBeBsK\\e(B]</a>" 'utf-8-jp-er)))
135     (princ "<br>\n")))
136
137 (defun www-batch-ids-find ()
138   (let ((components (car command-line-args-left))
139         (coded-charset-entity-reference-alist
140          (list*
141           '(=cns11643-1         "C1-" 4 X)
142           '(=cns11643-2         "C2-" 4 X)
143           '(=cns11643-3         "C3-" 4 X)
144           '(=cns11643-4         "C4-" 4 X)
145           '(=cns11643-5         "C5-" 4 X)
146           '(=cns11643-6         "C6-" 4 X)
147           '(=cns11643-7         "C7-" 4 X)
148           '(=gb2312             "G0-" 4 X)
149           '(=gb12345            "G1-" 4 X)
150           '(=jis-x0208@1990     "J90-" 4 X)
151           '(=jis-x0212          "JSP-" 4 X)
152           '(=cbeta              "CB" 5 d)
153           '(=jef-china3         "JC3-" 4 X)
154           '(=jis-x0208@1978     "J78-" 4 X)
155           '(=jis-x0208@1983     "J83-" 4 X)
156           '(=daikanwa           "M-" 5 d)
157           coded-charset-entity-reference-alist))
158         is)
159     (setq command-line-args-left (cdr command-line-args-left))
160     (cond
161      ((stringp components)
162       (if (string-match "^components=" components)
163           (setq components (substring components (match-end 0))))
164       (setq components
165             (if (> (length components) 0)
166                 (decode-url-string components 'utf-8-jp-er)
167               nil))
168       )
169      (t
170       (setq components nil)
171       ))
172     (princ "Content-Type: text/html; charset=\"UTF-8\"
173
174 <!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"
175             \"http://www.w3.org/TR/html4/loose.dtd\">
176 <html lang=\"ja\">
177 <head>
178 <title>CHISE IDS Find</title>
179 </head>
180
181 <body>
182
183 <h1>")
184     (princ (encode-coding-string "CHISE IDS \e$B4A;z8!:w\e(B" 'utf-8-jp-er))
185     (princ "</h1>
186 <p>
187 <form action=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/ids-find\" method=\"GET\">
188 ")
189     (princ (encode-coding-string "\e$BItIJJ8;zNs\e(B" 'utf-8-jp-er))
190     (princ " <input type=\"text\" name=\"components\" size=\"30\" maxlength=\"30\" value=\"")
191     (if (> (length components) 0)
192         (princ (encode-coding-string components 'utf-8-jp-er)))
193     (princ "\">
194 <input type=\"submit\" value=\"")
195     (princ (encode-coding-string "\e$B8!:w3+;O\e(B" 'utf-8-jp-er))
196     (princ "\">
197 </form>
198
199 ")
200     (cond
201      (components
202       ;; (map-char-attribute
203       ;;  (lambda (c v)
204       ;;    (when (every (lambda (p)
205       ;;                   (ideographic-structure-member p v))
206       ;;                 components)
207       ;;      (princ (encode-coding-string
208       ;;              (ids-find-format-line c v)
209       ;;              'utf-8-jp-er))
210       ;;      (princ "<br>\n")
211       ;;      )
212       ;;    nil)
213       ;;  'ideographic-structure)
214       (when (= (length components) 1)
215         (www-ids-find-format-line (aref components 0)
216                                   (char-feature (aref components 0)
217                                                 'ideographic-structure)))
218       (dolist (c (ideographic-products-find components))
219         (setq is (char-feature c 'ideographic-structure))
220         ;; to avoid problems caused by wrong indexes
221         (when (every (lambda (c)
222                        (ideographic-structure-member c is))
223                      components)
224           (www-ids-find-format-line c is)))
225       )
226      (t
227       (princ (encode-coding-string "<hr>
228 <p>
229 \e$B;XDj$7$?ItIJ$rA4$F4^$`4A;z$N0lMw$rI=<($7$^$9!#\e(B
230 <p>
231 CHISE \e$B$GMQ$$$i$l$k<BBV;2>H7A<0!JNc!'\e(B&amp;M-00256;\e$B!K$GItIJ$r;XDj$9$k;v$b$G$-$^$9!#\e(B" 'utf-8-jp-er))
232       ))
233     (princ "<hr>")
234     (princ
235      (format
236       "Powered by <a
237 href=\"http://kanji.zinbun.kyoto-u.ac.jp/projects/chise/xemacs/\"
238 >XEmacs CHISE</a> %s."
239       xemacs-chise-version))
240     (princ "
241 </body>
242 </html>
243 ")))