482331e7ce75a0a4a25368c7d66ccd2643a48502
[chise/ids.git] / www / www-ids-find.el
1 (require 'ids-find)
2
(defun decode-url-string (string &optional coding-system)
  "Decode the percent-escapes in STRING and return the decoded text.
Every \"%XX\" sequence, where XX is two hexadecimal digits in either
letter case, is replaced by the character whose code is XX; all other
text is copied unchanged.  The result is then passed through
`decode-coding-string' together with CODING-SYSTEM.
Return nil when STRING is nil or empty."
  (if (> (length string) 0)
      (let ((i 0)
            pieces)
        ;; The hex class lists both letter cases explicitly so that the
        ;; match does not depend on the current value of
        ;; `case-fold-search'.
        (while (string-match "%\\([0-9A-Fa-f][0-9A-Fa-f]\\)" string i)
          ;; PIECES is built in reverse order: each round pushes the
          ;; literal text preceding the escape, then the decoded octet.
          (setq pieces (cons (char-to-string
                              (int-char
                               (string-to-int (match-string 1 string) 16)))
                             (cons (substring string i (match-beginning 0))
                                   pieces))
                i (match-end 0)))
        ;; Join everything once instead of re-concatenating per escape.
        (decode-coding-string
         (apply 'concat (nreverse (cons (substring string i) pieces)))
         coding-system))))
17
;; Version string of this CGI front end.  `www-batch-ids-find' prints it
;; after "Version " near the top of the generated page.
(defconst www-ids-find-version "0.24.0")

;; Path "character/feature/ideographic-products" below
;; `chise-system-db-directory'.  `www-batch-ids-find' shows this file's
;; modification time as "Last-modified" and compares its age with the
;; running executable to decide whether to print the "being updated"
;; notice.
(defvar www-ids-find-ideographic-products-file-name
  (expand-file-name "ideographic-products"
                    (expand-file-name
                     "feature"
                     (expand-file-name
                      "character"
                      chise-system-db-directory))))

;; URL prefix of the "(link map)" link; `www-ids-find-format-line'
;; appends the character's UCS code in upper-case hexadecimal.
(defvar www-ids-find-chise-link-map-url-prefix
  "http://kamichi.jp/chise_linkmap/map.cgi?code=")

;; File searched by `www-ids-find-format-line' for a line consisting of
;; the character's UCS code in decimal; a hit adds the djvuchar link.
;; NOTE(review): the default points into one user's home directory --
;; confirm it is overridden or valid on the deployment host.
(defvar www-ids-find-tang-chars-file-name
  "~tomo/projects/chise/ids/www/tang-chars.udd")
33
(defun www-ids-find-format-char (c)
  "Print, via `princ', an HTML anchor for character C.
C is encoded with the `utf-8-er' coding system.  When the encoded
text contains one of the entity references recognised below (CB,
JC3, J78/J83/J90/JSP, G0/G1, C1..C7, ZOB) the anchor holds a glyph
image followed by a textual label; otherwise it holds the encoded
text itself and the non-ASCII octets of the link target are
percent-escaped.  Finally every \"&\" of the generated markup is
rewritten to \"&amp;\"."
  (let ((encoded (encode-coding-string (format "%c" c) 'utf-8-er))
        (anchor-open
         "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char="))
    (princ
     (with-temp-buffer
       ;; Every variant starts with the same link prefix.
       (insert anchor-open)
       (cond
        ;; CB gaiji: decimal number, images grouped per thousand.
        ((string-match "&CB\\([0-9]+\\);" encoded)
         (let ((number (string-to-int (match-string 1 encoded))))
           (insert
            encoded
            (format "\"><img alt=\"CB%05d\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/cb-gaiji/%02d/CB%05d.gif\">\n"
                    number (/ number 1000) number)
            (format "CB%05d</a>" number))))
        ;; JC3: hexadecimal code.
        ((string-match "&JC3-\\([0-9A-F]+\\);" encoded)
         (let ((number (string-to-int (match-string 1 encoded) 16)))
           (insert
            encoded
            (format "\"><img alt=\"JC3-%04X\" src=\"http://kanji.zinbun.kyoto-u.ac.jp/db/CHINA3/Gaiji/%04x.gif\">\n"
                    number number)
            (format "JC3-%04X</a>" number))))
        ;; J78/J83/J90/JSP: image name is built from (high octet - 32)
        ;; and (low octet - 32) of the hexadecimal code.
        ((string-match "&J\\(78\\|83\\|90\\|SP\\)-\\([0-9A-F]+\\);" encoded)
         (let* ((set-id (match-string 1 encoded))
                (number (string-to-int (match-string 2 encoded) 16))
                (row (- (lsh number -8) 32))
                (cell (- (logand number 255) 32)))
           (insert
            encoded
            (format "\"><img alt=\"J%s-%04X\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/JIS-%s/%02d-%02d.gif\">\n"
                    set-id number set-id row cell)
            (format "J%s-%04X</a>" set-id number))))
        ;; G0/G1: same high/low octet scheme as above.
        ((string-match "&G\\([01]\\)-\\([0-9A-F]+\\);" encoded)
         (let* ((set-id (string-to-int (match-string 1 encoded)))
                (number (string-to-int (match-string 2 encoded) 16))
                (row (- (lsh number -8) 32))
                (cell (- (logand number 255) 32)))
           (insert
            encoded
            (format "\"><img alt=\"G%d-%04X\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/GB%d/%02d-%02d.gif\">\n"
                    set-id number set-id row cell)
            (format "G%d-%04X</a>" set-id number))))
        ;; C1..C7: image named by the hexadecimal code.
        ((string-match "&C\\([1-7]\\)-\\([0-9A-F]+\\);" encoded)
         (let ((set-id (string-to-int (match-string 1 encoded)))
               (number (string-to-int (match-string 2 encoded) 16)))
           (insert
            encoded
            (format "\"><img alt=\"C%d-%04X\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/CNS%d/%04X.gif\">\n"
                    set-id number set-id number)
            (format "C%d-%04X</a>" set-id number))))
        ;; ZOB: decimal number.
        ((string-match "&ZOB-\\([0-9]+\\);" encoded)
         (let ((number (string-to-int (match-string 1 encoded))))
           (insert
            encoded
            (format "\"><img alt=\"ZOB-%04d\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/ZOB-1968/%04d.png\">\n"
                    number number)
            (format "ZOB-%04d</a>" number))))
        ;; Anything else: the link target keeps ASCII octets and
        ;; percent-escapes the rest; the link text is the encoded
        ;; character itself.
        (t
         (insert
          (mapconcat (lambda (octet)
                       (if (> (char-int octet) #x7F)
                           (format "%%%02X" octet)
                         (char-to-string octet)))
                     encoded "")
          "\">"
          encoded
          "</a>")))
       ;; Escape every ampersand of the markup produced above.
       (goto-char (point-min))
       (while (search-forward "&" nil t)
         (replace-match "&amp;" t 'literal))
       (buffer-string)))))
112   
(defun www-ids-find-format-line (c is)
  "Print one HTML result line for character C, ending with \"<br>\".
IS is the ideographic structure of C, or nil when there is none.
The following pieces are written with `princ', in this order:
  - the anchor produced by `www-ids-find-format-char';
  - a Unihan link labelled U+XXXX (code up to #xFFFF) or U-XXXXXXXX
    (any larger code) when C has a UCS code, else ten spaces;
  - a \"(link map)\" link when C has a UCS code;
  - the IDS text of IS, with CB entity references replaced by images;
  - a djvuchar link when the decimal UCS code is found on a line of
    its own in `www-ids-find-tang-chars-file-name'."
  (let (code ucs)
    (www-ids-find-format-char c)
    (princ
     (if (setq ucs (or (char-ucs c)
                       (encode-char c 'ucs)))
         (format
          " <a href=\"http://www.unicode.org/cgi-bin/GetUnihanData.pl?codepoint=%X\">%s</a>"
          ucs
          (if (<= ucs #xFFFF)
              (format "U+%04X" ucs)
            ;; Also used for codes above #x10FFFF: without a fallback
            ;; the label would be printed as the text "nil".
            (format "U-%08X" ucs)))
       "          "))
    (when ucs
      (princ
       (format " <a href=\"%s%X\">(link map)</a>"
               www-ids-find-chise-link-map-url-prefix ucs)))
    (princ " ")
    (when is
      (princ
       (with-temp-buffer
         (insert
          (encode-coding-string
           (ideographic-structure-to-ids is)
           'utf-8-er))
         (goto-char (point-min))
         ;; Show CB gaiji components as images instead of raw entity
         ;; references.
         (while (re-search-forward "&CB\\([0-9]+\\);" nil t)
           (setq code (string-to-int (match-string 1)))
           (replace-match
            (format "<img alt=\"CB%05d\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/cb-gaiji/%02d/CB%05d.gif\">"
                    code (/ code 1000) code)
            t 'literal))
         (buffer-string))))
    (when (and ucs
               (with-current-buffer
                   (find-file-noselect
                    www-ids-find-tang-chars-file-name)
                 (goto-char (point-min))
                 (re-search-forward (format "^%d$" ucs) nil t)))
      (princ
       (format " <a href=\"http://coe21.zinbun.kyoto-u.ac.jp/djvuchar?query=%s\">"
               (mapconcat
                (lambda (c)
                  (format "%%%02X" (char-int c)))
                (encode-coding-string (char-to-string c)
                                      'utf-8-jp)
                "")))
      ;; NOTE(review): the literal below appears to be ISO-2022-JP
      ;; encoded text; keep it byte-for-byte.
      (princ (encode-coding-string "\e$B"M\e(B[\e$BEbBeBsK\\e(B]</a>" 'utf-8-jp-er)))
    (princ "<br>\n")))
163
(defun www-ids-find-char-code< (a b)
  "Return non-nil when the integer code of character A is below that of B."
  (< (char-int a) (char-int b)))

(defun www-ids-find-char-strokes< (a b tie-breaker)
  "Return non-nil when character A sorts before B by total stroke count.
Characters for which `char-total-strokes' returns a value sort before
those for which it returns nil.  When both counts are equal, or both
are missing, the result of calling TIE-BREAKER with A and B decides."
  (let ((as (char-total-strokes a))
        (bs (char-total-strokes b)))
    (cond ((and as bs)
           (if (= as bs)
               (funcall tie-breaker a b)
             (< as bs)))
          ;; Exactly one count is known: the character that has it goes
          ;; first.  Answering nil for the opposite pair keeps the
          ;; predicate antisymmetric, as `sort' expects.
          (as t)
          (bs nil)
          (t (funcall tie-breaker a b)))))

(defun www-ids-insert-chars-including-components (components
                                                  &optional ignored-chars)
  "Print a nested HTML list of the characters that include COMPONENTS.
The characters come from `ideographic-products-find'.  Each one that
is not a member of IGNORED-CHARS is printed as an \"<li>\" line with
`www-ids-find-format-line', followed by a \"<ul>\" holding the result
of a recursive search for that character itself.
The sort order depends on the number of characters found:
  more than 10000  as returned, unsorted;
  more than 4096   by integer character code;
  more than 512    by stroke count, then integer character code;
  otherwise        by stroke count, then `ideograph-char<'.
Return IGNORED-CHARS extended with the characters printed by the
nested searches."
  (let* ((products (copy-list (ideographic-products-find components)))
         (n-products (length products))
         is)
    (dolist (c (cond
                ((> n-products 10000)
                 products)
                ((> n-products 4096)
                 (sort products 'www-ids-find-char-code<))
                ((> n-products 512)
                 (sort products
                       (lambda (a b)
                         (www-ids-find-char-strokes<
                          a b 'www-ids-find-char-code<))))
                (t
                 (sort products
                       (lambda (a b)
                         (www-ids-find-char-strokes<
                          a b 'ideograph-char<))))))
      (unless (memq c ignored-chars)
        (setq is (char-feature c 'ideographic-structure))
        (princ "<li>")
        (www-ids-find-format-line c is)
        (princ "<ul>\n")
        ;; The nested call receives C as already seen and hands back the
        ;; list extended with whatever it printed, so the remaining
        ;; iterations skip those characters.
        (setq ignored-chars
              (www-ids-insert-chars-including-components
               (char-to-string c)
               (cons c ignored-chars)))
        (princ "</ul>\n"))))
  ignored-chars)
211
(defun www-ids-find-escape-html (string)
  "Return STRING with &, <, > and \" replaced by HTML entities."
  (let ((start 0)
        pieces)
    (while (string-match "[&<>\"]" string start)
      ;; PIECES is built in reverse: literal text first, then the entity.
      (setq pieces
            (cons (cdr (assoc (match-string 0 string)
                              '(("&" . "&amp;")
                                ("<" . "&lt;")
                                (">" . "&gt;")
                                ("\"" . "&quot;"))))
                  (cons (substring string start (match-beginning 0))
                        pieces))
            start (match-end 0)))
    (apply 'concat (nreverse (cons (substring string start) pieces)))))

(defun www-batch-ids-find ()
  "Print the complete \"CHISE IDS Find\" response with `princ'.
The first element of `command-line-args-left' is consumed as the
query; a leading \"components=\" is stripped and the rest is decoded
with `decode-url-string' using `utf-8-er'.  The output is a
Content-Type header followed by an HTML page holding the search form
and either the search results (when a query was given) or an
introduction with links.
The query is HTML-escaped before it is echoed into the form field.
NOTE(review): the `\\e$B ... \\e(B' string literals appear to be
ISO-2022-JP encoded text; keep them byte-for-byte."
  (let ((components (car command-line-args-left))
        (coded-charset-entity-reference-alist
         (list*
          '(=cns11643-1         "C1-" 4 X)
          '(=cns11643-2         "C2-" 4 X)
          '(=cns11643-3         "C3-" 4 X)
          '(=cns11643-4         "C4-" 4 X)
          '(=cns11643-5         "C5-" 4 X)
          '(=cns11643-6         "C6-" 4 X)
          '(=cns11643-7         "C7-" 4 X)
          '(=gb2312             "G0-" 4 X)
          '(=gb12345            "G1-" 4 X)
          '(=jis-x0208@1990     "J90-" 4 X)
          '(=jis-x0212          "JSP-" 4 X)
          '(=cbeta              "CB" 5 d)
          '(=jef-china3         "JC3-" 4 X)
          '(=jis-x0208@1978     "J78-" 4 X)
          '(=jis-x0208@1983     "J83-" 4 X)
          '(=daikanwa           "M-" 5 d)
          coded-charset-entity-reference-alist))
        )
    (setq command-line-args-left (cdr command-line-args-left))
    (cond
     ((stringp components)
      (if (string-match "^components=" components)
          (setq components (substring components (match-end 0))))
      (setq components
            (if (> (length components) 0)
                (decode-url-string components 'utf-8-er)
              nil))
      )
     (t
      (setq components nil)
      ))
    (princ "Content-Type: text/html; charset=UTF-8

<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"
            \"http://www.w3.org/TR/html4/loose.dtd\">
<html lang=\"ja\">
<head>
<title>CHISE IDS Find</title>
</head>

<body>

<h1>")
    (princ (encode-coding-string "CHISE IDS \e$B4A;z8!:w\e(B" 'utf-8-jp-er))
    (princ "</h1>")
    (princ "
<p>Version ")
    (princ www-ids-find-version)
    (princ (format-time-string
            " (Last-modified: %Y-%m-%d %H:%M:%S)"
            (nth 5
                 (file-attributes
                  www-ids-find-ideographic-products-file-name))))
    (princ "
<hr>
<p>
<form action=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/ids-find\" method=\"GET\">
")
    (princ (encode-coding-string "\e$BItIJJ8;zNs\e(B" 'utf-8-jp-er))
    (princ " <input type=\"text\" name=\"components\" size=\"30\" maxlength=\"30\" value=\"")
    ;; The query comes straight from the request, so it must not be able
    ;; to close the attribute or open a tag when echoed back.
    (if (> (length components) 0)
        (princ (www-ids-find-escape-html
                (encode-coding-string components 'utf-8-er))))
    (princ "\">
<input type=\"submit\" value=\"")
    (princ (encode-coding-string "\e$B8!:w3+;O\e(B" 'utf-8-jp-er))
    (princ "\">
</form>

")
    ;; Print a notice unless the products file is newer than the
    ;; executable found for (car command-line-args).
    (unless (file-newer-than-file-p
             www-ids-find-ideographic-products-file-name
             (locate-file (car command-line-args) exec-path))
      (princ (encode-coding-string "<hr>
<p>
\e$B8=:_!"%7%9%F%`$N99?7:n6HCf$G$9!#$7$P$i$/$*BT$A$/$@$5$$!#\e(B
<hr>
" 'utf-8-jp-er))
      ;; (setq components nil)
      )
    (cond
     (components
      ;; (map-char-attribute
      ;;  (lambda (c v)
      ;;    (when (every (lambda (p)
      ;;                   (ideographic-structure-member p v))
      ;;                 components)
      ;;      (princ (encode-coding-string
      ;;              (ids-find-format-line c v)
      ;;              'utf-8-jp-er))
      ;;      (princ "<br>\n")
      ;;      )
      ;;    nil)
      ;;  'ideographic-structure)
      ;; A single-character query is itself shown first.
      (when (= (length components) 1)
        (www-ids-find-format-line (aref components 0)
                                  (char-feature (aref components 0)
                                                'ideographic-structure)))
      ;; (dolist (c (ideographic-products-find components))
      ;;   (setq is (char-feature c 'ideographic-structure))
      ;;   ;; to avoid problems caused by wrong indexes
      ;;   (when (every (lambda (c)
      ;;                  (ideographic-structure-member c is))
      ;;                components)
      ;;     (www-ids-find-format-line c is)))
      (princ "<ul>\n")
      (www-ids-insert-chars-including-components components)
      (princ "</ul>\n")
      )
     (t
      (princ (encode-coding-string "<hr>
<p>
\e$B;XDj$7$?ItIJ$rA4$F4^$`4A;z$N0lMw$rI=<($7$^$9!#\e(B
<p>
CHISE \e$B$GMQ$$$i$l$k<BBV;2>H7A<0!JNc!'\e(B&amp;M-00256;\e$B!K$GItIJ$r;XDj$9$k;v$b$G$-$^$9!#\e(B" 'utf-8-jp-er))
      (princ (encode-coding-string "
<p>
\[Links\]
<ul>
<li><a href=\"http://www.shuiren.org/chuden/toyoshi/syoseki/chise_ids.html\"
>\e$B!V\e(BCHISE IDS FIND\e$B$G4A;z$r8!:w!W\e(B</a> \e$B!=\e(B \e$B;3ED?r?N$5$s!J\e(B<a
href=\"http://www.shuiren.org/\">\e$B?g?MDb\e(B</a>\e$B!K$K$h$k2r@b\e(B
</ul>
<ul>
<li><a href=\"http://www.karitsu.org/tools/firefox_plugin.htm\"
>Firefox \e$BMQ\e(B plugin</a> by \e$B=);3M[0lO:$5$s!J\e(B<a href=\"http://www.karitsu.org/\"
>\e$B2aN)c7\e(B</a>\e$B!K\e(B
</ul>
<ul>
<li><a href=\"http://cvs.m17n.org/viewcvs/chise/ids/www/www-ids-find.el?view=markup\"
>www-ids-find.el (source file (Emacs Lisp part))
<li><a href=\"http://kanji.zinbun.kyoto-u.ac.jp/projects/chise/ids/\"
>\e$B!V\e(BCHISE \e$B4A;z9=B$>pJs%G!<%?%Y!<%9!W\e(B</a>
<li><a href=\"http://fonts.jp/chise_linkmap/\"
>\e$B!V\e(Bchise_linkmap : CHISE \e$B4A;zO"4D?^!W\e(B</a> by \e$B>eCO9(0l$5$s\e(B
<li><a href=\"http://kanji.zinbun.kyoto-u.ac.jp/projects/chise/\"
>CHISE Project</a>
</ul>
<ul>
<li><a href=\"http://coe21.zinbun.kyoto-u.ac.jp/djvuchar\"
>\e$B!VBsK\J8;z%G!<%?%Y!<%9!W\e(B</a> by
<a href=\"http://coe21.zinbun.kyoto-u.ac.jp/\"
>\e$B5~ETBg3X\e(B21\e$B@$5*\e(BCOE\e$B!VEl%"%8%"@$3&$N?MJ8>pJs3X8&5f650i5rE@!W\e(B</a>
<li><a href=\"http://www.unicode.org/\"
>Unicode</a>
</ul>"
 'utf-8-jp-er))

      ))
    (princ "<hr>")
    (princ "<p>
Copyright (C) 2005, 2006, 2007, 2008 <a href=\"http://kanji.zinbun.kyoto-u.ac.jp/~tomo/\"
>MORIOKA Tomohiko</a>")
    (princ
     (format
      "<p>Powered by <a
href=\"http://kanji.zinbun.kyoto-u.ac.jp/projects/chise/xemacs/\"
>XEmacs CHISE</a> %s."
      xemacs-chise-version))
    (princ "
</body>
</html>
")))