(www-ids-find-version): Update to 0.25.1.
[chise/ids.git] / www / www-ids-find.el
1 (require 'ids-find)
2
(defun decode-url-string (string &optional coding-system)
  "Decode the percent-escapes in STRING and return the decoded text.

Every \"%XX\" sequence (two hexadecimal digits, either case) is replaced
by the single byte it denotes; the resulting byte string is then passed
to `decode-coding-string' with CODING-SYSTEM.

Return nil when STRING is nil or empty."
  (if (> (length string) 0)
      (let ((pos 0)
            pieces)
        ;; Spell out both digit cases so that matching does not depend on
        ;; the ambient value of `case-fold-search'.
        (while (string-match "%\\([0-9A-Fa-f][0-9A-Fa-f]\\)" string pos)
          ;; Collect the literal text before the escape and the decoded
          ;; byte (in reverse order); joining once at the end avoids
          ;; re-copying the accumulated result on every escape.
          (setq pieces (cons (char-to-string
                              (int-char
                               (string-to-int (match-string 1 string) 16)))
                             (cons (substring string pos (match-beginning 0))
                                   pieces))
                pos (match-end 0)))
        (decode-coding-string
         (mapconcat 'identity
                    (nreverse (cons (substring string pos) pieces))
                    "")
         coding-system))))
17
(defconst www-ids-find-version "0.25.1"
  "Version string of this IDS find front end.")

(defvar www-ids-find-ideographic-products-file-name
  (let* ((character-directory
          (expand-file-name "character" chise-system-db-directory))
         (feature-directory
          (expand-file-name "feature" character-directory)))
    (expand-file-name "ideographic-products" feature-directory))
  "File name of the \"ideographic-products\" feature database.
It is located under character/feature/ in `chise-system-db-directory'.")

(defvar www-ids-find-char-viewer-url "/chisewiki/view.cgi?char="
  "URL prefix of the character viewer; the character is appended to it.")

(defvar www-ids-find-chise-link-map-url-prefix
  "http://fonts.jp/chise_linkmap/map.cgi?code="
  "URL prefix of the CHISE link map; a hexadecimal code is appended to it.")

(defvar www-ids-find-tang-chars-file-name
  "~tomo/projects/chise/ids/www/tang-chars.udd"
  "File name of the tang-chars list.")
36
(defun www-ids-find-format-char--escape (string)
  "Return STRING with the HTML special characters &, <, > and \" escaped."
  (mapconcat (lambda (ch)
               (cond ((eq ch ?&) "&amp;")
                     ((eq ch ?<) "&lt;")
                     ((eq ch ?>) "&gt;")
                     ((eq ch ?\") "&quot;")
                     (t (char-to-string ch))))
             string ""))

(defun www-ids-find-format-char--gaiji (str)
  "Return (LABEL . IMAGE-URL) if STR contains a known glyph-image reference.

STR is matched, in order, against the entity-reference forms &CBn;,
&JC3-x;, &J78-x; / &J83-x; / &J90-x; / &JSP-x;, &G0-x; / &G1-x;,
&C1-x; .. &C7-x; and &ZOB-n;.  LABEL is the text used both as the image's
alt text and as the visible code label.  Return nil when none matches."
  (let (plane code)
    (cond
     ((string-match "&CB\\([0-9]+\\);" str)
      (setq code (string-to-int (match-string 1 str)))
      (cons (format "CB%05d" code)
            (format "/glyphs/cb-gaiji/%02d/CB%05d.gif" (/ code 1000) code)))
     ((string-match "&JC3-\\([0-9A-F]+\\);" str)
      (setq code (string-to-int (match-string 1 str) 16))
      (cons (format "JC3-%04X" code)
            (format "http://kanji.zinbun.kyoto-u.ac.jp/db/CHINA3/Gaiji/%04x.gif"
                    code)))
     ((string-match "&J\\(78\\|83\\|90\\|SP\\)-\\([0-9A-F]+\\);" str)
      (setq plane (match-string 1 str)
            code (string-to-int (match-string 2 str) 16))
      ;; The image file is named from the two code bytes, each minus 32.
      (cons (format "J%s-%04X" plane code)
            (format "/glyphs/JIS-%s/%02d-%02d.gif"
                    plane
                    (- (lsh code -8) 32)
                    (- (logand code 255) 32))))
     ((string-match "&G\\([01]\\)-\\([0-9A-F]+\\);" str)
      (setq plane (string-to-int (match-string 1 str))
            code (string-to-int (match-string 2 str) 16))
      (cons (format "G%d-%04X" plane code)
            (format "/glyphs/GB%d/%02d-%02d.gif"
                    plane
                    (- (lsh code -8) 32)
                    (- (logand code 255) 32))))
     ((string-match "&C\\([1-7]\\)-\\([0-9A-F]+\\);" str)
      (setq plane (string-to-int (match-string 1 str))
            code (string-to-int (match-string 2 str) 16))
      (cons (format "C%d-%04X" plane code)
            (format "/glyphs/CNS%d/%04X.gif" plane code)))
     ((string-match "&ZOB-\\([0-9]+\\);" str)
      (setq code (string-to-int (match-string 1 str)))
      (cons (format "ZOB-%04d" code)
            (format "/glyphs/ZOB-1968/%04d.png" code))))))

(defun www-ids-find-format-char (c &optional code-desc)
  "Print character C as an HTML link to the character viewer.

C is encoded with the `utf-8-er' coding system.  When the encoded form is
one of the glyph-image entity references known to
`www-ids-find-format-char--gaiji', the link contains an <img> element,
followed by the code label when CODE-DESC is non-nil.  Otherwise the link
text is the encoded character itself and the link target carries its
non-ASCII bytes percent-encoded.

The anchor is always closed, and &, <, > and \" coming from the character
or from `www-ids-find-char-viewer-url' are HTML-escaped."
  (let* ((str (encode-coding-string (format "%c" c) 'utf-8-er))
         (gaiji (www-ids-find-format-char--gaiji str)))
    (princ
     (if gaiji
         (concat "<a href=\""
                 (www-ids-find-format-char--escape
                  (concat www-ids-find-char-viewer-url str))
                 "\"><img alt=\"" (car gaiji)
                 "\" src=\"" (cdr gaiji) "\">"
                 ;; Close the anchor in both cases; without a code label
                 ;; the newline goes after the closing tag.
                 (if code-desc
                     (concat "\n" (car gaiji) "</a>")
                   "</a>\n"))
       (concat "<a href=\""
               (www-ids-find-format-char--escape
                (concat www-ids-find-char-viewer-url
                        ;; Percent-encode every byte above 0x7F.
                        (mapconcat (lambda (byte)
                                     (if (<= (char-int byte) #x7F)
                                         (char-to-string byte)
                                       (format "%%%02X" (char-int byte))))
                                   str "")))
               "\">"
               (www-ids-find-format-char--escape str)
               "</a>")))))
128   
(defun www-ids-find-format-line (c is)
  "Print one HTML result line for character C.

The line consists of C itself (via `www-ids-find-format-char' with a code
label), a Unihan link and a link-map link when C has a UCS code, the IDS
string derived from the ideographic structure IS when IS is non-nil, a
djvuchar link when the UCS code is listed in
`www-ids-find-tang-chars-file-name', and a closing <br>."
  (www-ids-find-format-char c 'code-desc)
  (let ((ucs (or (char-ucs c)
                 (encode-char c 'ucs))))
    ;; Unihan link, or blank padding when C has no UCS code.
    (princ
     (if ucs
         (format
          " <a href=\"http://www.unicode.org/cgi-bin/GetUnihanData.pl?codepoint=%X\">%s</a>"
          ucs
          (cond ((<= ucs #xFFFF)
                 (format "U+%04X" ucs))
                ((<= ucs #x10FFFF)
                 (format "U-%08X" ucs))))
       "          "))
    (if ucs
        (princ
         (format " <a href=\"%s%X\">(link map)</a>"
                 www-ids-find-chise-link-map-url-prefix ucs)))
    (princ " ")
    ;; Each element of the IDS string is printed as its own link.
    (if is
        (let ((ids (ideographic-structure-to-ids is)))
          (dotimes (k (length ids))
            (www-ids-find-format-char (aref ids k)))))
    ;; The tang-chars file is searched for a line holding exactly the
    ;; decimal UCS code.
    (if (and ucs
             (with-current-buffer
                 (find-file-noselect www-ids-find-tang-chars-file-name)
               (goto-char (point-min))
               (re-search-forward (format "^%d$" ucs) nil t)))
        (progn
          (princ
           (format " <a href=\"http://coe21.zinbun.kyoto-u.ac.jp/djvuchar?query=%s\">"
                   (mapconcat
                    (lambda (byte)
                      (format "%%%02X" (char-int byte)))
                    (encode-coding-string (char-to-string c) 'utf-8-jp)
                    "")))
          (princ (encode-coding-string "\e$B"M\e(B[\e$BEbBeBsK\\e(B]</a>" 'utf-8-jp-er)))))
  (princ "<br>\n"))
171
(defun www-ids-insert-chars--code< (a b)
  "Return non-nil when the integer code of character A is below that of B."
  (< (char-int a) (char-int b)))

(defun www-ids-insert-chars--strokes< (a b tie-breaker)
  "Return non-nil when character A should be listed before character B.

Characters are ordered by `char-total-strokes'.  A character with a
stroke count always precedes one without, in whichever argument position
it appears, so the predicate is consistent for `sort'.  TIE-BREAKER, a
two-argument predicate, decides between characters whose stroke counts
are equal or both missing."
  (let ((a-strokes (char-total-strokes a))
        (b-strokes (char-total-strokes b)))
    (cond ((and a-strokes b-strokes)
           (if (= a-strokes b-strokes)
               (funcall tie-breaker a b)
             (< a-strokes b-strokes)))
          (a-strokes t)
          (b-strokes nil)
          (t (funcall tie-breaker a b)))))

(defun www-ids-insert-chars-including-components (components
                                                  &optional ignored-chars)
  "Print the characters found by `ideographic-products-find' for COMPONENTS.

With fewer than 1024 results each character is printed as a list item
and the search is repeated recursively with that character as the
component string.  From 1024 results on, a notice is printed and the
recursion is skipped.  From 2048 results on, the characters are printed
as bare links without list markup or sorting.

Characters in IGNORED-CHARS are skipped.  Return the list of ignored
characters, extended by the characters expanded during recursion."
  (let* ((products (ideographic-products-find components))
         (len (length products))
         (ignore-children (>= len 1024)))
    (when ignore-children
      (princ
       (encode-coding-string
        "<p>\e$B7k2L$,B?$9$.$k$?$a!":F5"E*8!:w$r>JN,$7$^$7$?!#\e(B</p>"
        'utf-8-jp-er)))
    (if (>= len 2048)
        (dolist (c products)
          (www-ids-find-format-char c))
      (princ "<ul>\n")
      (dolist (c (sort
                  (copy-list products)
                  (cond
                   ;; Large result sets: sort by character code only.
                   ((>= len 1024)
                    'www-ids-insert-chars--code<)
                   ;; Medium: by stroke count, ties by character code.
                   ((>= len 512)
                    (lambda (a b)
                      (www-ids-insert-chars--strokes<
                       a b 'www-ids-insert-chars--code<)))
                   ;; Small: by stroke count, ties by `ideograph-char<'.
                   (t
                    (lambda (a b)
                      (www-ids-insert-chars--strokes<
                       a b 'ideograph-char<))))))
        (unless (memq c ignored-chars)
          (let ((structure (char-feature c 'ideographic-structure)))
            (princ "<li>")
            (www-ids-find-format-line c structure))
          (unless ignore-children
            ;; Recurse with C as the component; C itself is added to the
            ;; ignored list so it is not expanded again.
            (setq ignored-chars
                  (www-ids-insert-chars-including-components
                   (char-to-string c)
                   (cons c ignored-chars))))))
      (princ "</ul>\n")))
  ignored-chars)
236
(defun www-batch-ids-find--escape-attribute (string)
  "Return STRING with &, <, > and \" replaced by HTML character references."
  (mapconcat (lambda (ch)
               (cond ((eq ch ?&) "&amp;")
                     ((eq ch ?<) "&lt;")
                     ((eq ch ?>) "&gt;")
                     ((eq ch ?\") "&quot;")
                     (t (char-to-string ch))))
             string ""))

(defun www-batch-ids-find ()
  "Batch entry point: print the IDS find page for one request.

The query is taken from the first element of `command-line-args-left'
\(an optional \"components=\" prefix is stripped and the rest is
URL-decoded as `utf-8-er').  The function prints a Content-Type header,
the search form, and then either the search results for the given
components or, when none were given, a usage note with links.

The submitted component string is echoed back into the form's value
attribute with HTML special characters escaped."
  (let ((components (car command-line-args-left))
        ;; Entity-reference prefixes are put in front of the existing
        ;; alist for the duration of this call.
        (coded-charset-entity-reference-alist
         (list*
          '(=cns11643-1         "C1-" 4 X)
          '(=cns11643-2         "C2-" 4 X)
          '(=cns11643-3         "C3-" 4 X)
          '(=cns11643-4         "C4-" 4 X)
          '(=cns11643-5         "C5-" 4 X)
          '(=cns11643-6         "C6-" 4 X)
          '(=cns11643-7         "C7-" 4 X)
          '(=gb2312             "G0-" 4 X)
          '(=gb12345            "G1-" 4 X)
          '(=jis-x0208@1990     "J90-" 4 X)
          '(=jis-x0212          "JSP-" 4 X)
          '(=cbeta              "CB" 5 d)
          '(=jef-china3         "JC3-" 4 X)
          '(=jis-x0208@1978     "J78-" 4 X)
          '(=jis-x0208@1983     "J83-" 4 X)
          '(=daikanwa           "M-" 5 d)
          coded-charset-entity-reference-alist)))
    (setq command-line-args-left (cdr command-line-args-left))
    ;; Normalize COMPONENTS to a decoded, non-empty string or nil.
    (cond
     ((stringp components)
      (if (string-match "^components=" components)
          (setq components (substring components (match-end 0))))
      (setq components
            (if (> (length components) 0)
                (decode-url-string components 'utf-8-er)
              nil)))
     (t
      (setq components nil)))
    (princ "Content-Type: text/html; charset=UTF-8

<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"
            \"http://www.w3.org/TR/html4/loose.dtd\">
<html lang=\"ja\">
<head>
<title>CHISE IDS Find</title>
</head>

<body>

<h1>")
    (princ (encode-coding-string "CHISE IDS \e$B4A;z8!:w\e(B" 'utf-8-jp-er))
    (princ "</h1>")
    (princ "
<p>Version ")
    (princ www-ids-find-version)
    ;; Last-modified is the modification time of the products database.
    (princ (format-time-string
            " (Last-modified: %Y-%m-%d %H:%M:%S)"
            (nth 5
                 (file-attributes
                  www-ids-find-ideographic-products-file-name))))
    (princ "
<hr>
<p>
<form action=\"/ids-find\" method=\"GET\">
")
    (princ (encode-coding-string "\e$BItIJJ8;zNs\e(B" 'utf-8-jp-er))
    (princ " <input type=\"text\" name=\"components\" size=\"30\" maxlength=\"30\" value=\"")
    ;; COMPONENTS is request data: escape it so that it cannot terminate
    ;; the attribute or inject markup.
    (if (> (length components) 0)
        (princ (www-batch-ids-find--escape-attribute
                (encode-coding-string components 'utf-8-er))))
    (princ "\">
<input type=\"submit\" value=\"")
    (princ (encode-coding-string "\e$B8!:w3+;O\e(B" 'utf-8-jp-er))
    (princ "\">
</form>

")
    ;; Print a notice unless the products database is newer than the
    ;; running executable.
    (unless (file-newer-than-file-p
             www-ids-find-ideographic-products-file-name
             (locate-file (car command-line-args) exec-path))
      (princ (encode-coding-string "<hr>
<p>
\e$B8=:_!"%7%9%F%`$N99?7:n6HCf$G$9!#$7$P$i$/$*BT$A$/$@$5$$!#\e(B
<hr>
" 'utf-8-jp-er)))
    (cond
     (components
      ;; A single-character query first shows that character's own line.
      (when (= (length components) 1)
        (www-ids-find-format-line (aref components 0)
                                  (char-feature (aref components 0)
                                                'ideographic-structure)))
      (www-ids-insert-chars-including-components components))
     (t
      (princ (encode-coding-string "<hr>
<p>
\e$B;XDj$7$?ItIJ$rA4$F4^$`4A;z$N0lMw$rI=<($7$^$9!#\e(B
<p>
CHISE \e$B$GMQ$$$i$l$k<BBV;2>H7A<0!JNc!'\e(B&amp;M-00256;\e$B!K$GItIJ$r;XDj$9$k;v$b$G$-$^$9!#\e(B" 'utf-8-jp-er))
      (princ (encode-coding-string "
<p>
\[Links\]
<ul>
<li><a href=\"http://www.shuiren.org/chuden/toyoshi/syoseki/chise_ids.html\"
>\e$B!V\e(BCHISE IDS FIND\e$B$G4A;z$r8!:w!W\e(B</a> \e$B!=\e(B \e$B;3ED?r?N$5$s!J\e(B<a
href=\"http://www.shuiren.org/\">\e$B?g?MDb\e(B</a>\e$B!K$K$h$k2r@b\e(B
</ul>
<ul>
<li><a href=\"http://www.karitsu.org/tools/firefox_plugin.htm\"
>Firefox \e$BMQ\e(B plugin</a> by \e$B=);3M[0lO:$5$s!J\e(B<a href=\"http://www.karitsu.org/\"
>\e$B2aN)c7\e(B</a>\e$B!K\e(B
</ul>
<ul>
<li><a href=\"http://cvs.m17n.org/viewcvs/chise/ids/www/www-ids-find.el?view=markup\"
>www-ids-find.el (source file (Emacs Lisp part))</a>
<li><a href=\"http://www.chise.org/ids/\"
>\e$B!V\e(BCHISE \e$B4A;z9=B$>pJs%G!<%?%Y!<%9!W\e(B</a>
<li><a href=\"http://fonts.jp/chise_linkmap/\"
>\e$B!V\e(Bchise_linkmap : CHISE \e$B4A;zO"4D?^!W\e(B</a> by \e$B>eCO9(0l$5$s\e(B
<li><a href=\"http://www.chise.org/\"
>CHISE Project</a>
</ul>
<ul>
<li><a href=\"http://coe21.zinbun.kyoto-u.ac.jp/djvuchar\"
>\e$B!VBsK\J8;z%G!<%?%Y!<%9!W\e(B</a> by
<a href=\"http://coe21.zinbun.kyoto-u.ac.jp/\"
>\e$B5~ETBg3X\e(B21\e$B@$5*\e(BCOE\e$B!VEl%"%8%"@$3&$N?MJ8>pJs3X8&5f650i5rE@!W\e(B</a>
<li><a href=\"http://www.unicode.org/\"
>Unicode</a>
</ul>"
 'utf-8-jp-er))))
    (princ "<hr>")
    (princ "<p>
Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 <a href=\"http://kanji.zinbun.kyoto-u.ac.jp/~tomo/\"
>MORIOKA Tomohiko</a>")
    (princ
     (format
      "<p>Powered by <a
href=\"http://www.chise.org/xemacs/\"
>XEmacs CHISE</a> %s."
      (encode-coding-string xemacs-chise-version 'utf-8-jp-er)))
    (princ "
</body>
</html>
")))