(ids-index-store-char): Revert to use `char-feature' instead of
[chise/ids.git] / www / www-ids-find.el
1 (require 'ids-find)
2
(defun decode-url-string (string &optional coding-system)
  "Decode the %XX escapes in STRING, then decode the bytes with CODING-SYSTEM.
Each `%' followed by two hexadecimal digits (either case) is replaced by
the character with that code; all other text is copied unchanged.  The
resulting string is passed to `decode-coding-string' together with
CODING-SYSTEM.  Return nil when STRING is nil or empty."
  (if (> (length string) 0)
      (let ((i 0)
            pieces)
        ;; Both hex cases are spelled out so the match does not depend on
        ;; the caller's value of `case-fold-search'.
        (while (string-match "%\\([0-9A-Fa-f][0-9A-Fa-f]\\)" string i)
          ;; PIECES is built in reverse: the decoded character is pushed
          ;; on top of the literal text that precedes it.
          (setq pieces (cons (char-to-string
                              (int-char
                               (string-to-int (match-string 1 string) 16)))
                             (cons (substring string i (match-beginning 0))
                                   pieces))
                i (match-end 0)))
        ;; Join everything once instead of re-concatenating per escape.
        (decode-coding-string
         (apply 'concat (nreverse (cons (substring string i) pieces)))
         coding-system))))
17
(defconst www-ids-find-version "0.22.2"
  "Version string printed on the generated search page.")

(defvar www-ids-find-ideographic-products-file-name
  (let* ((character-dir (expand-file-name "character"
                                          chise-system-db-directory))
         (feature-dir (expand-file-name "feature" character-dir)))
    (expand-file-name "ideographic-products" feature-dir))
  "File character/feature/ideographic-products under `chise-system-db-directory'.
Its modification time is printed as the page's Last-modified stamp and
is compared with the running executable's file.")

(defvar www-ids-find-tang-chars-file-name
  "~tomo/projects/chise/ids/www/tang-chars.udd"
  "File searched for a line holding a character's decimal UCS code.
A character found there gets an additional djvuchar link in its
result line.")
30
(defun www-ids-find--escape-html (string)
  "Return STRING with the characters &, <, > and double quote HTML-escaped."
  (mapconcat
   (lambda (ch)
     (cond ((eq ch ?&) "&amp;")
           ((eq ch ?<) "&lt;")
           ((eq ch ?>) "&gt;")
           ((eq ch ?\") "&quot;")
           (t (char-to-string ch))))
   string ""))

(defun www-ids-find--glyph-link (query name src)
  "Return a char-desc anchor for QUERY that wraps a glyph image.
QUERY is the already escaped value of the `char' parameter, NAME is
used both as the image's alt text and as the visible label, and SRC is
the image URL."
  (concat "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char="
          query
          (format "\"><img alt=\"%s\" src=\"%s\">\n" name src)
          name
          "</a>"))

(defun www-ids-find--char-link (c)
  "Return the HTML anchor that presents character C.
C is encoded with the `utf-8-er' coding system.  When the encoded text
contains one of the known entity references (CB, JC3, J78/J83/J90/JSP,
G0/G1, C1-C7, ZOB) the anchor wraps the matching glyph image; otherwise
the anchor shows the encoded text itself."
  (let* ((str (encode-coding-string (format "%c" c) 'utf-8-er))
         ;; Escaped up front, so no pass over finished markup is needed
         ;; and <, > and double quote are covered as well as &.
         (query (www-ids-find--escape-html str))
         plane code)
    (cond
     ((string-match "&CB\\([0-9]+\\);" str)
      (setq code (string-to-int (match-string 1 str)))
      (www-ids-find--glyph-link
       query
       (format "CB%05d" code)
       (format "http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/cb-gaiji/%02d/CB%05d.gif"
               (/ code 1000) code)))
     ((string-match "&JC3-\\([0-9A-F]+\\);" str)
      (setq code (string-to-int (match-string 1 str) 16))
      (www-ids-find--glyph-link
       query
       (format "JC3-%04X" code)
       (format "http://kanji.zinbun.kyoto-u.ac.jp/db/CHINA3/Gaiji/%04x.gif"
               code)))
     ((string-match "&J\\(78\\|83\\|90\\|SP\\)-\\([0-9A-F]+\\);" str)
      (setq plane (match-string 1 str)
            code (string-to-int (match-string 2 str) 16))
      (www-ids-find--glyph-link
       query
       (format "J%s-%04X" plane code)
       ;; The image name is built from the high and low bytes of CODE,
       ;; each reduced by 32.
       (format "http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/JIS-%s/%02d-%02d.gif"
               plane
               (- (lsh code -8) 32)
               (- (logand code 255) 32))))
     ((string-match "&G\\([01]\\)-\\([0-9A-F]+\\);" str)
      (setq plane (string-to-int (match-string 1 str))
            code (string-to-int (match-string 2 str) 16))
      (www-ids-find--glyph-link
       query
       (format "G%d-%04X" plane code)
       (format "http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/GB%d/%02d-%02d.gif"
               plane
               (- (lsh code -8) 32)
               (- (logand code 255) 32))))
     ((string-match "&C\\([1-7]\\)-\\([0-9A-F]+\\);" str)
      (setq plane (string-to-int (match-string 1 str))
            code (string-to-int (match-string 2 str) 16))
      (www-ids-find--glyph-link
       query
       (format "C%d-%04X" plane code)
       (format "http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/CNS%d/%04X.gif"
               plane code)))
     ((string-match "&ZOB-\\([0-9]+\\);" str)
      (setq code (string-to-int (match-string 1 str)))
      (www-ids-find--glyph-link
       query
       (format "ZOB-%04d" code)
       (format "http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/ZOB-1968/%04d.png"
               code)))
     (t
      (concat
       "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char="
       ;; Bytes above #x7F are percent-encoded; ASCII is kept but
       ;; HTML-escaped so it cannot terminate the attribute.
       (mapconcat (lambda (ch)
                    (if (<= (char-int ch) #x7F)
                        (www-ids-find--escape-html (char-to-string ch))
                      (format "%%%02X" (char-int ch))))
                  str "")
       "\">"
       query
       "</a>")))))

(defun www-ids-find-format-line (c is)
  "Print one HTML result line for character C to standard output.
IS is C's ideographic structure, or nil.  The line consists of, in
order: a link to C's char-desc page, a Unihan link labelled with C's
UCS code point (or a run of spaces when C has none), a link-map link,
the IDS form of IS with CB entity references replaced by glyph images,
a djvuchar link when the code point is listed in
`www-ids-find-tang-chars-file-name', and a closing <br>."
  (let ((ucs (or (char-ucs c)
                 (encode-char c 'ucs)))
        code)
    (princ (www-ids-find--char-link c))
    (princ
     (if ucs
         (format
          " <a href=\"http://www.unicode.org/cgi-bin/GetUnihanData.pl?codepoint=%X\">%s</a>"
          ucs
          ;; Code points above #xFFFF, including any above #x10FFFF, use
          ;; the eight-digit form so the label is never the text `nil'.
          (if (<= ucs #xFFFF)
              (format "U+%04X" ucs)
            (format "U-%08X" ucs)))
       "          "))
    (when ucs
      (princ
       (format " <a href=\"http://geta.mag.keio.ac.jp/chiseperl/map.cgi?code=%X\">(link map)</a>"
               ucs)))
    (princ " ")
    (when is
      (princ
       (with-temp-buffer
         (insert
          (encode-coding-string
           (ideographic-structure-to-ids is)
           'utf-8-jp-er))
         (goto-char (point-min))
         ;; Show CB entity references inside the IDS as glyph images.
         (while (re-search-forward "&CB\\([0-9]+\\);" nil t)
           (setq code (string-to-int (match-string 1)))
           (replace-match
            (format "<img alt=\"CB%05d\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/cb-gaiji/%02d/CB%05d.gif\">"
                    code (/ code 1000) code)
            t 'literal))
         (buffer-string))))
    ;; The tang-chars file is searched for a line that consists of the
    ;; decimal code point only.
    (when (and ucs
               (with-current-buffer
                   (find-file-noselect
                    www-ids-find-tang-chars-file-name)
                 (goto-char (point-min))
                 (re-search-forward (format "^%d$" ucs) nil t)))
      (princ
       (format " <a href=\"http://coe21.zinbun.kyoto-u.ac.jp/djvuchar?query=%s\">"
               (mapconcat
                (lambda (ch)
                  (format "%%%02X" (char-int ch)))
                (encode-coding-string (char-to-string c)
                                      'utf-8-jp)
                "")))
      ;; NOTE(review): the literal below holds ISO-2022-JP escape
      ;; sequences and must be kept byte-for-byte.
      (princ (encode-coding-string "\e$B"M\e(B[\e$BEbBeBsK\\e(B]</a>" 'utf-8-jp-er)))
    (princ "<br>\n")))
157
(defun www-ids-insert-chars-including-components (components)
  "Print a nested HTML list of the characters that include COMPONENTS.
Every character returned by `ideographic-products-find' for COMPONENTS
is checked against its own `ideographic-structure' feature; it is
printed only when each element of COMPONENTS is a member of that
structure.  A printed character becomes an <li> entry followed by a
<ul> that is filled by calling this function again with that character
as the sole component."
  (let ((candidates (ideographic-products-find components))
        candidate structure)
    (while candidates
      (setq candidate (car candidates)
            candidates (cdr candidates)
            structure (char-feature candidate 'ideographic-structure))
      ;; The index may list characters that do not really contain the
      ;; components, so each hit is verified before printing.
      (if (every (lambda (part)
                   (ideographic-structure-member part structure))
                 components)
          (progn
            (princ "<li>")
            (www-ids-find-format-line candidate structure)
            (princ "<ul>\n")
            (www-ids-insert-chars-including-components
             (char-to-string candidate))
            (princ "</ul>\n"))))
    nil))
173
(defun www-ids-find--escape-attribute-value (string)
  "Return STRING made safe for use inside a double-quoted HTML attribute.
The characters &, <, > and double quote are replaced by character
entity references; everything else is copied unchanged."
  (mapconcat
   (lambda (ch)
     (cond ((eq ch ?&) "&amp;")
           ((eq ch ?<) "&lt;")
           ((eq ch ?>) "&gt;")
           ((eq ch ?\") "&quot;")
           (t (char-to-string ch))))
   string ""))

(defun www-batch-ids-find ()
  "Print the complete CHISE IDS Find response to standard output.
The query is taken from the first element of `command-line-args-left',
which is then removed from that list.  An optional leading
`components=' is stripped and the rest is URL-decoded with the
`utf-8-jp-er' coding system.  The output is a Content-Type header
followed by an HTML page with the search form and either the search
results for the components or an introduction with a list of links.

NOTE(review): the Japanese literals hold ISO-2022-JP escape sequences
and must be kept byte-for-byte."
  (let ((components (car command-line-args-left))
        (coded-charset-entity-reference-alist
         (list*
          '(=cns11643-1         "C1-" 4 X)
          '(=cns11643-2         "C2-" 4 X)
          '(=cns11643-3         "C3-" 4 X)
          '(=cns11643-4         "C4-" 4 X)
          '(=cns11643-5         "C5-" 4 X)
          '(=cns11643-6         "C6-" 4 X)
          '(=cns11643-7         "C7-" 4 X)
          '(=gb2312             "G0-" 4 X)
          '(=gb12345            "G1-" 4 X)
          '(=jis-x0208@1990     "J90-" 4 X)
          '(=jis-x0212          "JSP-" 4 X)
          '(=cbeta              "CB" 5 d)
          '(=jef-china3         "JC3-" 4 X)
          '(=jis-x0208@1978     "J78-" 4 X)
          '(=jis-x0208@1983     "J83-" 4 X)
          '(=daikanwa           "M-" 5 d)
          coded-charset-entity-reference-alist))
        )
    (setq command-line-args-left (cdr command-line-args-left))
    (cond
     ((stringp components)
      (if (string-match "^components=" components)
          (setq components (substring components (match-end 0))))
      (setq components
            (if (> (length components) 0)
                (decode-url-string components 'utf-8-jp-er)
              nil))
      )
     (t
      (setq components nil)
      ))
    (princ "Content-Type: text/html; charset=UTF-8

<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"
            \"http://www.w3.org/TR/html4/loose.dtd\">
<html lang=\"ja\">
<head>
<title>CHISE IDS Find</title>
</head>

<body>

<h1>")
    (princ (encode-coding-string "CHISE IDS \e$B4A;z8!:w\e(B" 'utf-8-jp-er))
    (princ "</h1>")
    (princ "
<p>Version ")
    (princ www-ids-find-version)
    (princ (format-time-string
            " (Last-modified: %Y-%m-%d %H:%M:%S)"
            (nth 5
                 (file-attributes
                  www-ids-find-ideographic-products-file-name))))
    (princ "
<p>
Copyright (C) 2005 <a href=\"http://kanji.zinbun.kyoto-u.ac.jp/~tomo/\"
>MORIOKA Tomohiko</a>
<hr>
<p>
<form action=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/ids-find\" method=\"GET\">
")
    (princ (encode-coding-string "\e$BItIJJ8;zNs\e(B" 'utf-8-jp-er))
    (princ " <input type=\"text\" name=\"components\" size=\"30\" maxlength=\"30\" value=\"")
    ;; COMPONENTS comes straight from the request, so it is escaped
    ;; before being echoed into the value attribute.
    (if (> (length components) 0)
        (princ (www-ids-find--escape-attribute-value
                (encode-coding-string components 'utf-8-jp-er))))
    (princ "\">
<input type=\"submit\" value=\"")
    (princ (encode-coding-string "\e$B8!:w3+;O\e(B" 'utf-8-jp-er))
    (princ "\">
</form>

")
    ;; Print a notice unless the products file is newer than the file
    ;; of the running executable.
    (unless (file-newer-than-file-p
             www-ids-find-ideographic-products-file-name
             (locate-file (car command-line-args) exec-path))
      (princ (encode-coding-string "<hr>
<p>
\e$B8=:_!"%7%9%F%`$N99?7:n6HCf$G$9!#$7$P$i$/$*BT$A$/$@$5$$!#\e(B
<hr>
" 'utf-8-jp-er))
      )
    (cond
     (components
      ;; A single-character query first shows that character itself.
      (when (= (length components) 1)
        (www-ids-find-format-line (aref components 0)
                                  (char-feature (aref components 0)
                                                'ideographic-structure)))
      (princ "<ul>\n")
      (www-ids-insert-chars-including-components components)
      (princ "</ul>\n")
      )
     (t
      (princ (encode-coding-string "<hr>
<p>
\e$B;XDj$7$?ItIJ$rA4$F4^$`4A;z$N0lMw$rI=<($7$^$9!#\e(B
<p>
CHISE \e$B$GMQ$$$i$l$k<BBV;2>H7A<0!JNc!'\e(B&amp;M-00256;\e$B!K$GItIJ$r;XDj$9$k;v$b$G$-$^$9!#\e(B" 'utf-8-jp-er))
      ;; The www-ids-find.el entry below now carries its closing </a>.
      (princ (encode-coding-string "
<p>
\[Links\]
<ul>
<li><a href=\"http://www.shuiren.org/chuden/toyoshi/syoseki/chise_ids.html\"
>\e$B!V\e(BCHISE IDS FIND\e$B$G4A;z$r8!:w!W\e(B</a> \e$B!=\e(B \e$B;3ED?r?N$5$s!J\e(B<a
href=\"http://www.shuiren.org/\">\e$B?g?MDb\e(B</a>\e$B!K$K$h$k2r@b\e(B
</ul>
<ul>
<li><a href=\"http://www.karitsu.org/tools/firefox_plugin.htm\"
>Firefox \e$BMQ\e(B plugin</a> by \e$B=);3M[0lO:$5$s!J\e(B<a href=\"http://www.karitsu.org/\"
>\e$B2aN)c7\e(B</a>\e$B!K\e(B
</ul>
<ul>
<li><a href=\"http://cvs.m17n.org/viewcvs/chise/ids/www/www-ids-find.el?view=markup\"
>www-ids-find.el (source file (Emacs Lisp part))</a>
<li><a href=\"http://kanji.zinbun.kyoto-u.ac.jp/projects/chise/ids/\"
>\e$B!V\e(BCHISE \e$B4A;z9=B$>pJs%G!<%?%Y!<%9!W\e(B</a>
<li><a href=\"http://fonts.jp/chise_linkmap/\"
>\e$B!V\e(Bchise_linkmap : CHISE \e$B4A;zO"4D?^!W\e(B</a> by \e$B>eCO9(0l$5$s\e(B
<li><a href=\"http://kanji.zinbun.kyoto-u.ac.jp/projects/chise/\"
>CHISE Project</a>
</ul>
<ul>
<li><a href=\"http://coe21.zinbun.kyoto-u.ac.jp/djvuchar\"
>\e$B!VBsK\J8;z%G!<%?%Y!<%9!W\e(B</a> by
<a href=\"http://coe21.zinbun.kyoto-u.ac.jp/\"
>\e$B5~ETBg3X\e(B21\e$B@$5*\e(BCOE\e$B!VEl%"%8%"@$3&$N?MJ8>pJs3X8&5f650i5rE@!W\e(B</a>
<li><a href=\"http://www.unicode.org/\"
>Unicode</a>
</ul>"
 'utf-8-jp-er))

      ))
    (princ "<hr>")
    (princ
     (format
      "Powered by <a
href=\"http://kanji.zinbun.kyoto-u.ac.jp/projects/chise/xemacs/\"
>XEmacs CHISE</a> %s."
      xemacs-chise-version))
    (princ "
</body>
</html>
")))