Require `cwiki-common' of EsT.
[chise/ids.git] / www / www-ids-find.el
1 (require 'ids-find)
2 (require 'cwiki-common)
3
(defun decode-url-string (string &optional coding-system)
  "Decode the percent-escapes in STRING and return the decoded text.

Every \"%XX\" sequence (two hexadecimal digits, upper or lower case)
is replaced by the single character whose code is XX; the resulting
string is then passed through `decode-coding-string' with
CODING-SYSTEM.

Return nil when STRING is nil or empty."
  (if (> (length string) 0)
      (let ((i 0)
            pieces)
        ;; The character class names both cases explicitly so that the
        ;; match does not depend on the ambient `case-fold-search'.
        (while (string-match "%\\([0-9A-Fa-f][0-9A-Fa-f]\\)" string i)
          ;; PIECES is built in reverse: literal text first, then the
          ;; decoded character, so that `nreverse' restores the order.
          (setq pieces (cons (char-to-string
                              (int-char
                               (string-to-int (match-string 1 string) 16)))
                             (cons (substring string i (match-beginning 0))
                                   pieces))
                i (match-end 0)))
        ;; Join once at the end instead of re-copying the accumulated
        ;; result on every escape.
        (decode-coding-string
         (mapconcat 'identity
                    (nreverse (cons (substring string i) pieces))
                    "")
         coding-system))))
18
;; Version string printed in the "Version ..." paragraph of the page.
(defconst www-ids-find-version "0.26")

;; Path of the "ideographic-products" file located under
;; <chise-system-db-directory>/character/feature/.  In this file its
;; modification time is printed as "Last-modified" and is compared
;; against the running executable to decide whether the
;; "system is being updated" notice is shown.
(defvar www-ids-find-ideographic-products-file-name
  (expand-file-name "ideographic-products"
                    (expand-file-name
                     "feature"
                     (expand-file-name
                      "character"
                      chise-system-db-directory))))

;; URL prefix of the character viewer; the URI-encoded character is
;; appended to it when a character link is printed.
(defvar www-ids-find-char-viewer-url
  "/est/view/character/")

;; URL prefix of the "link map" service; the hexadecimal UCS code
;; point is appended to it.
(defvar www-ids-find-chise-link-map-url-prefix
  "http://fonts.jp/chise_linkmap/map.cgi?code=")

;; File searched for a line consisting only of a character's decimal
;; UCS code point; a match adds a link to the djvuchar database.
;; NOTE(review): the default is a path inside one user's home
;; directory -- confirm it is valid on the deployment host.
(defvar www-ids-find-tang-chars-file-name
  "~tomo/projects/chise/ids/www/tang-chars.udd")
37
(defun www-ids-find-format-char (c &optional code-desc)
  "Print character C to standard output as an HTML link.

The link target is `www-ids-find-char-viewer-url' followed by the
result of `www-uri-encode-object' for C; the link text is the result
of `www-format-encode-string' for C as a one-character string.

CODE-DESC is accepted for compatibility with existing callers but is
not used by the current implementation."
  ;; An earlier implementation emitted per-charset glyph images
  ;; (CBETA, JIS, GB, CNS, ...); it was disabled in favour of the
  ;; plain link below and can be found in the version history.
  (let* ((target (www-uri-encode-object c))
         (label (www-format-encode-string (char-to-string c))))
    (princ
     (format "<a href=\"%s%s\">%s</a>"
             www-ids-find-char-viewer-url
             target
             label))))
135   
(defun www-ids-find-format-line (c is)
  "Print one HTML result line for character C, terminated by \"<br>\".

The line contains, in order:
  - C itself as a link (see `www-ids-find-format-char');
  - when C has a UCS code point, a link to the Unihan lookup page
    labelled U+XXXX (up to #xFFFF) or U-XXXXXXXX (anything wider),
    followed by a \"(link map)\" link; otherwise ten spaces;
  - when IS is non-nil, each character of the IDS string produced by
    `ideographic-structure-to-ids' from IS, as links;
  - when the decimal UCS code point appears on a line of its own in
    `www-ids-find-tang-chars-file-name', a link to the djvuchar
    database.

IS is the ideographic structure of C, or nil."
  (let (ucs len i ids)
    (www-ids-find-format-char c 'code-desc)
    (princ
     (if (setq ucs (or (char-ucs c)
                       (encode-char c 'ucs)))
         (format
          " <a href=\"http://www.unicode.org/cgi-bin/GetUnihanData.pl?codepoint=%X\">%s</a>"
          ucs
          (cond ((<= ucs #xFFFF)
                 (format "U+%04X" ucs))
                ;; Every wider code uses the 8-digit form.  Without
                ;; this fallback a code above #x10FFFF made the label
                ;; the literal text "nil".
                (t
                 (format "U-%08X" ucs))))
       "          "))
    (when ucs
      (princ
       (format " <a href=\"%s%X\">(link map)</a>"
               www-ids-find-chise-link-map-url-prefix ucs)))
    (princ " ")
    (when is
      (setq ids (ideographic-structure-to-ids is))
      (setq i 0
            len (length ids))
      (while (< i len)
        (www-ids-find-format-char (aref ids i))
        (setq i (1+ i))))
    (when (and ucs
               (with-current-buffer
                   (find-file-noselect
                    www-ids-find-tang-chars-file-name)
                 (goto-char (point-min))
                 (re-search-forward (format "^%d$" ucs) nil t)))
      ;; The query parameter is C encoded as utf-8-jp with every byte
      ;; percent-escaped.
      (princ
       (format " <a href=\"http://coe21.zinbun.kyoto-u.ac.jp/djvuchar?query=%s\">"
               (mapconcat
                (lambda (c)
                  (format "%%%02X" (char-int c)))
                (encode-coding-string (char-to-string c)
                                      'utf-8-jp)
                "")))
      ;; NOTE(review): the literal below appears to be ISO-2022-JP
      ;; encoded label text; it must be kept byte-for-byte.
      (princ (encode-coding-string "\e$B"M\e(B[\e$BEbBeBsK\\e(B]</a>" 'utf-8-jp-er)))
    (princ "<br>\n")))
178
(defun www-ids-find-char-code< (a b)
  "Return non-nil when the `char-int' of character A is less than that of B."
  (< (char-int a) (char-int b)))

(defun www-ids-find-char-strokes< (a b tie-break)
  "Return non-nil when character A sorts before B by total stroke count.

Characters that have a stroke count sort before those that have none.
When both counts are equal, or neither character has one, the result
is decided by calling the function TIE-BREAK with A and B."
  (let ((as (char-total-strokes a))
        (bs (char-total-strokes b)))
    (cond ((and as bs)
           (if (= as bs)
               (funcall tie-break a b)
             (< as bs)))
          (as t)
          ;; Only B has a count, so B must come first.  Falling back
          ;; to TIE-BREAK here would let both (A,B) and (B,A) return
          ;; non-nil, which is not a valid predicate for `sort'.
          (bs nil)
          (t (funcall tie-break a b)))))

(defun www-ids-insert-chars-including-components (components
                                                  &optional ignored-chars)
  "Print the characters that include COMPONENTS, as nested HTML lists.

The characters are obtained from `ideographic-products-find'.  The
output depends on how many there are:
  - 2048 or more: each character is printed as a bare link, without
    list markup;
  - 1024 to 2047: a flat <ul> list ordered by character code;
  - 512 to 1023: ordered by stroke count, then by character code;
  - fewer than 512: ordered by stroke count, then `ideograph-char<'.
From 1024 results upward a notice is printed first and the recursive
search below each result is skipped.  Otherwise every listed
character is searched recursively with itself as the component.

IGNORED-CHARS is a list of characters that are not printed again.
Return the list of ignored characters, extended by the recursive
searches."
  (let* ((products (ideographic-products-find components))
         (len (length products))
         (ignore-children (>= len 1024)))
    (when ignore-children
      ;; NOTE(review): the literal appears to be ISO-2022-JP encoded
      ;; notice text; it must be kept byte-for-byte.
      (princ
       (encode-coding-string
        "<p>\e$B7k2L$,B?$9$.$k$?$a!":F5"E*8!:w$r>JN,$7$^$7$?!#\e(B</p>"
        'utf-8-jp-er)))
    (if (>= len 2048)
        (dolist (c products)
          (www-ids-find-format-char c))
      (princ "<ul>\n")
      (dolist (c (sort (copy-list products)
                       (cond
                        ((>= len 1024)
                         'www-ids-find-char-code<)
                        ((>= len 512)
                         (lambda (a b)
                           (www-ids-find-char-strokes<
                            a b 'www-ids-find-char-code<)))
                        (t
                         (lambda (a b)
                           (www-ids-find-char-strokes<
                            a b 'ideograph-char<))))))
        (unless (memq c ignored-chars)
          (princ "<li>")
          (www-ids-find-format-line
           c (char-feature c 'ideographic-structure))
          (unless ignore-children
            ;; The recursive call prints its own <ul> inside this <li>
            ;; and returns the updated list of ignored characters.
            (setq ignored-chars
                  (www-ids-insert-chars-including-components
                   (char-to-string c)
                   (cons c ignored-chars))))))
      (princ "</ul>\n")))
  ignored-chars)
243
(defun www-ids-find-escape-html (string)
  "Return STRING with &, <, > and the double quote replaced by HTML entities."
  (mapconcat (lambda (ch)
               (cond ((eq ch ?&) "&amp;")
                     ((eq ch ?<) "&lt;")
                     ((eq ch ?>) "&gt;")
                     ((eq ch ?\") "&quot;")
                     (t (char-to-string ch))))
             string ""))

(defun www-batch-ids-find ()
  "Batch entry point: print the complete \"CHISE IDS Find\" response.

The first element of `command-line-args-left' is consumed as the
query.  An optional leading \"components=\" is removed and the rest is
decoded with `decode-url-string' as utf-8-er.  The function then
prints a Content-Type header and an HTML page containing the search
form and either the search results for the query or, when there is no
query, a help text with links.

NOTE(review): string literals containing \\e$B ... \\e(B sequences
appear to be ISO-2022-JP encoded text; they must be kept byte-for-byte."
  (let ((components (car command-line-args-left))
        ;; Extra entity-reference prefixes placed in front of the
        ;; existing alist for the duration of this call.
        (coded-charset-entity-reference-alist
         (list*
          '(=cns11643-1         "C1-" 4 X)
          '(=cns11643-2         "C2-" 4 X)
          '(=cns11643-3         "C3-" 4 X)
          '(=cns11643-4         "C4-" 4 X)
          '(=cns11643-5         "C5-" 4 X)
          '(=cns11643-6         "C6-" 4 X)
          '(=cns11643-7         "C7-" 4 X)
          '(=gb2312             "G0-" 4 X)
          '(=gb12345            "G1-" 4 X)
          '(=jis-x0208@1990     "J90-" 4 X)
          '(=jis-x0212          "JSP-" 4 X)
          '(=cbeta              "CB" 5 d)
          '(=jef-china3         "JC3-" 4 X)
          '(=jis-x0208@1978     "J78-" 4 X)
          '(=jis-x0208@1983     "J83-" 4 X)
          '(=daikanwa           "M-" 5 d)
          coded-charset-entity-reference-alist)))
    (setq command-line-args-left (cdr command-line-args-left))
    (cond
     ((stringp components)
      (if (string-match "^components=" components)
          (setq components (substring components (match-end 0))))
      (setq components
            (if (> (length components) 0)
                (decode-url-string components 'utf-8-er)
              nil)))
     (t
      (setq components nil)))
    (princ "Content-Type: text/html; charset=UTF-8

<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"
            \"http://www.w3.org/TR/html4/loose.dtd\">
<html lang=\"ja\">
<head>
<title>CHISE IDS Find</title>
</head>

<body>

<h1>")
    (princ (encode-coding-string "CHISE IDS \e$B4A;z8!:w\e(B" 'utf-8-jp-er))
    (princ "</h1>")
    (princ "
<p>Version ")
    (princ www-ids-find-version)
    (princ (format-time-string
            " (Last-modified: %Y-%m-%d %H:%M:%S)"
            (nth 5
                 (file-attributes
                  www-ids-find-ideographic-products-file-name))))
    (princ "
<hr>
<p>
<form action=\"/ids-find\" method=\"GET\">
")
    (princ (encode-coding-string "\e$BItIJJ8;zNs\e(B" 'utf-8-jp-er))
    (princ " <input type=\"text\" name=\"components\" size=\"30\" maxlength=\"30\" value=\"")
    ;; The query comes from the request, so it is HTML-escaped before
    ;; being echoed into the attribute value; otherwise a double quote
    ;; or angle bracket in the query could inject markup into the page.
    ;; Escaping happens before encoding so that entity references
    ;; produced by the utf-8-er encoder are left alone.
    (if (> (length components) 0)
        (princ (encode-coding-string
                (www-ids-find-escape-html components)
                'utf-8-er)))
    (princ "\">
<input type=\"submit\" value=\"")
    (princ (encode-coding-string "\e$B8!:w3+;O\e(B" 'utf-8-jp-er))
    (princ "\">
</form>

")
    ;; Print a notice unless the products file is newer than the
    ;; running executable.
    (unless (file-newer-than-file-p
             www-ids-find-ideographic-products-file-name
             (locate-file (car command-line-args) exec-path))
      (princ (encode-coding-string "<hr>
<p>
\e$B8=:_!"%7%9%F%`$N99?7:n6HCf$G$9!#$7$P$i$/$*BT$A$/$@$5$$!#\e(B
<hr>
" 'utf-8-jp-er)))
    (cond
     (components
      ;; For a single-character query, first print the line for that
      ;; character itself.
      (when (= (length components) 1)
        (www-ids-find-format-line (aref components 0)
                                  (char-feature (aref components 0)
                                                'ideographic-structure)))
      (www-ids-insert-chars-including-components components))
     (t
      (princ (encode-coding-string "<hr>
<p>
\e$B;XDj$7$?ItIJ$rA4$F4^$`4A;z$N0lMw$rI=<($7$^$9!#\e(B
<p>
CHISE \e$B$GMQ$$$i$l$k<BBV;2>H7A<0!JNc!'\e(B&amp;M-00256;\e$B!K$GItIJ$r;XDj$9$k;v$b$G$-$^$9!#\e(B" 'utf-8-jp-er))
      ;; The "www-ids-find.el" entry below previously lacked its
      ;; closing </a> tag.
      (princ (encode-coding-string "
<p>
\[Links\]
<ul>
<li><a href=\"http://www.shuiren.org/chuden/toyoshi/syoseki/chise_ids.html\"
>\e$B!V\e(BCHISE IDS FIND\e$B$G4A;z$r8!:w!W\e(B</a> \e$B!=\e(B \e$B;3ED?r?N$5$s!J\e(B<a
href=\"http://www.shuiren.org/\">\e$B?g?MDb\e(B</a>\e$B!K$K$h$k2r@b\e(B
</ul>
<ul>
<li><a href=\"http://www.karitsu.org/tools/firefox_plugin.htm\"
>Firefox \e$BMQ\e(B plugin</a> by \e$B=);3M[0lO:$5$s!J\e(B<a href=\"http://www.karitsu.org/\"
>\e$B2aN)c7\e(B</a>\e$B!K\e(B
</ul>
<ul>
<li><a href=\"http://cvs.m17n.org/viewcvs/chise/ids/www/www-ids-find.el?view=markup\"
>www-ids-find.el (source file (Emacs Lisp part))</a>
<li><a href=\"http://www.chise.org/ids/\"
>\e$B!V\e(BCHISE \e$B4A;z9=B$>pJs%G!<%?%Y!<%9!W\e(B</a>
<li><a href=\"http://fonts.jp/chise_linkmap/\"
>\e$B!V\e(Bchise_linkmap : CHISE \e$B4A;zO"4D?^!W\e(B</a> by \e$B>eCO9(0l$5$s\e(B
<li><a href=\"http://www.chise.org/\"
>CHISE Project</a>
</ul>
<ul>
<li><a href=\"http://coe21.zinbun.kyoto-u.ac.jp/djvuchar\"
>\e$B!VBsK\J8;z%G!<%?%Y!<%9!W\e(B</a> by
<a href=\"http://coe21.zinbun.kyoto-u.ac.jp/\"
>\e$B5~ETBg3X\e(B21\e$B@$5*\e(BCOE\e$B!VEl%"%8%"@$3&$N?MJ8>pJs3X8&5f650i5rE@!W\e(B</a>
<li><a href=\"http://www.unicode.org/\"
>Unicode</a>
</ul>"
 'utf-8-jp-er))))
    (princ "<hr>")
    (princ "<p>
Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2015 <a href=\"http://kanji.zinbun.kyoto-u.ac.jp/~tomo/\"
>MORIOKA Tomohiko</a>")
    (princ
     (format
      "<p>Powered by <a
href=\"http://www.chise.org/xemacs/\"
>XEmacs CHISE</a> %s."
      (encode-coding-string xemacs-chise-version 'utf-8-jp-er)))
    (princ "
</body>
</html>
")))