update.
[chise/ids.git] / www / www-hng-ids-find.el
1 (require 'ids-find)
2 (require 'cwiki-common)
3
4 (setq www-format-char-img-style "vertical-align:middle;")
5
6 (defvar hng-ccs-list
7   (let (dest)
8     (dolist (ccs (charset-list))
9       (when (string-match "^===hng-" (symbol-name ccs))
10         (setq dest (cons ccs dest))))
11     dest))
12
13 (defun char-hng-p (char)
14   (or (get-char-attribute char '->HNG)
15       (char-have-hng-p char)))
16
17 (defun char-have-hng-p (char)
18   (or (some (lambda (ccs)
19               (and (encode-char char ccs)
20                    char))
21             hng-ccs-list)
22       (some #'char-have-hng-p
23             (get-char-attribute char '->subsumptive))
24       (some #'char-have-hng-p
25             (get-char-attribute char '->denotational))))
26
27 (defun decode-url-string (string &optional coding-system)
28   (if (> (length string) 0)
29       (let ((i 0)
30             dest)
31         (while (string-match "%\\([0-9A-F][0-9A-F]\\)" string i)
32           (setq dest (concat dest
33                              (substring string i (match-beginning 0))
34                              (char-to-string
35                               (int-char
36                                (string-to-int (match-string 1 string) 16))))
37                 i (match-end 0)))
38         (decode-coding-string
39          (concat dest (substring string i))
40          coding-system))))
41
42 (defconst www-hng-ids-find-version "0.26")
43
44 (defvar www-ids-find-ideographic-products-file-name
45   (expand-file-name "ideographic-products"
46                     (expand-file-name
47                      "feature"
48                      (expand-file-name
49                       "character"
50                       chise-system-db-directory))))
51
52 (defvar www-ids-find-char-viewer-url
53   "/est/view/character/")
54
55 (defvar www-ids-find-chise-link-map-url-prefix
56   "http://fonts.jp/chise_linkmap/map.cgi?code=")
57
58 (defvar www-ids-find-tang-chars-file-name
59   "~tomo/projects/chise/ids/www/tang-chars.udd")
60
61 (defun www-ids-find-format-char (c &optional code-desc)
62   (princ
63    (format "<a href=\"%s%s\">%s</a>"
64            www-ids-find-char-viewer-url
65            (www-uri-encode-object c)
66            (www-format-encode-string (char-to-string c))))
67   ;; (let ((str (encode-coding-string (format "%c" c) 'utf-8-er))
68   ;;       plane code)
69   ;;   (princ
70   ;;    (with-temp-buffer
71   ;;      (cond
72   ;;       ((string-match "&CB\\([0-9]+\\);" str)
73   ;;        (setq code (string-to-int (match-string 1 str)))
74   ;;        (insert (format "<a href=\"%s"
75   ;;                        www-ids-find-char-viewer-url))
76   ;;        (insert str)
77   ;;        (insert (format "\"><img alt=\"CB%05d\" src=\"/glyphs/cb-gaiji/%02d/CB%05d.gif\">\n"
78   ;;                        code (/ code 1000) code))
79   ;;        (when code-desc
80   ;;          (insert (format "CB%05d</a>" code)))
81   ;;        )
82   ;;       ((string-match "&JC3-\\([0-9A-F]+\\);" str)
83   ;;        (setq code (string-to-int (match-string 1 str) 16))
84   ;;        (insert (format "<a href=\"%s"
85   ;;                        www-ids-find-char-viewer-url))
86   ;;        (insert str)
87   ;;        (insert (format "\"><img alt=\"JC3-%04X\" src=\"http://kanji.zinbun.kyoto-u.ac.jp/db/CHINA3/Gaiji/%04x.gif\">\n"
88   ;;                        code code))
89   ;;        (when code-desc
90   ;;          (insert (format "JC3-%04X</a>" code)))
91   ;;        )
92   ;;       ((string-match "&J\\(78\\|83\\|90\\|SP\\)-\\([0-9A-F]+\\);" str)
93   ;;        (setq plane (match-string 1 str)
94   ;;              code (string-to-int (match-string 2 str) 16))
95   ;;        (insert (format "<a href=\"%s"
96   ;;                        www-ids-find-char-viewer-url))
97   ;;        (insert str)
98   ;;        (insert (format "\"><img alt=\"J%s-%04X\" src=\"/glyphs/JIS-%s/%02d-%02d.gif\">\n"
99   ;;                        plane code plane
100   ;;                        (- (lsh code -8) 32)
101   ;;                        (- (logand code 255) 32)))
102   ;;        (when code-desc
103   ;;          (insert (format "J%s-%04X</a>" plane code)))
104   ;;        )
105   ;;       ((string-match "&G\\([01]\\)-\\([0-9A-F]+\\);" str)
106   ;;        (setq plane (string-to-int (match-string 1 str))
107   ;;              code (string-to-int (match-string 2 str) 16))
108   ;;        (insert (format "<a href=\"%s"
109   ;;                        www-ids-find-char-viewer-url))
110   ;;        (insert str)
111   ;;        (insert (format "\"><img alt=\"G%d-%04X\" src=\"/glyphs/GB%d/%02d-%02d.gif\">\n"
112   ;;                        plane code plane
113   ;;                        (- (lsh code -8) 32)
114   ;;                        (- (logand code 255) 32)))
115   ;;        (when code-desc
116   ;;          (insert (format "G%d-%04X</a>" plane code)))
117   ;;        )
118   ;;       ((string-match "&C\\([1-7]\\)-\\([0-9A-F]+\\);" str)
119   ;;        (setq plane (string-to-int (match-string 1 str))
120   ;;              code (string-to-int (match-string 2 str) 16))
121   ;;        (insert (format "<a href=\"%s"
122   ;;                        www-ids-find-char-viewer-url))
123   ;;        (insert str)
124   ;;        (insert (format "\"><img alt=\"C%d-%04X\" src=\"/glyphs/CNS%d/%04X.gif\">\n"
125   ;;                        plane code plane code))
126   ;;        (when code-desc
127   ;;          (insert (format "C%d-%04X</a>" plane code)))
128   ;;        )
129   ;;       ((string-match "&ZOB-\\([0-9]+\\);" str)
130   ;;        (setq code (string-to-int (match-string 1 str)))
131   ;;        (insert (format "<a href=\"%s"
132   ;;                        www-ids-find-char-viewer-url))
133   ;;        (insert str)
134   ;;        (insert (format "\"><img alt=\"ZOB-%04d\" src=\"/glyphs/ZOB-1968/%04d.png\">\n"
135   ;;                        code code))
136   ;;        (when code-desc
137   ;;          (insert (format "ZOB-%04d</a>" code)))
138   ;;        )
139   ;;       (t
140   ;;        (insert (format "<a href=\"%s"
141   ;;                        www-ids-find-char-viewer-url))
142   ;;        ;; (insert str)
143   ;;        (insert
144   ;;         (mapconcat (lambda (c)
145   ;;                      (if (<= (char-int c) #x7F)
146   ;;                          (char-to-string c)
147   ;;                        (format "%%%02X" c)))
148   ;;                    str ""))
149   ;;        (insert "\">")
150   ;;        (insert str)
151   ;;        (insert "</a>")
152   ;;        ))
153   ;;      (goto-char (point-min))
154   ;;      (while (search-forward "&" nil t)
155   ;;        (replace-match "&amp;" t 'literal))
156   ;;      (buffer-string))))
157   )
158   
159 (defun www-ids-find-format-line (c is)
160   (let (ucs len i ids)
161     (princ "<span class=\"entry\">")
162     (www-ids-find-format-char c 'code-desc)
163     (princ "</span>")
164     (princ
165      (or (if (setq ucs (or (char-ucs c)
166                            (encode-char c 'ucs)))
167              (format
168               " <a href=\"http://www.unicode.org/cgi-bin/GetUnihanData.pl?codepoint=%X\">%s</a>"
169               ucs
170               (cond ((<= ucs #xFFFF)
171                      (format "U+%04X" ucs))
172                     ((<= ucs #x10FFFF)
173                      (format "U-%08X" ucs))))
174            "          ")))
175     (when ucs
176       (princ
177        (format " <a href=\"%s%X\">(link map)</a>"
178                www-ids-find-chise-link-map-url-prefix ucs)))
179     (princ " ")
180     (when is
181       (setq ids (ideographic-structure-to-ids is))
182       (setq i 0
183             len (length ids))
184       (princ "<span class=\"ids\">")      
185       (while (< i len)
186         (www-ids-find-format-char (aref ids i))
187         (setq i (1+ i)))
188       (princ "</span>"))
189     (when (and ucs
190                (with-current-buffer
191                    (find-file-noselect
192                     www-ids-find-tang-chars-file-name)
193                  (goto-char (point-min))
194                  (re-search-forward (format "^%d$" ucs) nil t)))
195       (princ
196        (format " <a href=\"http://coe21.zinbun.kyoto-u.ac.jp/djvuchar?query=%s\">"
197                (mapconcat
198                 (lambda (c)
199                   (format "%%%02X" (char-int c)))
200                 (encode-coding-string (char-to-string c)
201                                       'utf-8-jp)
202                 "")))
203       (princ (encode-coding-string "\e$B"M\e(B[\e$BEbBeBsK\\e(B]</a>" 'utf-8-jp-er)))
204     (princ "<br>\n")))
205
206 (defun www-ids-insert-chars-including-components (components
207                                                   &optional ignored-chars)
208   (let ((ret (ideographic-products-find components))
209         products
210         is as bs len ignore-children)
211     (dolist (char ret)
212       (if (char-hng-p char)
213           (setq products (cons char products))))
214     (setq len (length products))
215     (when (>= len 1024)
216       (setq ignore-children t)
217       (princ
218        (encode-coding-string
219         "<p>\e$B7k2L$,B?$9$.$k$?$a!":F5"E*8!:w$r>JN,$7$^$7$?!#\e(B</p>"
220         'utf-8-jp-er)))
221     (if (>= len 2048)
222         (dolist (c products)
223           (www-ids-find-format-char c))
224       (princ "<ul>\n")
225       (dolist (c (cond
226                   ;; ((>= len 2048)
227                   ;;  (setq ignore-children t)
228                   ;;  products)
229                   ;; ((>= len 1024)
230                   ;;  products)
231                   ((>= len 1024)
232                    (sort (copy-list products)
233                          (lambda (a b)
234                            (< (char-int a)(char-int b))))
235                    )
236                   ((>= len 512)
237                    (sort (copy-list products)
238                          (lambda (a b)
239                            (if (setq as (char-total-strokes a))
240                                (if (setq bs (char-total-strokes b))
241                                    (if (= as bs)
242                                        (< (char-int a)(char-int b))
243                                      (< as bs))
244                                  t)
245                              (< (char-int a)(char-int b)))))
246                    )
247                   (t
248                    (sort (copy-list products)
249                          (lambda (a b)
250                            (if (setq as (char-total-strokes a))
251                                (if (setq bs (char-total-strokes b))
252                                    (if (= as bs)
253                                        (ideograph-char< a b)
254                                      (< as bs))
255                                  t)
256                              (ideograph-char< a b))))
257                    )))
258         (unless (memq c ignored-chars)
259           (setq is (char-feature c 'ideographic-structure))
260           (princ "<li>")
261           (www-ids-find-format-line c is)
262           (unless ignore-children
263             ;; (princ "<ul>\n")
264             (setq ignored-chars
265                   (www-ids-insert-chars-including-components
266                    (char-to-string c)
267                    (cons c ignored-chars)))
268             ;; (princ "</ul>\n")
269             ))
270         )
271       (princ "</ul>\n")
272       ))
273   ignored-chars)
274
275 (defun www-batch-ids-find ()
276   (let ((components (car command-line-args-left))
277         (coded-charset-entity-reference-alist
278          (list*
279           '(=cns11643-1         "C1-" 4 X)
280           '(=cns11643-2         "C2-" 4 X)
281           '(=cns11643-3         "C3-" 4 X)
282           '(=cns11643-4         "C4-" 4 X)
283           '(=cns11643-5         "C5-" 4 X)
284           '(=cns11643-6         "C6-" 4 X)
285           '(=cns11643-7         "C7-" 4 X)
286           '(=gb2312             "G0-" 4 X)
287           '(=gb12345            "G1-" 4 X)
288           '(=jis-x0208@1990     "J90-" 4 X)
289           '(=jis-x0212          "JSP-" 4 X)
290           '(=cbeta              "CB" 5 d)
291           '(=jef-china3         "JC3-" 4 X)
292           '(=jis-x0208@1978     "J78-" 4 X)
293           '(=jis-x0208@1983     "J83-" 4 X)
294           '(=daikanwa           "M-" 5 d)
295           coded-charset-entity-reference-alist))
296         )
297     (setq command-line-args-left (cdr command-line-args-left))
298     (cond
299      ((stringp components)
300       (if (string-match "^components=" components)
301           (setq components (substring components (match-end 0))))
302       (setq components
303             (if (> (length components) 0)
304                 (decode-url-string components 'utf-8-er)
305               nil))
306       )
307      (t
308       (setq components nil)
309       ))
310     (princ "Content-Type: text/html; charset=UTF-8
311
312 <!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"
313             \"http://www.w3.org/TR/html4/loose.dtd\">
314 <html lang=\"ja\">
315 <head>
316 <title>CHISE IDS Find</title>
317 <style type=\"text/css\">
318 <!--
319 .entry { font-size: 36px; }
320 .entry a img { height: 36px; }
321 .ids { font-size: 24px; }
322 .ids a img { height: 24px; }
323 img { vertical-align:middle; }
324 a { text-decoration:none; }
325 ul { margin: 0 0; }
326 li { margin: 0 0 -0.2em; }
327 .tooltip {
328     position: relative;
329     display: inline-block;
330 }
331 .tooltip .tooltiptext {
332     display: none;
333 }
334 -->
335 </style>
336 </head>
337
338 <body>
339
340 <h1>")
341     (princ (encode-coding-string "CHISE-IDS HNG \e$B4A;z8!:w\e(B" 'utf-8-jp-er))
342     (princ "</h1>")
343     (princ "
344 <p>Version ")
345     (princ www-hng-ids-find-version)
346     (princ (format-time-string
347             " (Last-modified: %Y-%m-%d %H:%M:%S)"
348             (nth 5
349                  (file-attributes
350                   www-ids-find-ideographic-products-file-name))))
351     (princ "
352 <hr>
353 <p>
354 <form action=\"/hng-ids-find\" method=\"GET\">
355 ")
356     (princ (encode-coding-string "\e$BItIJJ8;zNs\e(B" 'utf-8-jp-er))
357     (princ " <input type=\"text\" name=\"components\" size=\"30\" maxlength=\"30\" value=\"")
358     (if (> (length components) 0)
359         (princ (encode-coding-string components 'utf-8-er)))
360     (princ "\">
361 <input type=\"submit\" value=\"")
362     (princ (encode-coding-string "\e$B8!:w3+;O\e(B" 'utf-8-jp-er))
363     (princ "\">
364 </form>
365
366 ")
367     (unless (file-newer-than-file-p
368              www-ids-find-ideographic-products-file-name
369              (locate-file (car command-line-args) exec-path))
370       (princ (encode-coding-string "<hr>
371 <p>
372 \e$B8=:_!"%7%9%F%`$N99?7:n6HCf$G$9!#$7$P$i$/$*BT$A$/$@$5$$!#\e(B
373 <hr>
374 " 'utf-8-jp-er))
375       ;; (setq components nil)
376       )
377     (cond
378      (components
379       ;; (map-char-attribute
380       ;;  (lambda (c v)
381       ;;    (when (every (lambda (p)
382       ;;                   (ideographic-structure-member p v))
383       ;;                 components)
384       ;;      (princ (encode-coding-string
385       ;;              (ids-find-format-line c v)
386       ;;              'utf-8-jp-er))
387       ;;      (princ "<br>\n")
388       ;;      )
389       ;;    nil)
390       ;;  'ideographic-structure)
391       (when (= (length components) 1)
392         (www-ids-find-format-line (aref components 0)
393                                   (char-feature (aref components 0)
394                                                 'ideographic-structure)))
395       ;; (dolist (c (ideographic-products-find components))
396       ;;   (setq is (char-feature c 'ideographic-structure))
397       ;;   ;; to avoid problems caused by wrong indexes
398       ;;   (when (every (lambda (c)
399       ;;                  (ideographic-structure-member c is))
400       ;;                components)
401       ;;     (www-ids-find-format-line c is)))
402       ;; (princ "<ul>\n")
403       (www-ids-insert-chars-including-components components)
404       ;; (princ "</ul>\n")
405       )
406      (t
407       (princ (encode-coding-string "<hr>
408 <p>
409 \e$B;XDj$7$?ItIJ$rA4$F4^$`4A;z$N0lMw$rI=<($7$^$9!#\e(B
410 <p>
411 CHISE \e$B$GMQ$$$i$l$k<BBV;2>H7A<0!JNc!'\e(B&amp;M-00256;\e$B!K$GItIJ$r;XDj$9$k;v$b$G$-$^$9!#\e(B" 'utf-8-jp-er))
412       (princ (encode-coding-string "
413 <p>
414 \[Links\]
415 <ul>
416 <li><a href=\"http://www.shuiren.org/chuden/toyoshi/syoseki/chise_ids.html\"
417 >\e$B!V\e(BCHISE IDS FIND\e$B$G4A;z$r8!:w!W\e(B</a> \e$B!=\e(B \e$B;3ED?r?N$5$s!J\e(B<a
418 href=\"http://www.shuiren.org/\">\e$B?g?MDb\e(B</a>\e$B!K$K$h$k2r@b\e(B
419 </ul>
420 <ul>
421 <li><a href=\"http://www.karitsu.org/tools/firefox_plugin.htm\"
422 >Firefox \e$BMQ\e(B plugin</a> by \e$B=);3M[0lO:$5$s!J\e(B<a href=\"http://www.karitsu.org/\"
423 >\e$B2aN)c7\e(B</a>\e$B!K\e(B
424 </ul>
425 <ul>
426 <li><a href=\"http://cvs.m17n.org/viewcvs/chise/ids/www/www-ids-find.el?view=markup\"
427 >www-ids-find.el (source file (Emacs Lisp part))
428 <li><a href=\"http://www.chise.org/ids/\"
429 >\e$B!V\e(BCHISE \e$B4A;z9=B$>pJs%G!<%?%Y!<%9!W\e(B</a>
430 <li><a href=\"http://fonts.jp/chise_linkmap/\"
431 >\e$B!V\e(Bchise_linkmap : CHISE \e$B4A;zO"4D?^!W\e(B</a> by \e$B>eCO9(0l$5$s\e(B
432 <li><a href=\"http://www.chise.org/\"
433 >CHISE Project</a>
434 </ul>
435 <ul>
436 <li><a href=\"http://coe21.zinbun.kyoto-u.ac.jp/djvuchar\"
437 >\e$B!VBsK\J8;z%G!<%?%Y!<%9!W\e(B</a> by
438 <a href=\"http://coe21.zinbun.kyoto-u.ac.jp/\"
439 >\e$B5~ETBg3X\e(B21\e$B@$5*\e(BCOE\e$B!VEl%"%8%"@$3&$N?MJ8>pJs3X8&5f650i5rE@!W\e(B</a>
440 <li><a href=\"http://www.unicode.org/\"
441 >Unicode</a>
442 </ul>"
443  'utf-8-jp-er))
444
445       ))
446     (princ "<hr>")
447     (princ "<p>
448 Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2015 <a href=\"http://kanji.zinbun.kyoto-u.ac.jp/~tomo/\"
449 >MORIOKA Tomohiko</a>")
450     (princ
451      (format
452       "<p>Powered by <a
453 href=\"http://www.chise.org/xemacs/\"
454 >XEmacs CHISE</a> %s."
455       (encode-coding-string xemacs-chise-version 'utf-8-jp-er)))
456     (princ "
457 </body>
458 </html>
459 ")))