New file.
[chise/ids.git] / www / www-hng-ids-find.el
1 (require 'ids-find)
2 (require 'cwiki-common)
3
(defvar hng-ccs-list
  (let ((matches nil)
        (remaining (charset-list)))
    ;; Walk the charset list front to back; consing onto MATCHES means
    ;; the result ends up in reverse order of `charset-list'.
    (while remaining
      (if (string-match "^===hng-" (symbol-name (car remaining)))
          (setq matches (cons (car remaining) matches)))
      (setq remaining (cdr remaining)))
    matches)
  "List of coded-charset symbols whose names begin with \"===hng-\".
Collected from `charset-list' when this variable is first defined.")
10
(defun char-hng-p (char)
  "Return non-nil if CHAR counts as an HNG character.
The value is CHAR's `->HNG' attribute when that is non-nil, otherwise
whatever `char-have-hng-p' returns for CHAR."
  (cond ((get-char-attribute char '->HNG))
        ((char-have-hng-p char))))
14
(defun char-have-hng-p (char)
  "Return a character if CHAR or one of its related characters is HNG-coded.
CHAR itself is returned when `encode-char' succeeds for it in any
charset of `hng-ccs-list'.  Otherwise the members of CHAR's
`->subsumptive' attribute and then of its `->denotational' attribute
are checked recursively, and the first non-nil result is returned.
Returns nil when nothing matches."
  (let ((direct-hit (some (lambda (ccs)
                            (if (encode-char char ccs)
                                char))
                          hng-ccs-list)))
    (cond (direct-hit)
          ((some #'char-have-hng-p
                 (get-char-attribute char '->subsumptive)))
          ((some #'char-have-hng-p
                 (get-char-attribute char '->denotational))))))
24
(defun decode-url-string (string &optional coding-system)
  "Decode the percent-escapes in STRING and return the resulting text.
Every \"%XX\" sequence (two hexadecimal digits, either case) is
replaced by the character with that code, and the assembled string is
then passed through `decode-coding-string' with CODING-SYSTEM.
Returns nil when STRING is empty or nil."
  (if (> (length string) 0)
      (let ((case-fold-search nil) ; the regexp spells out both cases itself,
                                   ; so matching no longer depends on the
                                   ; caller's `case-fold-search' setting
            (pos 0)
            pieces)
        ;; Collect literal runs and decoded characters in reverse order
        ;; and concatenate once at the end, instead of re-copying the
        ;; accumulated result on every escape.
        (while (string-match "%\\([0-9A-Fa-f][0-9A-Fa-f]\\)" string pos)
          (setq pieces (cons (char-to-string
                              (int-char
                               (string-to-int (match-string 1 string) 16)))
                             (cons (substring string pos (match-beginning 0))
                                   pieces))
                pos (match-end 0)))
        (decode-coding-string
         (apply #'concat (nreverse (cons (substring string pos) pieces)))
         coding-system))))
39
(defconst www-hng-ids-find-version "0.26"
  "Version string printed after \"Version\" by `www-batch-ids-find'.")
41
(defvar www-ids-find-ideographic-products-file-name
  (let ((path chise-system-db-directory))
    ;; Descend one path component at a time below the CHISE DB directory.
    (dolist (component '("character" "feature" "ideographic-products"))
      (setq path (expand-file-name component path)))
    path)
  "File \"character/feature/ideographic-products\" under `chise-system-db-directory'.
`www-batch-ids-find' prints its modification time and compares its
age against the running executable.")
49
(defvar www-ids-find-char-viewer-url "/est/view/character/"
  "URL prefix for character links.
`www-ids-find-format-char' appends the URI-encoded character to it.")
52
(defvar www-ids-find-chise-link-map-url-prefix
  "http://fonts.jp/chise_linkmap/map.cgi?code="
  "URL prefix for the \"(link map)\" link.
`www-ids-find-format-line' appends the character's UCS code in
hexadecimal to it.")
55
(defvar www-ids-find-tang-chars-file-name
  "~tomo/projects/chise/ids/www/tang-chars.udd"
  "File searched by `www-ids-find-format-line' for a character's UCS code.
The lookup matches a whole line consisting of the code in decimal.")
58
(defun www-ids-find-format-char (c &optional code-desc)
  "Print an HTML link for character C to standard output.
The link target is `www-ids-find-char-viewer-url' followed by
`www-uri-encode-object' applied to C; the link text is
`www-format-encode-string' applied to C as a one-character string.
CODE-DESC is accepted for compatibility with existing callers but is
not used by the current implementation.
Returns the string that was printed."
  ;; NOTE: an earlier, disabled implementation lived here as commented-out
  ;; code.  It encoded C with `utf-8-er' and, for entity references of
  ;; the CB, JC3-, J78-/J83-/J90-/JSP-, G0-/G1-, C1-..C7- and ZOB- forms,
  ;; emitted <img> glyph links (appending the code as text when
  ;; CODE-DESC was non-nil).  See version control if it is needed again.
  (let ((target (www-uri-encode-object c))
        (label (www-format-encode-string (char-to-string c))))
    (princ
     (format "<a href=\"%s%s\">%s</a>"
             www-ids-find-char-viewer-url
             target
             label))))
156   
(defun www-ids-find-format-line (c is)
  "Print one HTML result line for character C, ending with \"<br>\".
The line consists of, in order:
 - the character link from `www-ids-find-format-char';
 - when C has a UCS code (`char-ucs', else `encode-char' with `ucs'),
   a Unihan link labelled U+XXXX or U-XXXXXXXX and a \"(link map)\"
   link; otherwise a run of ten spaces;
 - when IS (an ideographic structure) is non-nil, a link for every
   element of the IDS produced by `ideographic-structure-to-ids';
 - when the decimal UCS code occurs as a whole line in
   `www-ids-find-tang-chars-file-name', a link to the djvuchar
   database."
  (www-ids-find-format-char c 'code-desc)
  (let ((ucs (or (char-ucs c)
                 (encode-char c 'ucs))))
    (princ
     (if ucs
         (format
          " <a href=\"http://www.unicode.org/cgi-bin/GetUnihanData.pl?codepoint=%X\">%s</a>"
          ucs
          (cond ((<= ucs #xFFFF)
                 (format "U+%04X" ucs))
                ((<= ucs #x10FFFF)
                 (format "U-%08X" ucs))))
       "          "))
    (if ucs
        (princ
         (format " <a href=\"%s%X\">(link map)</a>"
                 www-ids-find-chise-link-map-url-prefix ucs)))
    (princ " ")
    (if is
        (let ((ids (ideographic-structure-to-ids is)))
          (dotimes (idx (length ids))
            (www-ids-find-format-char (aref ids idx)))))
    (if (and ucs
             (with-current-buffer
                 (find-file-noselect www-ids-find-tang-chars-file-name)
               (goto-char (point-min))
               (re-search-forward (format "^%d$" ucs) nil t)))
        (let ((query (mapconcat
                      (lambda (byte)
                        (format "%%%02X" (char-int byte)))
                      (encode-coding-string (char-to-string c)
                                            'utf-8-jp)
                      "")))
          ;; The query is C percent-encoded byte by byte.
          (princ
           (format " <a href=\"http://coe21.zinbun.kyoto-u.ac.jp/djvuchar?query=%s\">"
                   query))
          ;; Link label: Japanese text stored as ISO-2022-JP escapes.
          (princ (encode-coding-string "\e$B"M\e(B[\e$BEbBeBsK\\e(B]</a>" 'utf-8-jp-er))))
    (princ "<br>\n")))
199
(defun www-ids-find-char-code< (a b)
  "Return non-nil if the integer code of character A is below that of B."
  (< (char-int a) (char-int b)))

(defun www-ids-find-sort-by-strokes (product-chars tie-breaker-fn)
  "Return a sorted copy of PRODUCT-CHARS ordered by `char-total-strokes'.
Characters with a stroke count come before characters without one.
Characters with equal counts, or both without a count, are ordered by
TIE-BREAKER-FN, a two-argument less-than predicate."
  (sort (copy-list product-chars)
        (lambda (a b)
          (let ((as (char-total-strokes a))
                (bs (char-total-strokes b)))
            (cond ((and as bs)
                   (if (= as bs)
                       (funcall tie-breaker-fn a b)
                     (< as bs)))
                  (as t)
                  ;; A has no count but B has: B must sort first.  The
                  ;; previous predicate fell back to the tie-breaker here,
                  ;; so (A,B) and (B,A) could both be "less than", which
                  ;; `sort' does not support.
                  (bs nil)
                  (t (funcall tie-breaker-fn a b)))))))

(defun www-ids-insert-chars-including-components (components
                                                  &optional ignored-chars)
  "Print the HNG characters that include COMPONENTS as HTML.
Candidates come from `ideographic-products-find' and are kept only if
`char-hng-p' accepts them.  Depending on the number N of kept
characters:
 - N >= 2048: only the bare character links are printed, unsorted;
 - N >= 1024: a <ul> list sorted by character code, with a notice and
   without recursing into each character;
 - N >= 512:  a <ul> list sorted by stroke count, then character code;
 - otherwise: a <ul> list sorted by stroke count, then `ideograph-char<'.
Below 1024 results each listed character is searched recursively as a
component in turn.  Characters in IGNORED-CHARS are not listed.
Returns the updated list of ignored characters."
  (let (products len ignore-children)
    (dolist (char (ideographic-products-find components))
      (if (char-hng-p char)
          (setq products (cons char products))))
    (setq len (length products))
    (when (>= len 1024)
      (setq ignore-children t)
      ;; Japanese notice (ISO-2022-JP escapes) shown when recursion is off.
      (princ
       (encode-coding-string
        "<p>\e$B7k2L$,B?$9$.$k$?$a!":F5"E*8!:w$r>JN,$7$^$7$?!#\e(B</p>"
        'utf-8-jp-er)))
    (if (>= len 2048)
        (dolist (c products)
          (www-ids-find-format-char c))
      (princ "<ul>\n")
      (dolist (c (cond
                  ((>= len 1024)
                   (sort (copy-list products) #'www-ids-find-char-code<))
                  ((>= len 512)
                   (www-ids-find-sort-by-strokes
                    products #'www-ids-find-char-code<))
                  (t
                   (www-ids-find-sort-by-strokes
                    products #'ideograph-char<))))
        (unless (memq c ignored-chars)
          (let ((is (char-feature c 'ideographic-structure)))
            (princ "<li>")
            (www-ids-find-format-line c is))
          (unless ignore-children
            ;; Recurse with C as the component; the returned list is
            ;; carried forward so later siblings skip characters that
            ;; were already on the ignore list.
            (setq ignored-chars
                  (www-ids-insert-chars-including-components
                   (char-to-string c)
                   (cons c ignored-chars))))))
      (princ "</ul>\n")))
  ignored-chars)
268
(defun www-hng-ids-find-escape-html (string)
  "Return STRING with &, <, > and \" replaced by HTML character references."
  (mapconcat (lambda (ch)
               (cond ((eq ch ?&) "&amp;")
                     ((eq ch ?<) "&lt;")
                     ((eq ch ?>) "&gt;")
                     ((eq ch ?\") "&quot;")
                     (t (char-to-string ch))))
             string ""))

(defun www-batch-ids-find ()
  "Batch/CGI entry point: print the HNG IDS-find page to standard output.
Takes the first element of `command-line-args-left' as the query
\(an optional \"components=\" prefix is stripped and the rest is
URL-decoded with `utf-8-er'), then prints an HTTP Content-Type header
and an HTML page containing the search form and either the search
results or the help text with links."
  (let ((components (car command-line-args-left))
        ;; Extra entity-reference prefixes, prepended to the existing
        ;; table for the duration of this call.
        (coded-charset-entity-reference-alist
         (list*
          '(=cns11643-1         "C1-" 4 X)
          '(=cns11643-2         "C2-" 4 X)
          '(=cns11643-3         "C3-" 4 X)
          '(=cns11643-4         "C4-" 4 X)
          '(=cns11643-5         "C5-" 4 X)
          '(=cns11643-6         "C6-" 4 X)
          '(=cns11643-7         "C7-" 4 X)
          '(=gb2312             "G0-" 4 X)
          '(=gb12345            "G1-" 4 X)
          '(=jis-x0208@1990     "J90-" 4 X)
          '(=jis-x0212          "JSP-" 4 X)
          '(=cbeta              "CB" 5 d)
          '(=jef-china3         "JC3-" 4 X)
          '(=jis-x0208@1978     "J78-" 4 X)
          '(=jis-x0208@1983     "J83-" 4 X)
          '(=daikanwa           "M-" 5 d)
          coded-charset-entity-reference-alist)))
    (setq command-line-args-left (cdr command-line-args-left))
    ;; Normalize the query: nil unless a non-empty string was given.
    (cond
     ((stringp components)
      (if (string-match "^components=" components)
          (setq components (substring components (match-end 0))))
      (setq components
            (if (> (length components) 0)
                (decode-url-string components 'utf-8-er)
              nil)))
     (t
      (setq components nil)))
    (princ "Content-Type: text/html; charset=UTF-8

<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"
            \"http://www.w3.org/TR/html4/loose.dtd\">
<html lang=\"ja\">
<head>
<title>CHISE IDS Find</title>
</head>

<body>

<h1>")
    (princ (encode-coding-string "CHISE-IDS HNG \e$B4A;z8!:w\e(B" 'utf-8-jp-er))
    (princ "</h1>")
    (princ "
<p>Version ")
    (princ www-hng-ids-find-version)
    (princ (format-time-string
            " (Last-modified: %Y-%m-%d %H:%M:%S)"
            (nth 5
                 (file-attributes
                  www-ids-find-ideographic-products-file-name))))
    (princ "
<hr>
<p>
<form action=\"/hng-ids-find\" method=\"GET\">
")
    (princ (encode-coding-string "\e$BItIJJ8;zNs\e(B" 'utf-8-jp-er))
    (princ " <input type=\"text\" name=\"components\" size=\"30\" maxlength=\"30\" value=\"")
    (if (> (length components) 0)
        ;; COMPONENTS is user input echoed into an attribute value, so it
        ;; must be escaped; otherwise a query containing a double quote
        ;; could close the attribute and inject markup.
        (princ (www-hng-ids-find-escape-html
                (encode-coding-string components 'utf-8-er))))
    (princ "\">
<input type=\"submit\" value=\"")
    (princ (encode-coding-string "\e$B8!:w3+;O\e(B" 'utf-8-jp-er))
    (princ "\">
</form>

")
    ;; Print a Japanese notice unless the products file is newer than
    ;; the executable found through `exec-path'.
    (unless (file-newer-than-file-p
             www-ids-find-ideographic-products-file-name
             (locate-file (car command-line-args) exec-path))
      (princ (encode-coding-string "<hr>
<p>
\e$B8=:_!"%7%9%F%`$N99?7:n6HCf$G$9!#$7$P$i$/$*BT$A$/$@$5$$!#\e(B
<hr>
" 'utf-8-jp-er)))
    (cond
     (components
      ;; For a single-character query, show that character's own line
      ;; before the list of characters containing it.
      (when (= (length components) 1)
        (www-ids-find-format-line (aref components 0)
                                  (char-feature (aref components 0)
                                                'ideographic-structure)))
      (www-ids-insert-chars-including-components components))
     (t
      ;; No query: print the usage text and the list of related links.
      (princ (encode-coding-string "<hr>
<p>
\e$B;XDj$7$?ItIJ$rA4$F4^$`4A;z$N0lMw$rI=<($7$^$9!#\e(B
<p>
CHISE \e$B$GMQ$$$i$l$k<BBV;2>H7A<0!JNc!'\e(B&amp;M-00256;\e$B!K$GItIJ$r;XDj$9$k;v$b$G$-$^$9!#\e(B" 'utf-8-jp-er))
      (princ (encode-coding-string "
<p>
\[Links\]
<ul>
<li><a href=\"http://www.shuiren.org/chuden/toyoshi/syoseki/chise_ids.html\"
>\e$B!V\e(BCHISE IDS FIND\e$B$G4A;z$r8!:w!W\e(B</a> \e$B!=\e(B \e$B;3ED?r?N$5$s!J\e(B<a
href=\"http://www.shuiren.org/\">\e$B?g?MDb\e(B</a>\e$B!K$K$h$k2r@b\e(B
</ul>
<ul>
<li><a href=\"http://www.karitsu.org/tools/firefox_plugin.htm\"
>Firefox \e$BMQ\e(B plugin</a> by \e$B=);3M[0lO:$5$s!J\e(B<a href=\"http://www.karitsu.org/\"
>\e$B2aN)c7\e(B</a>\e$B!K\e(B
</ul>
<ul>
<li><a href=\"http://cvs.m17n.org/viewcvs/chise/ids/www/www-ids-find.el?view=markup\"
>www-ids-find.el (source file (Emacs Lisp part))</a>
<li><a href=\"http://www.chise.org/ids/\"
>\e$B!V\e(BCHISE \e$B4A;z9=B$>pJs%G!<%?%Y!<%9!W\e(B</a>
<li><a href=\"http://fonts.jp/chise_linkmap/\"
>\e$B!V\e(Bchise_linkmap : CHISE \e$B4A;zO"4D?^!W\e(B</a> by \e$B>eCO9(0l$5$s\e(B
<li><a href=\"http://www.chise.org/\"
>CHISE Project</a>
</ul>
<ul>
<li><a href=\"http://coe21.zinbun.kyoto-u.ac.jp/djvuchar\"
>\e$B!VBsK\J8;z%G!<%?%Y!<%9!W\e(B</a> by
<a href=\"http://coe21.zinbun.kyoto-u.ac.jp/\"
>\e$B5~ETBg3X\e(B21\e$B@$5*\e(BCOE\e$B!VEl%"%8%"@$3&$N?MJ8>pJs3X8&5f650i5rE@!W\e(B</a>
<li><a href=\"http://www.unicode.org/\"
>Unicode</a>
</ul>"
 'utf-8-jp-er))))
    (princ "<hr>")
    (princ "<p>
Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2015 <a href=\"http://kanji.zinbun.kyoto-u.ac.jp/~tomo/\"
>MORIOKA Tomohiko</a>")
    (princ
     (format
      "<p>Powered by <a
href=\"http://www.chise.org/xemacs/\"
>XEmacs CHISE</a> %s."
      (encode-coding-string xemacs-chise-version 'utf-8-jp-er)))
    (princ "
</body>
</html>
")))