update.
[chise/ids.git] / www / www-ids-find.el
1 (require 'ids-find)
2 (require 'cwiki-common)
3
;; Inline CSS for character images.  Presumably consumed by the
;; formatting helpers loaded from `cwiki-common' -- confirm there.
(setq www-format-char-img-style
      "vertical-align:middle;")
5
(defun decode-url-string (string &optional coding-system)
  "Decode the percent-escapes in STRING and return the decoded text.
Each \"%XX\" sequence (two hexadecimal digits, either case) is replaced
by the single character whose code is XX; all other text is copied
unchanged.  The resulting string is then passed through
`decode-coding-string' with CODING-SYSTEM.

Return nil when STRING is nil or empty."
  (if (> (length string) 0)
      (let ((pos 0)
            pieces)
        ;; The character class lists both cases explicitly so the result
        ;; does not depend on the caller's `case-fold-search'.
        (while (string-match "%\\([0-9A-Fa-f][0-9A-Fa-f]\\)" string pos)
          ;; Collect fragments in reverse order and join them once at the
          ;; end instead of re-copying the accumulated prefix every time.
          (setq pieces (cons (char-to-string
                              (int-char
                               (string-to-number (match-string 1 string) 16)))
                             (cons (substring string pos (match-beginning 0))
                                   pieces))
                pos (match-end 0)))
        (decode-coding-string
         (apply 'concat (nreverse (cons (substring string pos) pieces)))
         coding-system))))
20
(defconst www-ids-find-version "0.99.1"
  "Version string printed in the header of the IDS Find page.")

(defvar www-ids-find-ideographic-products-file-name
  (let ((path chise-system-db-directory))
    ;; Descend character/feature/ideographic-products below the DB root.
    (dolist (part '("character" "feature" "ideographic-products") path)
      (setq path (expand-file-name part path))))
  "File name of the `ideographic-products' feature in the CHISE database.
`www-batch-ids-find' prints its modification time as the page's
Last-modified stamp and compares its age with the running executable.")

(defvar www-ids-find-char-viewer-url "/est/view/character/"
  "URL prefix of the character viewer.
`www-ids-find-format-char' appends the URI-encoded character to it.")

(defvar www-ids-find-chise-link-map-url-prefix
  "http://fonts.jp/chise_linkmap/map.cgi?code="
  "URL prefix of the CHISE link map.
`www-ids-find-format-line' appends the hexadecimal UCS code to it.")

(defvar www-ids-find-tang-chars-file-name
  "~tomo/projects/chise/ids/www/tang-chars.udd"
  "File searched by `www-ids-find-format-line' for decimal UCS codes.
A character whose code appears alone on a line of this file gets an
additional djvuchar link.")
39
(defun www-ids-find-format-char (c &optional code-desc)
  "Print character C to `standard-output' as a link to the character viewer.
The link target is `www-ids-find-char-viewer-url' followed by the
result of `www-uri-encode-object' on C; the link text is the result of
`www-format-encode-string' on the one-character string for C.

CODE-DESC is accepted for compatibility with existing callers and is
currently ignored."
  ;; An earlier implementation, formerly kept here as commented-out
  ;; code, rendered <img> glyphs for entity references with the prefixes
  ;; CB, JC3-, J78-/J83-/J90-/JSP-, G0-/G1-, C1- .. C7- and ZOB-, and
  ;; appended the textual code when CODE-DESC was non-nil.  See the
  ;; repository history if that rendering is needed again.
  (let* ((target (www-uri-encode-object c))
         (label (www-format-encode-string (char-to-string c))))
    (princ (format "<a href=\"%s%s\">%s</a>"
                   www-ids-find-char-viewer-url
                   target
                   label))))
137   
(defun www-ids-find-format-line (c is)
  "Print one HTML result line for character C to `standard-output'.
The line consists of, in order:
 - C itself as a viewer link inside <span class=\"entry\">;
 - when C has a UCS code, a Unihan link labelled U+XXXX (code up to
   #xFFFF) or U-XXXXXXXX (code up to #x10FFFF) and a \"(link map)\" link
   built from `www-ids-find-chise-link-map-url-prefix'; otherwise a run
   of spaces;
 - when IS (an ideographic structure) is non-nil, every element of its
   IDS form as a viewer link inside <span class=\"ids\">;
 - when the decimal UCS code is found alone on a line of
   `www-ids-find-tang-chars-file-name', a djvuchar query link;
 - a closing \"<br>\"."
  (princ "<span class=\"entry\">")
  (www-ids-find-format-char c 'code-desc)
  (princ "</span>")
  (let ((ucs (or (char-ucs c)
                 (encode-char c 'ucs))))
    (if (null ucs)
        ;; No UCS code: emit filler instead of the two links.
        (princ "          ")
      (princ
       (format
        " <a href=\"http://www.unicode.org/cgi-bin/GetUnihanData.pl?codepoint=%X\">%s</a>"
        ucs
        (cond ((<= ucs #xFFFF)
               (format "U+%04X" ucs))
              ((<= ucs #x10FFFF)
               (format "U-%08X" ucs)))))
      (princ
       (format " <a href=\"%s%X\">(link map)</a>"
               www-ids-find-chise-link-map-url-prefix ucs)))
    (princ " ")
    (when is
      (let* ((ids (ideographic-structure-to-ids is))
             (count (length ids)))
        (princ "<span class=\"ids\">")
        (dotimes (idx count)
          (www-ids-find-format-char (aref ids idx)))
        (princ "</span>")))
    ;; The tang-chars file is scanned from the top for a line holding
    ;; exactly the decimal UCS code.
    (when (and ucs
               (with-current-buffer
                   (find-file-noselect www-ids-find-tang-chars-file-name)
                 (goto-char (point-min))
                 (re-search-forward (format "^%d$" ucs) nil t)))
      (princ
       (format " <a href=\"http://coe21.zinbun.kyoto-u.ac.jp/djvuchar?query=%s\">"
               ;; Percent-encode every byte of the utf-8-jp form of C.
               (mapconcat
                (lambda (byte)
                  (format "%%%02X" (char-int byte)))
                (encode-coding-string (char-to-string c) 'utf-8-jp)
                "")))
      (princ (encode-coding-string "\e$B"M\e(B[\e$BEbBeBsK\\e(B]</a>" 'utf-8-jp-er)))
    (princ "<br>\n")))
184
(defun www-ids-insert-chars-including-components* (components
                                                   &optional ignored-chars products)
  "Print a nested <ul> of the characters built from COMPONENTS.
PRODUCTS is the list of characters to print; when it is nil the list is
obtained from `ideograph-find-products' called with COMPONENTS and
IGNORED-CHARS.  A sorted copy of the list is walked; every character
not yet in IGNORED-CHARS is printed with `www-ids-find-format-line' and
then expanded recursively, using the character itself as the component
string.

Sort order depends on the number of products:
 - 1024 or more: by character code;
 - 512 to 1023: by total stroke count, ties broken by character code;
 - fewer than 512: by total stroke count, ties broken by
   `ideograph-char<'.
In the stroke-count orders a character without a stroke count is
compared with the tie-break alone, and a character with a stroke count
sorts before one without.

Return the accumulated list of ignored (already printed) characters."
  (unless products
    (setq products (ideograph-find-products components ignored-chars)))
  (let* ((total (length products))
         (tie-break (if (>= total 512)
                        (lambda (a b)
                          (< (char-int a) (char-int b)))
                      (lambda (a b)
                        (ideograph-char< a b))))
         (precedes (if (>= total 1024)
                       tie-break
                     (lambda (a b)
                       (let ((strokes-a (char-total-strokes a)))
                         (if (null strokes-a)
                             (funcall tie-break a b)
                           (let ((strokes-b (char-total-strokes b)))
                             (cond ((null strokes-b) t)
                                   ((= strokes-a strokes-b)
                                    (funcall tie-break a b))
                                   (t (< strokes-a strokes-b)))))))))
         structure)
    (princ "<ul>\n")
    (dolist (c (sort (copy-list products) precedes))
      (unless (memq c ignored-chars)
        (setq structure (char-feature c 'ideographic-structure))
        (princ "<li>")
        (www-ids-find-format-line c structure)
        ;; Recurse with C as the only component; C joins the ignore list
        ;; so it is not printed again further down.
        (setq ignored-chars
              (www-ids-insert-chars-including-components*
               (char-to-string c) (cons c ignored-chars)))))
    (princ "</ul>\n"))
  ignored-chars)
232
(defun www-ids-insert-chars-including-components (components
                                                  &optional ignored-chars)
  "Print every character that includes COMPONENTS, then related characters.
The products of COMPONENTS come from `ideograph-find-products'.

With 2048 or more products they are printed as bare character links
and nothing else is done.  Otherwise:
 1. the products are printed as a recursive tree by
    `www-ids-insert-chars-including-components*';
 2. for every character printed so far, each of its component variants
    that has an `ideographic-structure' and was not printed yet is
    printed and expanded the same way;
 3. the result of `ideograph-find-products-with-variants' is printed:
    as bare links when it has 1024 or more entries, otherwise sorted by
    total stroke count (ties broken by `ideograph-char<') with one line
    per character not printed yet.  Those lines are expanded
    recursively unless one of the \"too many results\" notices was
    printed (1024+ products in the first search, or 512+ here).

Return the accumulated list of printed characters."
  (let* ((products (ideograph-find-products components ignored-chars))
         (total (length products))
         (skip-descendants nil)
         structure)
    (when (>= total 1024)
      (setq skip-descendants t)
      ;; Japanese notice: too many results, recursive search omitted.
      (princ
       (encode-coding-string
        "<p>\e$B7k2L$,B?$9$.$k$?$a!":F5"E*8!:w$r>JN,$7$^$7$?!#\e(B</p>"
        'utf-8-jp-er)))
    (cond
     ((>= total 2048)
      (dolist (c products)
        (www-ids-find-format-char c)))
     (t
      ;; Step 1: recursive tree of the direct products.
      (setq ignored-chars
            (nreverse
             (www-ids-insert-chars-including-components*
              components ignored-chars products)))
      ;; Step 2: component variants of everything printed so far.
      (dolist (c ignored-chars)
        (dolist (vc (char-component-variants c))
          (when (and (not (memq vc ignored-chars))
                     (setq structure
                           (get-char-attribute vc 'ideographic-structure)))
            (princ "<li>")
            (www-ids-find-format-line vc structure)
            (setq ignored-chars
                  (www-ids-insert-chars-including-components*
                   (char-to-string vc)
                   (cons vc ignored-chars))))))
      ;; Step 3: products found through variants of the components.
      (setq products
            (ideograph-find-products-with-variants components ignored-chars))
      (setq total (length products))
      (when (>= total 512)
        (setq skip-descendants t)
        ;; Japanese notice: too many results, recursive search of
        ;; related characters omitted.
        (princ
         (encode-coding-string
          "<p>\e$B7k2L$,B?$9$.$k$?$a!"4XO";z$N:F5"E*8!:w$r>JN,$7$^$7$?!#\e(B</p>"
          'utf-8-jp-er)))
      (cond
       ((>= total 1024)
        (dolist (c products)
          (www-ids-find-format-char c)))
       (t
        (dolist (c (sort (copy-tree products)
                         (lambda (a b)
                           (let ((strokes-a (char-total-strokes a)))
                             (if (null strokes-a)
                                 (ideograph-char< a b)
                               (let ((strokes-b (char-total-strokes b)))
                                 (cond ((null strokes-b) t)
                                       ((= strokes-a strokes-b)
                                        (ideograph-char< a b))
                                       (t (< strokes-a strokes-b)))))))))
          (unless (memq c ignored-chars)
            (setq structure (get-char-attribute c 'ideographic-structure))
            (princ "<li>")
            (www-ids-find-format-line c structure)
            (unless skip-descendants
              (setq ignored-chars
                    (www-ids-insert-chars-including-components*
                     (char-to-string c)
                     (cons c ignored-chars)))))))))))
  ignored-chars)
293
(defun www-ids-find-escape-html (string)
  "Return STRING with the characters &, <, > and \" replaced by HTML entities."
  (mapconcat (lambda (ch)
               (cond ((eq ch ?&) "&amp;")
                     ((eq ch ?<) "&lt;")
                     ((eq ch ?>) "&gt;")
                     ((eq ch ?\") "&quot;")
                     (t (char-to-string ch))))
             string ""))

(defun www-batch-ids-find ()
  "Batch (CGI) entry point: print the complete IDS Find HTML page.
The query is taken from the first element of `command-line-args-left',
which is then removed from that list.  An optional \"components=\"
prefix is stripped and the rest is URL-decoded as `utf-8-er'; an empty
or missing query means \"no components\".

The function prints, to `standard-output', a Content-Type header, the
page head, the search form (pre-filled with the query), an optional
maintenance notice, then either the search results for the components
or the introductory help text with links, and finally the footer.

While it runs, `coded-charset-entity-reference-alist' is extended with
entity-reference prefixes for several CNS, GB, JIS, CBETA, JEF-CHINA3
and Daikanwa coded charsets."
  (let ((components (car command-line-args-left))
        (coded-charset-entity-reference-alist
         (list*
          '(=cns11643-1         "C1-" 4 X)
          '(=cns11643-2         "C2-" 4 X)
          '(=cns11643-3         "C3-" 4 X)
          '(=cns11643-4         "C4-" 4 X)
          '(=cns11643-5         "C5-" 4 X)
          '(=cns11643-6         "C6-" 4 X)
          '(=cns11643-7         "C7-" 4 X)
          '(=gb2312             "G0-" 4 X)
          '(=gb12345            "G1-" 4 X)
          '(=jis-x0208@1990     "J90-" 4 X)
          '(=jis-x0212          "JSP-" 4 X)
          '(=cbeta              "CB" 5 d)
          '(=jef-china3         "JC3-" 4 X)
          '(=jis-x0208@1978     "J78-" 4 X)
          '(=jis-x0208@1983     "J83-" 4 X)
          '(=daikanwa           "M-" 5 d)
          coded-charset-entity-reference-alist)))
    (setq command-line-args-left (cdr command-line-args-left))
    ;; Normalize the query: strip "components=", URL-decode, and map an
    ;; empty or non-string argument to nil.
    (cond
     ((stringp components)
      (if (string-match "^components=" components)
          (setq components (substring components (match-end 0))))
      (setq components
            (if (> (length components) 0)
                (decode-url-string components 'utf-8-er)
              nil)))
     (t
      (setq components nil)))
    (princ "Content-Type: text/html; charset=UTF-8

<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"
            \"http://www.w3.org/TR/html4/loose.dtd\">
<html lang=\"ja\">
<head>
<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">
<title>CHISE IDS Find</title>
<link href=\"/css/bootstrap-4.5.0.min.css\" rel=\"stylesheet\">
<style type=\"text/css\">
<!--
.entry { font-size: 36px; }
.entry a img { height: 36px; }
.ids { font-size: 24px; }
.ids a img { height: 24px; }
img { vertical-align:middle; }
a { text-decoration:none; }
ul { margin: 0 0; }
li { margin: 0 0 -0.2em; }
.tooltip {
    position: relative;
    display: inline-block;
}
.tooltip .tooltiptext {
    display: none;
}
-->
</style>
</head>

<body>

<div class=\"jumbotron jumbotron-fluid mb-0\">
<h1 class=\"display-4 text-center\">")
    ;; Page title (Japanese: "CHISE IDS kanji search").
    (princ (encode-coding-string "CHISE IDS \e$B4A;z8!:w\e(B" 'utf-8-jp-er))
    (princ "</h1>")
    (princ "
<p class=\"text-center\">Version ")
    (princ www-ids-find-version)
    ;; Last-modified is the modification time of the products file.
    (princ (format-time-string
            " (Last-modified: %Y-%m-%d %H:%M:%S)</p>"
            (nth 5
                 (file-attributes
                  www-ids-find-ideographic-products-file-name))))
    (princ "
</div>
<div class=\"container mt-0 mw-100 d-inline-block align-top bg-dark\">
<p />
<div class=\"input-group mb-3 h3 my-4\">
<div class=\"input-group-prepend mw-75 ml-3\">
<form action=\"/ids-find\" method=\"GET\">
<span class=\"input-group-text\" id=\"basic-addon1\">
")
    ;; Input label (Japanese: "component string").
    (princ (encode-coding-string "\e$BItIJJ8;zNs\e(B" 'utf-8-jp-er))
    (princ "</span>
</div>
<input type=\"text\" class=\"form-control\" aria-describedby=\"basic-addon1\" name=\"components\" size=\"30\" maxlength=\"30\" value=\"")
    ;; The query comes straight from the request, so it must be escaped
    ;; before being echoed inside the value="..." attribute; otherwise a
    ;; quote or angle bracket in it could break out of the attribute.
    (if (> (length components) 0)
        (princ (encode-coding-string
                (www-ids-find-escape-html components)
                'utf-8-er)))
    (princ "\">
<input class=\"mr-3\" type=\"submit\" value=\"")
    ;; Submit button label (Japanese: "start search").
    (princ (encode-coding-string "\e$B8!:w3+;O\e(B" 'utf-8-jp-er))
    (princ "\">
</form>
</div>
</div>

")
    ;; Print a maintenance notice (Japanese: "the system is being
    ;; updated, please wait") unless the products file is newer than the
    ;; running executable.
    (unless (file-newer-than-file-p
             www-ids-find-ideographic-products-file-name
             (locate-file (car command-line-args) exec-path))
      (princ (encode-coding-string "<hr>
<p>
\e$B8=:_!"%7%9%F%`$N99?7:n6HCf$G$9!#$7$P$i$/$*BT$A$/$@$5$$!#\e(B
<hr>
" 'utf-8-jp-er)))
    (cond
     (components
      (princ "<div class=\"container\">
")
      ;; A single-character query gets its own line first.
      (when (= (length components) 1)
        (www-ids-find-format-line (aref components 0)
                                  (char-feature (aref components 0)
                                                'ideographic-structure)))
      (www-ids-insert-chars-including-components components)
      (princ "</div>\n"))
     (t
      ;; No query: introductory help text followed by a list of links.
      (princ (encode-coding-string "<div class=\"container mt-4\">
<div class=\"ml-3\">
<p>
\e$B;XDj$7$?ItIJ$rA4$F4^$`4A;z$N0lMw$rI=<($7$^$9!#\e(B
</p>
<p>
CHISE \e$B$GMQ$$$i$l$k<BBV;2>H7A<0!JNc!'\e(B&amp;M-00256;\e$B!K$GItIJ$r;XDj$9$k;v$b$G$-$^$9!#\e(B
</p>
</div>
" 'utf-8-jp-er))
      ;; The anchor around "www-ids-find.el (source file ...)" was
      ;; previously left unclosed; it now ends with </a>.
      (princ (encode-coding-string "
<p  class=\"ml-0\">
\[Links\]
<ul>
<li><a href=\"http://www.shuiren.org/chuden/toyoshi/syoseki/chise_ids.html\"
>\e$B!V\e(BCHISE IDS FIND\e$B$G4A;z$r8!:w!W\e(B</a> \e$B!=\e(B \e$B;3ED?r?N$5$s!J\e(B<a
href=\"http://www.shuiren.org/\">\e$B?g?MDb\e(B</a>\e$B!K$K$h$k2r@b\e(B
</ul>
<ul>
<li><a href=\"http://www.karitsu.org/tools/firefox_plugin.htm\"
>Firefox \e$BMQ\e(B plugin</a> by \e$B=);3M[0lO:$5$s!J\e(B<a href=\"http://www.karitsu.org/\"
>\e$B2aN)c7\e(B</a>\e$B!K\e(B
</ul>
<ul>
<li><a href=\"http://git.chise.org/gitweb/?p=chise/ids.git;a=blob;f=www/www-ids-find.el\"
>www-ids-find.el (source file (Emacs Lisp part))</a>
<li><a href=\"http://www.chise.org/ids/\"
>\e$B!V\e(BCHISE \e$B4A;z9=B$>pJs%G!<%?%Y!<%9!W\e(B</a>
<li><a href=\"http://fonts.jp/chise_linkmap/\"
>\e$B!V\e(Bchise_linkmap : CHISE \e$B4A;zO"4D?^!W\e(B</a> by \e$B>eCO9(0l$5$s\e(B
<li><a href=\"http://www.chise.org/\"
>CHISE Project</a>
</ul>
<ul>
<li><a href=\"http://coe21.zinbun.kyoto-u.ac.jp/djvuchar\"
>\e$B!VBsK\J8;z%G!<%?%Y!<%9!W\e(B</a> by
<a href=\"http://coe21.zinbun.kyoto-u.ac.jp/\"
>\e$B5~ETBg3X\e(B21\e$B@$5*\e(BCOE\e$B!VEl%"%8%"@$3&$N?MJ8>pJs3X8&5f650i5rE@!W\e(B</a>
<li><a href=\"http://www.unicode.org/\"
>Unicode</a>
</ul>
</p>
</div>
"
 'utf-8-jp-er))))
    (princ "<hr>
<div class=\"container\">
")
    (princ "<div class=\"ml-0\">
Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2015, 2016, 2017, 2020 <a href=\"http://kanji.zinbun.kyoto-u.ac.jp/~tomo/\"
>MORIOKA Tomohiko</a></div>")
    (princ
     (format
      "<div>Powered by <a
href=\"http://www.chise.org/xemacs/\"
>XEmacs CHISE</a> %s.</div>"
      (encode-coding-string xemacs-chise-version 'utf-8-jp-er)))
    (princ "
</div>
</body>
</html>
")))