(www-batch-ids-find): Display a brief description when components are
[chise/ids.git] / www / www-ids-find.el
1 (require 'ids-find)
2
(defun decode-url-string (string &optional coding-system)
  "Decode the %XX escapes in STRING, then decode it with CODING-SYSTEM.
Every `%' followed by two hexadecimal digits (upper or lower case) is
replaced by the character whose code is that number.  The resulting
string is handed to `decode-coding-string' together with CODING-SYSTEM
and that function's value is returned.
Return nil when STRING is nil or empty."
  (if (> (length string) 0)
      (let ((pos 0)
            pieces)
        ;; Both letter cases are listed explicitly so that the match does
        ;; not depend on the current value of `case-fold-search'.
        (while (string-match "%\\([0-9A-Fa-f][0-9A-Fa-f]\\)" string pos)
          ;; Fragments are pushed in reverse order and joined once at the
          ;; end, instead of re-copying the accumulated text per escape.
          (setq pieces (cons (char-to-string
                              (int-char
                               (string-to-int (match-string 1 string) 16)))
                             (cons (substring string pos (match-beginning 0))
                                   pieces))
                pos (match-end 0)))
        (decode-coding-string
         (apply 'concat (nreverse (cons (substring string pos) pieces)))
         coding-system))))
17
(defvar www-ids-find-tang-chars-file-name
  "~tomo/projects/chise/ids/www/tang-chars.udd"
  "File consulted by `www-batch-ids-find' for each result that has a UCS code.
The file is visited and searched for a line consisting solely of the
character's UCS code written in decimal; when such a line exists, a
link to the djvuchar service is appended to that result line.")
20
(defun www-ids-find-escape-html (string)
  "Return STRING with `&', `<', `>' and `\"' replaced by HTML entities."
  (mapconcat (lambda (ch)
               (cond ((eq ch ?&) "&amp;")
                     ((eq ch ?<) "&lt;")
                     ((eq ch ?>) "&gt;")
                     ((eq ch ?\") "&quot;")
                     (t (char-to-string ch))))
             string ""))

(defun www-ids-find-cb-image-tag (code)
  "Return the <img> element showing the CB glyph image numbered CODE.
The image directory is CODE divided by 1000, printed with two digits."
  (format "<img alt=\"CB%05d\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/cb-gaiji/%02d/CB%05d.gif\">"
          code (/ code 1000) code))

(defun www-batch-ids-find ()
  "Print the IDS-find CGI response for the next command-line argument.
The first element of `command-line-args-left' is consumed as the query;
an optional leading \"components=\" is stripped and the rest is decoded
with `decode-url-string'.  An HTTP header and an HTML page holding the
search form are written with `princ'.  When components were given, one
line is printed for every character returned by
`ideographic-products-find' whose `ideographic-structure' contains each
of the components; otherwise a short usage description is printed."
  (let ((components (car command-line-args-left))
        ;; Each entry is passed to `list*' separately so that it becomes an
        ;; element of the alist; handing over one quoted list of entries
        ;; would cons that whole list on as a single, malformed element.
        (coded-charset-entity-reference-alist
         (list*
          '(=cbeta      "CB" 5 d)
          '(=jef-china3 "JC3-" 4 X)
          coded-charset-entity-reference-alist))
        is ucs str code)
    (setq command-line-args-left (cdr command-line-args-left))
    (cond
     ((stringp components)
      (if (string-match "^components=" components)
          (setq components (substring components (match-end 0))))
      (setq components
            (if (> (length components) 0)
                (decode-url-string components 'utf-8-jp-er)
              nil))
      )
     (t
      (setq components nil)
      ))
    (princ "Content-Type: text/html; charset=\"UTF-8\"

<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"
            \"http://www.w3.org/TR/html4/loose.dtd\">
<html lang=\"ja\">
<head>
<title>CHISE IDS Find</title>
</head>

<body>

<h1>")
    (princ (encode-coding-string "CHISE IDS \e$B4A;z8!:w\e(B" 'utf-8-jp-er))
    (princ "</h1>
<p>
<form action=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/ids-find\" method=\"GET\">
")
    (princ (encode-coding-string "\e$BItIJJ8;zNs\e(B" 'utf-8-jp-er))
    (princ " <input type=\"text\" name=\"components\" size=\"30\" maxlength=\"30\" value=\"")
    ;; The query comes straight from the request, so it is HTML-escaped
    ;; before being echoed into the attribute value.  Escaping happens
    ;; after encoding so that `&' produced by the encoder is covered too.
    (if (> (length components) 0)
        (princ (www-ids-find-escape-html
                (encode-coding-string components 'utf-8-jp-er))))
    (princ "\">
<input type=\"submit\" value=\"")
    (princ (encode-coding-string "\e$B8!:w3+;O\e(B" 'utf-8-jp-er))
    (princ "\">
</form>

")
    (cond
     (components
      ;; (map-char-attribute
      ;;  (lambda (c v)
      ;;    (when (every (lambda (p)
      ;;                   (ideographic-structure-member p v))
      ;;                 components)
      ;;      (princ (encode-coding-string
      ;;              (ids-find-format-line c v)
      ;;              'utf-8-jp-er))
      ;;      (princ "<br>\n")
      ;;      )
      ;;    nil)
      ;;  'ideographic-structure)
      (dolist (c (ideographic-products-find components))
        (setq is (char-feature c 'ideographic-structure))
        ;; to avoid problems caused by wrong indexes
        (when (every (lambda (component)
                       (ideographic-structure-member component is))
                     components)
          ;; The character itself: a CB entity reference is shown as its
          ;; glyph image followed by the CB number, anything else verbatim.
          (setq str
                (encode-coding-string (format "%c" c) 'utf-8-jp-er))
          (cond
           ((string-match "&CB\\([0-9]+\\);" str)
            (setq code (string-to-int (match-string 1 str)))
            (princ (concat (www-ids-find-cb-image-tag code) "\n"))
            (princ (format "CB%05d" code))
            )
           (t
            (princ str)))
          ;; Link labelled with the UCS code, or blank padding without one.
          (princ
           (or (if (setq ucs (or (char-ucs c)
                                 (encode-char c 'ucs)))
                   (format " <a href=\"http://www.unicode.org/cgi-bin/GetUnihanData.pl?codepoint=%X\">%s</a>"
                           ucs
                           (cond ((<= ucs #xFFFF)
                                  (format "U+%04X" ucs))
                                 ((<= ucs #x10FFFF)
                                  (format "U-%08X" ucs))))
                 "          ")))
          (princ " ")
          ;; The IDS string, with every CB entity reference in it replaced
          ;; by the corresponding glyph image.
          (princ
           (with-temp-buffer
             (insert
              (encode-coding-string
               (ideographic-structure-to-ids is)
               'utf-8-jp-er))
             (goto-char (point-min))
             (while (re-search-forward "&CB\\([0-9]+\\);" nil t)
               (setq code (string-to-int (match-string 1)))
               (replace-match (www-ids-find-cb-image-tag code)
                              t 'literal))
             (buffer-string)))
          ;; Extra link when the decimal UCS code is listed on a line of
          ;; its own in `www-ids-find-tang-chars-file-name'.
          (when (and ucs
                     (with-current-buffer
                         (find-file-noselect
                          www-ids-find-tang-chars-file-name)
                       (goto-char (point-min))
                       (re-search-forward (format "^%d$" ucs) nil t)))
            (princ
             (format " <a href=\"http://coe21.zinbun.kyoto-u.ac.jp/djvuchar?query=%s\">"
                     (mapconcat
                      (lambda (byte)
                        (format "%%%02X" (char-int byte)))
                      (encode-coding-string (char-to-string c)
                                            'utf-8-jp)
                      "")))
            (princ (encode-coding-string "\e$B"M\e(B[\e$BEbBeBsK\\e(B]</a>" 'utf-8-jp-er)))
          (princ "<br>\n")
          ))
      )
     (t
      (princ (encode-coding-string "<hr>
<p>
\e$B;XDj$7$?ItIJ$rA4$F4^$`4A;z$N0lMw$rI=<($7$^$9!#\e(B
<p>
CHISE \e$B$GMQ$$$i$l$k<BBV;2>H7A<0!JNc!'\e(B&amp;M-00003;\e$B!K$GItIJ$r;XDj$9$k;v$b$G$-$^$9!#\e(B" 'utf-8-jp-er))
      ))
    (princ "<hr>")
    (princ
     (format
      "Powered by <a
href=\"http://kanji.zinbun.kyoto-u.ac.jp/projects/chise/xemacs/\"
>XEmacs CHISE</a> %s."
      xemacs-chise-version))
    (princ "
</body>
</html>
")))