(www-ids-insert-chars-including-components): New function.
[chise/ids.git] / www / www-ids-find.el
1 (require 'ids-find)
2
3 (defun decode-url-string (string &optional coding-system)
4   (if (> (length string) 0)
5       (let ((i 0)
6             dest)
7         (while (string-match "%\\([0-9A-F][0-9A-F]\\)" string i)
8           (setq dest (concat dest
9                              (substring string i (match-beginning 0))
10                              (char-to-string
11                               (int-char
12                                (string-to-int (match-string 1 string) 16))))
13                 i (match-end 0)))
14         (decode-coding-string
15          (concat dest (substring string i))
16          coding-system))))
17
18 (defvar www-ids-find-tang-chars-file-name
19   "~tomo/projects/chise/ids/www/tang-chars.udd")
20
21 (defun www-ids-find-format-line (c is)
22   (let ((str (encode-coding-string (format "%c" c) 'utf-8-er))
23         plane code ucs)
24     (princ
25      (with-temp-buffer
26        (cond
27         ((string-match "&CB\\([0-9]+\\);" str)
28          (setq code (string-to-int (match-string 1 str)))
29          (insert "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=")
30          (insert str)
31          (insert (format "\"><img alt=\"CB%05d\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/cb-gaiji/%02d/CB%05d.gif\">\n"
32                          code (/ code 1000) code))
33          (insert (format "CB%05d</a>" code))
34          )
35         ((string-match "&JC3-\\([0-9A-F]+\\);" str)
36          (setq code (string-to-int (match-string 1 str) 16))
37          (insert "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=")
38          (insert str)
39          (insert (format "\"><img alt=\"JC3-%04X\" src=\"http://kanji.zinbun.kyoto-u.ac.jp/db/CHINA3/Gaiji/%04x.gif\">\n"
40                          code code))
41          (insert (format "JC3-%04X</a>" code))
42          )
43         ((string-match "&J\\(78\\|83\\|90\\|SP\\)-\\([0-9A-F]+\\);" str)
44          (setq plane (match-string 1 str)
45                code (string-to-int (match-string 2 str) 16))
46          (insert "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=")
47          (insert str)
48          (insert (format "\"><img alt=\"J%s-%04X\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/JIS-%s/%02d-%02d.gif\">\n"
49                          plane code plane
50                          (- (lsh code -8) 32)
51                          (- (logand code 255) 32)))
52          (insert (format "J%s-%04X</a>" plane code))
53          )
54         ((string-match "&G\\([01]\\)-\\([0-9A-F]+\\);" str)
55          (setq plane (string-to-int (match-string 1 str))
56                code (string-to-int (match-string 2 str) 16))
57          (insert "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=")
58          (insert str)
59          (insert (format "\"><img alt=\"G%d-%04X\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/GB%d/%02d-%02d.gif\">\n"
60                          plane code plane
61                          (- (lsh code -8) 32)
62                          (- (logand code 255) 32)))
63          (insert (format "G%d-%04X</a>" plane code))
64          )
65         ((string-match "&C\\([1-7]\\)-\\([0-9A-F]+\\);" str)
66          (setq plane (string-to-int (match-string 1 str))
67                code (string-to-int (match-string 2 str) 16))
68          (insert "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=")
69          (insert str)
70          (insert (format "\"><img alt=\"C%d-%04X\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/CNS%d/%04X.gif\">\n"
71                          plane code plane code))
72          (insert (format "C%d-%04X</a>" plane code))
73          )
74         ((string-match "&ZOB-\\([0-9]+\\);" str)
75          (setq code (string-to-int (match-string 1 str)))
76          (insert "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=")
77          (insert str)
78          (insert (format "\"><img alt=\"ZOB-%04d\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/ZOB-1968/%04d.png\">\n"
79                          code code))
80          (insert (format "ZOB-%04d</a>" code))
81          )
82         (t
83          (insert "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=")
84          (insert str)
85          (insert "\">")
86          (insert str)
87          (insert "</a>")
88          ))
89        (goto-char (point-min))
90        (while (search-forward "&" nil t)
91          (replace-match "&amp;" t 'literal))
92        (buffer-string)
93        ))
94     (princ
95      (or (if (setq ucs (or (char-ucs c)
96                            (encode-char c 'ucs)))
97              (format " <a href=\"http://www.unicode.org/cgi-bin/GetUnihanData.pl?codepoint=%X\">%s</a>"
98                      ucs
99                      (cond ((<= ucs #xFFFF)
100                             (format "U+%04X" ucs))
101                            ((<= ucs #x10FFFF)
102                             (format "U-%08X" ucs))))
103            "          ")))
104     (princ " ")
105     (when is
106       (princ
107        (with-temp-buffer
108          (insert
109           (encode-coding-string
110            (ideographic-structure-to-ids is)
111            'utf-8-jp-er))
112          (goto-char (point-min))
113          (while (re-search-forward "&CB\\([0-9]+\\);" nil t)
114            (setq code (string-to-int (match-string 1)))
115            (replace-match
116             (format "<img alt=\"CB%05d\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/cb-gaiji/%02d/CB%05d.gif\">"
117                     code (/ code 1000) code)
118             t 'literal))
119          (buffer-string))))
120     (when (and ucs
121                (with-current-buffer
122                    (find-file-noselect
123                     www-ids-find-tang-chars-file-name)
124                  (goto-char (point-min))
125                  (re-search-forward (format "^%d$" ucs) nil t)))
126       (princ
127        (format " <a href=\"http://coe21.zinbun.kyoto-u.ac.jp/djvuchar?query=%s\">"
128                (mapconcat
129                 (lambda (c)
130                   (format "%%%02X" (char-int c)))
131                 (encode-coding-string (char-to-string c)
132                                       'utf-8-jp)
133                 "")))
134       (princ (encode-coding-string "\e$B"M\e(B[\e$BEbBeBsK\\e(B]</a>" 'utf-8-jp-er)))
135     (princ "<br>\n")))
136
137 (defun www-ids-insert-chars-including-components (components)
138   (let (is)
139     (dolist (c (ideographic-products-find components))
140       (setq is (char-feature c 'ideographic-structure))
141       ;; to avoid problems caused by wrong indexes
142       (when (every (lambda (cc)
143                      (ideographic-structure-member cc is))
144                    components)
145         (princ "<li>")
146         (www-ids-find-format-line c is)
147         (princ "<ul>\n")
148         (www-ids-insert-chars-including-components (char-to-string c))
149         (princ "</ul>\n")
150         )
151       )))
152
153 (defun www-batch-ids-find ()
154   (let ((components (car command-line-args-left))
155         (coded-charset-entity-reference-alist
156          (list*
157           '(=cns11643-1         "C1-" 4 X)
158           '(=cns11643-2         "C2-" 4 X)
159           '(=cns11643-3         "C3-" 4 X)
160           '(=cns11643-4         "C4-" 4 X)
161           '(=cns11643-5         "C5-" 4 X)
162           '(=cns11643-6         "C6-" 4 X)
163           '(=cns11643-7         "C7-" 4 X)
164           '(=gb2312             "G0-" 4 X)
165           '(=gb12345            "G1-" 4 X)
166           '(=jis-x0208@1990     "J90-" 4 X)
167           '(=jis-x0212          "JSP-" 4 X)
168           '(=cbeta              "CB" 5 d)
169           '(=jef-china3         "JC3-" 4 X)
170           '(=jis-x0208@1978     "J78-" 4 X)
171           '(=jis-x0208@1983     "J83-" 4 X)
172           '(=daikanwa           "M-" 5 d)
173           coded-charset-entity-reference-alist))
174         is)
175     (setq command-line-args-left (cdr command-line-args-left))
176     (cond
177      ((stringp components)
178       (if (string-match "^components=" components)
179           (setq components (substring components (match-end 0))))
180       (setq components
181             (if (> (length components) 0)
182                 (decode-url-string components 'utf-8-jp-er)
183               nil))
184       )
185      (t
186       (setq components nil)
187       ))
188     (princ "Content-Type: text/html; charset=\"UTF-8\"
189
190 <!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"
191             \"http://www.w3.org/TR/html4/loose.dtd\">
192 <html lang=\"ja\">
193 <head>
194 <title>CHISE IDS Find</title>
195 </head>
196
197 <body>
198
199 <h1>")
200     (princ (encode-coding-string "CHISE IDS \e$B4A;z8!:w\e(B" 'utf-8-jp-er))
201     (princ "</h1>
202 <p>
203 <form action=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/ids-find\" method=\"GET\">
204 ")
205     (princ (encode-coding-string "\e$BItIJJ8;zNs\e(B" 'utf-8-jp-er))
206     (princ " <input type=\"text\" name=\"components\" size=\"30\" maxlength=\"30\" value=\"")
207     (if (> (length components) 0)
208         (princ (encode-coding-string components 'utf-8-jp-er)))
209     (princ "\">
210 <input type=\"submit\" value=\"")
211     (princ (encode-coding-string "\e$B8!:w3+;O\e(B" 'utf-8-jp-er))
212     (princ "\">
213 </form>
214
215 ")
216     (cond
217      (components
218       ;; (map-char-attribute
219       ;;  (lambda (c v)
220       ;;    (when (every (lambda (p)
221       ;;                   (ideographic-structure-member p v))
222       ;;                 components)
223       ;;      (princ (encode-coding-string
224       ;;              (ids-find-format-line c v)
225       ;;              'utf-8-jp-er))
226       ;;      (princ "<br>\n")
227       ;;      )
228       ;;    nil)
229       ;;  'ideographic-structure)
230       (when (= (length components) 1)
231         (www-ids-find-format-line (aref components 0)
232                                   (char-feature (aref components 0)
233                                                 'ideographic-structure)))
234       ;; (dolist (c (ideographic-products-find components))
235       ;;   (setq is (char-feature c 'ideographic-structure))
236       ;;   ;; to avoid problems caused by wrong indexes
237       ;;   (when (every (lambda (c)
238       ;;                  (ideographic-structure-member c is))
239       ;;                components)
240       ;;     (www-ids-find-format-line c is)))
241       (princ "<ul>\n")
242       (www-ids-insert-chars-including-components components)
243       (princ "</ul>\n")
244       )
245      (t
246       (princ (encode-coding-string "<hr>
247 <p>
248 \e$B;XDj$7$?ItIJ$rA4$F4^$`4A;z$N0lMw$rI=<($7$^$9!#\e(B
249 <p>
250 CHISE \e$B$GMQ$$$i$l$k<BBV;2>H7A<0!JNc!'\e(B&amp;M-00256;\e$B!K$GItIJ$r;XDj$9$k;v$b$G$-$^$9!#\e(B" 'utf-8-jp-er))
251       ))
252     (princ "<hr>")
253     (princ
254      (format
255       "Powered by <a
256 href=\"http://kanji.zinbun.kyoto-u.ac.jp/projects/chise/xemacs/\"
257 >XEmacs CHISE</a> %s."
258       xemacs-chise-version))
259     (princ "
260 </body>
261 </html>
262 ")))