(www-ids-find-format-line): Add links for
[chise/ids.git] / www / www-ids-find.el
1 (require 'ids-find)
2
3 (defun decode-url-string (string &optional coding-system)
4   (if (> (length string) 0)
5       (let ((i 0)
6             dest)
7         (while (string-match "%\\([0-9A-F][0-9A-F]\\)" string i)
8           (setq dest (concat dest
9                              (substring string i (match-beginning 0))
10                              (char-to-string
11                               (int-char
12                                (string-to-int (match-string 1 string) 16))))
13                 i (match-end 0)))
14         (decode-coding-string
15          (concat dest (substring string i))
16          coding-system))))
17
18 (defvar www-ids-find-tang-chars-file-name
19   "~tomo/projects/chise/ids/www/tang-chars.udd")
20
21 (defun www-ids-find-format-line (c is)
22   (let ((str (encode-coding-string (format "%c" c) 'utf-8-er))
23         code ucs)
24     (princ
25      (with-temp-buffer
26        (cond
27         ((string-match "&CB\\([0-9]+\\);" str)
28          (setq code (string-to-int (match-string 1 str)))
29          (insert "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=")
30          (insert str)
31          (insert (format "\"><img alt=\"CB%05d\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/cb-gaiji/%02d/CB%05d.gif\">\n"
32                          code (/ code 1000) code))
33          (insert (format "CB%05d</a>" code))
34          )
35         ((string-match "&JC3-\\([0-9A-F]+\\);" str)
36          (setq code (string-to-int (match-string 1 str) 16))
37          (insert "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=")
38          (insert str)
39          (insert (format "\"><img alt=\"JC3-%04X\" src=\"http://kanji.zinbun.kyoto-u.ac.jp/db/CHINA3/Gaiji/%04x.gif\">\n"
40                          code code))
41          (insert (format "JC3-%04X</a>" code))
42          )
43         (t
44          (insert "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=")
45          (insert str)
46          (insert "\">")
47          (insert str)
48          (insert "</a>")
49          ))
50        (goto-char (point-min))
51        (while (search-forward "&" nil t)
52          (replace-match "&amp;" t 'literal))
53        (buffer-string)
54        ))
55     (princ
56      (or (if (setq ucs (or (char-ucs c)
57                            (encode-char c 'ucs)))
58              (format " <a href=\"http://www.unicode.org/cgi-bin/GetUnihanData.pl?codepoint=%X\">%s</a>"
59                      ucs
60                      (cond ((<= ucs #xFFFF)
61                             (format "U+%04X" ucs))
62                            ((<= ucs #x10FFFF)
63                             (format "U-%08X" ucs))))
64            "          ")))
65     (princ " ")
66     (when is
67       (princ
68        (with-temp-buffer
69          (insert
70           (encode-coding-string
71            (ideographic-structure-to-ids is)
72            'utf-8-jp-er))
73          (goto-char (point-min))
74          (while (re-search-forward "&CB\\([0-9]+\\);" nil t)
75            (setq code (string-to-int (match-string 1)))
76            (replace-match
77             (format "<img alt=\"CB%05d\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/cb-gaiji/%02d/CB%05d.gif\">"
78                     code (/ code 1000) code)
79             t 'literal))
80          (buffer-string))))
81     (when (and ucs
82                (with-current-buffer
83                    (find-file-noselect
84                     www-ids-find-tang-chars-file-name)
85                  (goto-char (point-min))
86                  (re-search-forward (format "^%d$" ucs) nil t)))
87       (princ
88        (format " <a href=\"http://coe21.zinbun.kyoto-u.ac.jp/djvuchar?query=%s\">"
89                (mapconcat
90                 (lambda (c)
91                   (format "%%%02X" (char-int c)))
92                 (encode-coding-string (char-to-string c)
93                                       'utf-8-jp)
94                 "")))
95       (princ (encode-coding-string "\e$B"M\e(B[\e$BEbBeBsK\\e(B]</a>" 'utf-8-jp-er)))
96     (princ "<br>\n")))
97
98 (defun www-batch-ids-find ()
99   (let ((components (car command-line-args-left))
100         (coded-charset-entity-reference-alist
101          (list*
102           '(=cbeta      "CB" 5 d)
103           '(=jef-china3 "JC3-" 4 X)
104           coded-charset-entity-reference-alist))
105         is)
106     (setq command-line-args-left (cdr command-line-args-left))
107     (cond
108      ((stringp components)
109       (if (string-match "^components=" components)
110           (setq components (substring components (match-end 0))))
111       (setq components
112             (if (> (length components) 0)
113                 (decode-url-string components 'utf-8-jp-er)
114               nil))
115       )
116      (t
117       (setq components nil)
118       ))
119     (princ "Content-Type: text/html; charset=\"UTF-8\"
120
121 <!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"
122             \"http://www.w3.org/TR/html4/loose.dtd\">
123 <html lang=\"ja\">
124 <head>
125 <title>CHISE IDS Find</title>
126 </head>
127
128 <body>
129
130 <h1>")
131     (princ (encode-coding-string "CHISE IDS \e$B4A;z8!:w\e(B" 'utf-8-jp-er))
132     (princ "</h1>
133 <p>
134 <form action=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/ids-find\" method=\"GET\">
135 ")
136     (princ (encode-coding-string "\e$BItIJJ8;zNs\e(B" 'utf-8-jp-er))
137     (princ " <input type=\"text\" name=\"components\" size=\"30\" maxlength=\"30\" value=\"")
138     (if (> (length components) 0)
139         (princ (encode-coding-string components 'utf-8-jp-er)))
140     (princ "\">
141 <input type=\"submit\" value=\"")
142     (princ (encode-coding-string "\e$B8!:w3+;O\e(B" 'utf-8-jp-er))
143     (princ "\">
144 </form>
145
146 ")
147     (cond
148      (components
149       ;; (map-char-attribute
150       ;;  (lambda (c v)
151       ;;    (when (every (lambda (p)
152       ;;                   (ideographic-structure-member p v))
153       ;;                 components)
154       ;;      (princ (encode-coding-string
155       ;;              (ids-find-format-line c v)
156       ;;              'utf-8-jp-er))
157       ;;      (princ "<br>\n")
158       ;;      )
159       ;;    nil)
160       ;;  'ideographic-structure)
161       (when (= (length components) 1)
162         (www-ids-find-format-line (aref components 0)
163                                   (char-feature (aref components 0)
164                                                 'ideographic-structure)))
165       (dolist (c (ideographic-products-find components))
166         (setq is (char-feature c 'ideographic-structure))
167         ;; to avoid problems caused by wrong indexes
168         (when (every (lambda (c)
169                        (ideographic-structure-member c is))
170                      components)
171           (www-ids-find-format-line c is)))
172       )
173      (t
174       (princ (encode-coding-string "<hr>
175 <p>
176 \e$B;XDj$7$?ItIJ$rA4$F4^$`4A;z$N0lMw$rI=<($7$^$9!#\e(B
177 <p>
178 CHISE \e$B$GMQ$$$i$l$k<BBV;2>H7A<0!JNc!'\e(B&amp;M-00256;\e$B!K$GItIJ$r;XDj$9$k;v$b$G$-$^$9!#\e(B" 'utf-8-jp-er))
179       ))
180     (princ "<hr>")
181     (princ
182      (format
183       "Powered by <a
184 href=\"http://kanji.zinbun.kyoto-u.ac.jp/projects/chise/xemacs/\"
185 >XEmacs CHISE</a> %s."
186       xemacs-chise-version))
187     (princ "
188 </body>
189 </html>
190 ")))