(www-batch-ids-find): If only one character is specified as
[chise/ids.git] / www / www-ids-find.el
1 (require 'ids-find)
2
(defun decode-url-string (string &optional coding-system)
  "Decode the %XX escapes in STRING, then decode it by CODING-SYSTEM.
Each `%' followed by two hexadecimal digits (either letter case) is
replaced by the character whose code is that value; all other text,
including `+', is copied unchanged.  The resulting string is passed to
`decode-coding-string' together with CODING-SYSTEM.
Return nil when STRING is nil or empty."
  (if (> (length string) 0)
      (let ((i 0)
            pieces)
        ;; Both letter cases are listed explicitly so that the match does
        ;; not depend on the caller's value of `case-fold-search'.
        (while (string-match "%\\([0-9A-Fa-f][0-9A-Fa-f]\\)" string i)
          ;; PIECES is kept in reverse order and joined once at the end
          ;; instead of re-copying the growing result on every escape.
          (setq pieces (cons (char-to-string
                              (int-char
                               (string-to-int (match-string 1 string) 16)))
                             (cons (substring string i (match-beginning 0))
                                   pieces))
                i (match-end 0)))
        (decode-coding-string
         (apply 'concat (nreverse (cons (substring string i) pieces)))
         coding-system))))
17
(defvar www-ids-find-tang-chars-file-name
  "~tomo/projects/chise/ids/www/tang-chars.udd"
  "Name of the file consulted by `www-ids-find-format-line'.
That function visits the file and searches it for a line consisting
solely of a character's UCS code point written in decimal; when such a
line exists, an extra djvuchar link is printed for the character.")
20
(defun www-ids-find-cb-img-tag (code)
  "Return the HTML <img> tag for the cb-gaiji glyph image numbered CODE."
  (format "<img alt=\"CB%05d\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/cb-gaiji/%02d/CB%05d.gif\">"
          code (/ code 1000) code))

(defun www-ids-find-format-line (c is)
  "Print one HTML result line for character C to standard output.
IS is the ideographic structure of C, or nil when there is none.
The line consists of, in order:
 - C encoded as utf-8-jp-er, or a glyph image plus its CBnnnnn label
   when that encoding is a &CBnnnnn; reference;
 - a Unihan link labelled with the UCS code point of C, or ten spaces
   when C has no UCS code point;
 - the IDS built from IS, with &CBnnnnn; references turned into images;
 - a djvuchar link when the code point is listed in the file named by
   `www-ids-find-tang-chars-file-name';
 - a closing <br>."
  (let ((str (encode-coding-string (format "%c" c) 'utf-8-jp-er))
        (ucs (or (char-ucs c)
                 (encode-char c 'ucs))))
    ;; The character itself.
    (if (string-match "&CB\\([0-9]+\\);" str)
        (let ((code (string-to-int (match-string 1 str))))
          (princ (concat (www-ids-find-cb-img-tag code) "\n"))
          (princ (format "CB%05d" code)))
      (princ str))
    ;; Its UCS code point, or padding when it has none.
    (princ
     (if ucs
         (format " <a href=\"http://www.unicode.org/cgi-bin/GetUnihanData.pl?codepoint=%X\">%s</a>"
                 ucs
                 ;; Anything above #xFFFF, including values beyond
                 ;; #x10FFFF, is labelled in the eight-digit form so the
                 ;; label is never the text nil.
                 (if (<= ucs #xFFFF)
                     (format "U+%04X" ucs)
                   (format "U-%08X" ucs)))
       "          "))
    (princ " ")
    ;; The IDS of the character.
    (when is
      (princ
       (with-temp-buffer
         (insert
          (encode-coding-string
           (ideographic-structure-to-ids is)
           'utf-8-jp-er))
         (goto-char (point-min))
         (while (re-search-forward "&CB\\([0-9]+\\);" nil t)
           (replace-match
            (www-ids-find-cb-img-tag (string-to-int (match-string 1)))
            t 'literal))
         (buffer-string))))
    ;; Optional djvuchar link.  The readability test keeps a missing or
    ;; unreadable list file from aborting the whole response.
    (when (and ucs
               (file-readable-p www-ids-find-tang-chars-file-name)
               (with-current-buffer
                   (find-file-noselect
                    www-ids-find-tang-chars-file-name)
                 (goto-char (point-min))
                 (re-search-forward (format "^%d$" ucs) nil t)))
      (princ
       (format " <a href=\"http://coe21.zinbun.kyoto-u.ac.jp/djvuchar?query=%s\">"
               ;; Percent-encode every byte of the UTF-8 form of C.
               (mapconcat
                (lambda (c)
                  (format "%%%02X" (char-int c)))
                (encode-coding-string (char-to-string c)
                                      'utf-8-jp)
                "")))
      ;; NOTE(review): the literal below looks like ISO-2022-JP text
      ;; (ESC $ B ... ESC ( B sequences); keep its bytes exactly as they are.
      (princ (encode-coding-string "\e$B"M\e(B[\e$BEbBeBsK\\e(B]</a>" 'utf-8-jp-er)))
    (princ "<br>\n")))
75
(defun www-ids-find-escape-html (string)
  "Return STRING with &, <, > and the double quote replaced by HTML references."
  (mapconcat (lambda (ch)
               (cond ((eq ch ?&) "&amp;")
                     ((eq ch ?<) "&lt;")
                     ((eq ch ?>) "&gt;")
                     ((eq ch ?\") "&quot;")
                     (t (char-to-string ch))))
             string ""))

(defun www-batch-ids-find ()
  "Print a complete CGI response for an IDS component search.
The query is taken from the first element of `command-line-args-left',
which is removed from that list.  An optional leading \"components=\"
is stripped and the rest is decoded with `decode-url-string' as
utf-8-jp-er.
The response is a Content-Type header followed by an HTML page holding
the search form, pre-filled with the query.  When a query was given,
one line per character returned by `ideographic-products-find' is
printed with `www-ids-find-format-line'; a single-character query is
itself printed first.  Without a query a short usage text is printed."
  (let ((components (car command-line-args-left))
        ;; Add the two entries individually.  Consing the two-entry list
        ;; on as one element would not yield a usable alist entry.
        (coded-charset-entity-reference-alist
         (append
          '((=cbeta      "CB" 5 d)
            (=jef-china3 "JC3-" 4 X))
          coded-charset-entity-reference-alist))
        is)
    (setq command-line-args-left (cdr command-line-args-left))
    ;; Normalise COMPONENTS to a decoded, non-empty string or nil.
    (if (stringp components)
        (progn
          (if (string-match "^components=" components)
              (setq components (substring components (match-end 0))))
          (setq components
                (if (> (length components) 0)
                    (decode-url-string components 'utf-8-jp-er)
                  nil)))
      (setq components nil))
    (princ "Content-Type: text/html; charset=\"UTF-8\"

<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"
            \"http://www.w3.org/TR/html4/loose.dtd\">
<html lang=\"ja\">
<head>
<title>CHISE IDS Find</title>
</head>

<body>

<h1>")
    (princ (encode-coding-string "CHISE IDS \e$B4A;z8!:w\e(B" 'utf-8-jp-er))
    (princ "</h1>
<p>
<form action=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/ids-find\" method=\"GET\">
")
    (princ (encode-coding-string "\e$BItIJJ8;zNs\e(B" 'utf-8-jp-er))
    (princ " <input type=\"text\" name=\"components\" size=\"30\" maxlength=\"30\" value=\"")
    ;; The query comes from the request, so it is escaped before being
    ;; placed inside the attribute value.  Escaping happens after
    ;; encoding so that any & produced by the encoding is covered too.
    (if (> (length components) 0)
        (princ (www-ids-find-escape-html
                (encode-coding-string components 'utf-8-jp-er))))
    (princ "\">
<input type=\"submit\" value=\"")
    (princ (encode-coding-string "\e$B8!:w3+;O\e(B" 'utf-8-jp-er))
    (princ "\">
</form>

")
    (cond
     (components
      ;; A single-character query is listed itself before its products.
      (when (= (length components) 1)
        (www-ids-find-format-line (aref components 0)
                                  (char-feature (aref components 0)
                                                'ideographic-structure)))
      (dolist (c (ideographic-products-find components))
        (setq is (char-feature c 'ideographic-structure))
        ;; to avoid problems caused by wrong indexes
        (when (every (lambda (c)
                       (ideographic-structure-member c is))
                     components)
          (www-ids-find-format-line c is))))
     (t
      (princ (encode-coding-string "<hr>
<p>
\e$B;XDj$7$?ItIJ$rA4$F4^$`4A;z$N0lMw$rI=<($7$^$9!#\e(B
<p>
CHISE \e$B$GMQ$$$i$l$k<BBV;2>H7A<0!JNc!'\e(B&amp;M-00256;\e$B!K$GItIJ$r;XDj$9$k;v$b$G$-$^$9!#\e(B" 'utf-8-jp-er))))
    (princ "<hr>")
    (princ
     (format
      "Powered by <a
href=\"http://kanji.zinbun.kyoto-u.ac.jp/projects/chise/xemacs/\"
>XEmacs CHISE</a> %s."
      xemacs-chise-version))
    (princ "
</body>
</html>
")))