b2182f2a611bd47263bee69e3eda9d8928d30784
[chise/ids.git] / www / www-ids-find.el
1 (require 'ids-find)
2 (require 'cwiki-common)
3
(defun www-format--replace-matches (regexp make-replacement)
  "Rewrite every match of REGEXP in the current buffer, starting at the top.
MAKE-REPLACEMENT is called with no arguments right after each match, so
the match data of that hit is available to it.  If it returns a string,
the matched text is replaced by it verbatim (fixed case, literal); if it
returns nil, the match is left alone and the scan goes on after it."
  (let (replacement)
    (goto-char (point-min))
    (while (re-search-forward regexp nil t)
      (if (setq replacement (funcall make-replacement))
          (replace-match replacement t 'literal)))))

(defun www-format--expand-glyph-references ()
  "Turn glyph entity references in the current buffer into <img> elements.
Each pass scans the whole buffer for one family of `&...;' references
and replaces every hit with an <img> tag.  The passes run in the order
written below."
  ;; CBETA gaiji: decimal code, images grouped by thousands.
  (www-format--replace-matches
   "&\\(A-\\|G-\\|g2-\\|R-\\)?CB\\([0-9]+\\);"
   (lambda ()
     (let ((code (string-to-int (match-string 2))))
       (format "<img alt=\"CB%05d\" src=\"%s/cb-gaiji/%02d/CB%05d.gif\"
style=\"%s\">"
               code
               chise-wiki-legacy-bitmap-glyphs-url
               (/ code 1000) code
               www-format-char-img-style))))

  ;; JIS planes: the two code bytes minus 32 give the image file name.
  (www-format--replace-matches
   "&\\(o-\\|G-\\|g2-\\|R-\\)?J\\(78\\|83\\|90\\|97\\|SP\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);"
   (lambda ()
     (let ((plane (match-string 2))
           (code (string-to-int (match-string 3) 16)))
       (format "<img alt=\"J%s-%04X\" src=\"%s/JIS-%s/%02d-%02d.gif\"
style=\"%s\">"
               plane code
               chise-wiki-legacy-bitmap-glyphs-url
               plane
               (- (lsh code -8) 32)
               (- (logand code 255) 32)
               www-format-char-img-style))))

  (www-format--replace-matches
   "&\\(o-\\|G-\\|g2-\\|R-\\)?J0-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);"
   (lambda ()
     (let ((code (string-to-int (match-string 2) 16)))
       (format "<img alt=\"J0-%04X\" src=\"%s/JIS-90/%02d-%02d.gif\"
style=\"%s\">"
               code
               chise-wiki-legacy-bitmap-glyphs-url
               (- (lsh code -8) 32)
               (- (logand code 255) 32)
               www-format-char-img-style))))

  ;; Hanyo-Denshi sets.
  (www-format--replace-matches
   "&\\(o-\\|G-\\|g2-\\|R-\\)?HD-\\(JA\\|JB\\|JC\\|JD\\|FT\\|IA\\|IB\\|HG\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);"
   (lambda ()
     (let ((plane (match-string 2))
           (code (string-to-int (match-string 3) 16)))
       (format "<img alt=\"HD-%s-%04X\" src=\"%s/IVD/HanyoDenshi/%s%02d%02d.png\"
style=\"%s\">"
               plane code
               chise-wiki-legacy-bitmap-glyphs-url
               plane
               (- (lsh code -8) 32)
               (- (logand code 255) 32)
               www-format-char-img-style))))

  (www-format--replace-matches
   "&\\(o-\\|G-\\|g2-\\|R-\\)?HD-\\(IP\\|JT\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);"
   (lambda ()
     (let ((plane (match-string 2))
           (code (string-to-int (match-string 3) 16)))
       (format "<img alt=\"HD-%s-%04X\" src=\"%s/IVD/HanyoDenshi/%s%04X.png\"
style=\"%s\">"
               plane code
               chise-wiki-legacy-bitmap-glyphs-url
               plane code
               www-format-char-img-style))))

  (www-format--replace-matches
   "&\\(o-\\|G-\\|g2-\\|R-\\)?HD-KS-\\([0-9]+\\);"
   (lambda ()
     (let ((code (string-to-int (match-string 2))))
       (format "<img alt=\"HD-KS%06d\" src=\"%s/IVD/HanyoDenshi/KS%06d.png\"
style=\"vertical-align:middle\">"
               code
               chise-wiki-legacy-bitmap-glyphs-url
               code))))

  ;; The alt text used to read "HD-KS%06d" here (copied from the pass
  ;; above); it now names the TK set, matching the image file name.
  (www-format--replace-matches
   "&\\(o-\\|G-\\|g2-\\|R-\\)?HD-TK-\\([0-9]+\\);"
   (lambda ()
     (let ((code (string-to-int (match-string 2))))
       (format "<img alt=\"HD-TK%08d\" src=\"%s/IVD/HanyoDenshi/TK%08d.png\"
style=\"vertical-align:middle\">"
               code
               chise-wiki-legacy-bitmap-glyphs-url
               code))))

  ;; GB0 / GB1.
  (www-format--replace-matches
   "&G\\([01]\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);"
   (lambda ()
     (let ((plane (string-to-int (match-string 1)))
           (code (string-to-int (match-string 2) 16)))
       (format "<img alt=\"GB%d-%04X\" src=\"%s/GB%d/%02d-%02d.gif\"
style=\"%s\">"
               plane code
               chise-wiki-legacy-bitmap-glyphs-url
               plane
               (- (lsh code -8) 32)
               (- (logand code 255) 32)
               www-format-char-img-style))))

  ;; CNS planes 1-7.
  (www-format--replace-matches
   "&\\(R-\\)?C\\([1-7]\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);"
   (lambda ()
     (let ((plane (string-to-int (match-string 2)))
           (code (string-to-int (match-string 3) 16)))
       (format "<img alt=\"CNS%d-%04X\" src=\"%s/CNS%d/%04X.gif\"
style=\"%s\">"
               plane code
               chise-wiki-legacy-bitmap-glyphs-url
               plane code
               www-format-char-img-style))))

  (www-format--replace-matches
   "&\\(R-\\)?JC3-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);"
   (lambda ()
     (let ((code (string-to-int (match-string 2) 16)))
       (format "<img alt=\"JC3-%04X\" src=\"%s/JEF-CHINA3/%04X.png\">"
               code chise-wiki-bitmap-glyph-image-url code))))

  (www-format--replace-matches
   "&\\(A-\\)?ZOB-\\([0-9]+\\);"
   (lambda ()
     (let ((code (string-to-int (match-string 2))))
       (format "<img alt=\"ZOB-%04d\" src=\"%s/ZOB-1968/%04d.png\"
style=\"vertical-align:middle\">"
               code
               chise-wiki-legacy-bitmap-glyphs-url
               code))))

  (www-format--replace-matches
   "&\\(A2-\\|g2-\\|R-\\)?DJT-\\([0-9]+\\);"
   (lambda ()
     (let ((code (string-to-int (match-string 2))))
       (format "<img alt=\"DJT-%05d\" src=\"%s/%05d.png\"
style=\"vertical-align:middle; width: auto; max-height: 60px\">"
               code
               chise-wiki-daijiten-bitmap-glyphs-url
               code))))

  ;; SW-JIGUGE without an explicit 4/5 suffix is treated as "5".
  (www-format--replace-matches
   "&SW-JIGUGE\\([45]?\\)-\\([0-9]+\\);"
   (lambda ()
     (let* ((subcode (match-string 1))
            (code (string-to-int (match-string 2)))
            (plane (if (string= subcode "") "5" subcode)))
       (format "<img alt=\"SW-JIGUGE%s-%05d\" src=\"%s/ShuoWen/Jiguge%s/%05d.png\"
style=\"vertical-align:middle; width: auto; max-height: 80px\">"
               plane code
               chise-wiki-legacy-bitmap-glyphs-url
               plane code))))

  ;; HNG: the last digit selects a letter suffix (1 -> a, 2 -> b, ...);
  ;; 0 means no suffix.
  (www-format--replace-matches
   "&HNG\\([0-9]+\\)-\\([0-9][0-9][0-9][0-9]\\)\\([0-9]\\);"
   (lambda ()
     (let* ((plane (match-string 1))
            (code (string-to-int (match-string 2)))
            (subcode (string-to-int (match-string 3)))
            (suffix (if (eq subcode 0)
                        ""
                      (char-to-string
                       (decode-char 'ascii (+ 96 subcode))))))
       (format
        "<img alt=\"HNG%s-%04d%s\" src=\"%s/%s/%04d%s.png\" style=\"
vertical-align:middle; width: auto; max-height: 60px\">"
        plane code suffix
        chise-wiki-hng-bitmap-glyphs-url
        plane code suffix))))

  ;; HDIC sets: a reference is replaced only when the character carries
  ;; the corresponding id attribute; otherwise it is left as it is.
  (www-format--replace-matches
   "&\\(R-\\)?CHISE-HDIC-TSJ\\([0-9A-F]+\\);"
   (lambda ()
     (let ((id (get-char-attribute
                (decode-char '===chise-hdic-tsj
                             (string-to-int (match-string 2) 16))
                '=hdic-tsj-glyph-id)))
       (if id
           (format
            "<img alt=\"HDIC-TSJ-%s\" src=\"https://viewer.hdic.jp/img/tsj/%s.jpg\" style=\"
vertical-align:middle; width: auto; max-height: 60px\">"
            id id)))))

  (www-format--replace-matches
   "&\\(R-\\)?CHISE-HDIC-SYP\\([0-9A-F]+\\);"
   (lambda ()
     (let ((id (get-char-attribute
                (decode-char '===chise-hdic-syp
                             (string-to-int (match-string 2) 16))
                '=hdic-syp-entry-id)))
       (if id
           (format
            "<img alt=\"HDIC-SYP-%s\" src=\"https://viewer.hdic.jp/img/syp/%s\" style=\"
vertical-align:middle; width: auto; max-height: 60px\">"
            id id)))))

  (www-format--replace-matches
   "&\\(R-\\)?CHISE-HDIC-KTB\\([0-9A-F]+\\);"
   (lambda ()
     (let ((id (get-char-attribute
                (decode-char '===chise-hdic-ktb
                             (string-to-int (match-string 2) 16))
                '=hdic-ktb-entry-id)))
       (if id
           (format
            "<img alt=\"HDIC-KTB-%s\" src=\"https://hdic.chise.org/img/ktb/%s.jpg\" style=\"
vertical-align:middle; width: auto; max-height: 60px\">"
            id id)))))

  (www-format--replace-matches
   "&\\(o-\\|G-\\|g2-\\|R-\\)?AJ1-\\([0-9]+\\);"
   (lambda ()
     (let ((code (string-to-int (match-string 2))))
       (format "<img alt=\"AJ1-%05d\" src=\"%s/IVD/AdobeJapan1/CID+%d.png\"
style=\"vertical-align:middle\">"
               code
               chise-wiki-legacy-bitmap-glyphs-url
               code))))

  (www-format--replace-matches
   "&\\(A-\\|o-\\|G-\\|g2-\\|R-\\)?MJ\\([0-9]+\\);"
   (lambda ()
     (let ((code (string-to-int (match-string 2))))
       (format "<img alt=\"MJ%06d\" src=\"https://moji.or.jp/mojikibansearch/img/MJ/MJ%06d.png\"
style=\"vertical-align:middle; width: 48px; height: 48px\">"
               code
               code))))

  ;; GlyphWiki-backed references.
  (www-format--replace-matches
   "&\\(o-\\|G-\\|g2-\\)?IU[+-]\\([0-9A-F]+\\);"
   (lambda ()
     (let ((code (string-to-int (match-string 2) 16)))
       (format "<img alt=\"u%04x\" src=\"%s/u%04x.svg\"
style=\"vertical-align:middle; width: 48px; height: 48px\">"
               code
               chise-wiki-glyphwiki-glyph-image-url
               code))))

  (www-format--replace-matches
   "&\\(o-\\|G-\\|g2-\\|R-\\)?KU[+-]\\([0-9A-F]+\\);"
   (lambda ()
     (let ((code (string-to-int (match-string 2) 16)))
       (format "<img alt=\"u%04x-k\" src=\"%s/u%04x-k.svg\"
style=\"vertical-align:middle; width: 48px; height: 48px\">"
               code
               chise-wiki-glyphwiki-glyph-image-url
               code))))

  (www-format--replace-matches
   "&A-\\(comp\\|cgn\\)U[+-]\\([0-9A-F]+\\);"
   (lambda ()
     (let ((code (string-to-int (match-string 2) 16)))
       (format "<img alt=\"u%04x\" src=\"%s/u%04x.svg\"
style=\"vertical-align:middle; width: 48px; height: 48px\">"
               code
               chise-wiki-glyphwiki-glyph-image-url
               code))))

  (www-format--replace-matches
   "&\\(A-\\|g2-\\)?U-i\\([0-9]+\\)\\+\\([0-9A-F]+\\);"
   (lambda ()
     (let ((plane (string-to-int (match-string 2)))
           (code (string-to-int (match-string 3) 16)))
       (format "<img alt=\"u%04x-itaiji-%03d\" src=\"%s/u%04x-itaiji-%03d.svg\"
style=\"vertical-align:middle; width: 48px; height: 48px\">"
               code
               plane
               chise-wiki-glyphwiki-glyph-image-url
               code
               plane))))

  (www-format--replace-matches
   "&A-IWDSU\\+\\([0-9A-F]+\\);"
   (lambda ()
     (let ((code (string-to-int (match-string 1) 16)))
       (format "<img alt=\"A-IWDSU+%04x\" src=\"%s/u%04x.svg\"
style=\"vertical-align:middle; width: 48px; height: 48px\">"
               code
               chise-wiki-glyphwiki-glyph-image-url
               code))))

  (www-format--replace-matches
   "&\\(A-\\)?CDP-i\\([0-9]+\\)-\\([0-9A-F]+\\);"
   (lambda ()
     (let ((plane (string-to-int (match-string 2)))
           (code (string-to-int (match-string 3) 16)))
       (format "<img alt=\"cdp-%04x-itaiji-%03d\" src=\"%s/cdp-%04x-itaiji-%03d.svg\"
style=\"vertical-align:middle; width: 48px; height: 48px\">"
               code
               plane
               chise-wiki-glyphwiki-glyph-image-url
               code
               plane))))

  (www-format--replace-matches
   "&\\(A-\\)?CDP-v\\([0-9]+\\)-\\([0-9A-F]+\\);"
   (lambda ()
     (let ((plane (string-to-int (match-string 2)))
           (code (string-to-int (match-string 3) 16)))
       (format "<img alt=\"cdp-%04x-var-%03d\" src=\"%s/cdp-%04x-var-%03d.svg\"
style=\"vertical-align:middle; width: 48px; height: 48px\">"
               code
               plane
               chise-wiki-glyphwiki-glyph-image-url
               code
               plane))))

  (www-format--replace-matches
   "&\\(A-\\|G-\\|g2-\\|R-\\)?M-\\([0-9]+\\);"
   (lambda ()
     (let ((code (string-to-int (match-string 2))))
       (format "<img alt=\"dkw-%05d\" src=\"%s/dkw-%05d.svg\"
style=\"vertical-align:middle; width: 48px; height: 48px\">"
               code
               chise-wiki-glyphwiki-glyph-image-url
               code))))

  (www-format--replace-matches
   "&\\(g2-\\)?U-v\\([0-9]+\\)\\+\\([0-9A-F]+\\);"
   (lambda ()
     (let ((plane (string-to-int (match-string 2)))
           (code (string-to-int (match-string 3) 16)))
       (format "<img alt=\"u%04x-var-%03d\" src=\"%s/u%04x-var-%03d.svg\"
style=\"vertical-align:middle; width: 48px; height: 48px\">"
               code
               plane
               chise-wiki-glyphwiki-glyph-image-url
               code
               plane))))

  ;; References served through the glyph CGI.
  (www-format--replace-matches
   "&\\(A-\\|G-\\|R-\\|g2-\\)?GT-\\([0-9]+\\);"
   (lambda ()
     (let ((code (string-to-int (match-string 2))))
       (format "<img alt=\"GT-%05d\" src=\"%s?char=GT-%05d\"
style=\"%s\">"
               code
               chise-wiki-glyph-cgi-url
               code
               www-format-char-img-style))))

  (www-format--replace-matches
   "&\\(A-\\|G-\\|g2-\\)?GT-K\\([0-9]+\\);"
   (lambda ()
     (let ((code (string-to-int (match-string 2))))
       (format "<img alt=\"GT-K%05d\" src=\"%s?char=GT-K%05d\"
style=\"%s\">"
               code
               chise-wiki-glyph-cgi-url
               code
               www-format-char-img-style))))

  (www-format--replace-matches
   "&B-\\([0-9A-F]+\\);"
   (lambda ()
     (let ((code (string-to-int (match-string 1) 16)))
       (format "<img alt=\"B-%04X\" src=\"%s?char=B-%04X\"
style=\"%s\">"
               code
               chise-wiki-glyph-cgi-url
               code
               www-format-char-img-style))))

  (www-format--replace-matches
   "&\\(A-\\|G-\\|g2-\\|R-\\)?CDP-\\([0-9A-F]+\\);"
   (lambda ()
     (let ((code (string-to-int (match-string 2) 16)))
       (format "<img alt=\"CDP-%04X\" src=\"%s?char=CDP-%04X\"
style=\"%s\">"
               code
               chise-wiki-glyph-cgi-url
               code
               www-format-char-img-style))))

  (www-format--replace-matches
   "&\\(I-\\)?HZK\\(0[1-9]\\|1[0-2]\\)-\\([0-9A-F]+\\);"
   (lambda ()
     (let ((plane (match-string 2))
           (code (string-to-int (match-string 3) 16)))
       (format "<img alt=\"HZK%s-%04X\" src=\"%s?char=HZK%s-%04X\"
style=\"%s\">"
               plane
               code
               chise-wiki-glyph-cgi-url
               plane
               code
               www-format-char-img-style))))

  (www-format--replace-matches
   "&\\(A-\\|G-\\|g2-\\|R-\\)?RUI6-\\([0-9A-F]+\\);"
   (lambda ()
     (let ((code (string-to-int (match-string 2) 16)))
       (format "<img alt=\"RUI6-%04X\" src=\"%s?char=RUI6-%04X\"
style=\"vertical-align:middle\">"
               code
               chise-wiki-glyph-cgi-url
               code))))

  (www-format--replace-matches
   "&hanaJU\\+\\([0-9A-F]+\\);"
   (lambda ()
     (let ((code (string-to-int (match-string 1) 16)))
       (format "<img alt=\"hanaJU+%04X\" src=\"%s?char=hana-JU+%04X\"
style=\"vertical-align:middle\">"
               code
               chise-wiki-glyph-cgi-url
               code))))

  (www-format--replace-matches
   "&\\(A-\\|G-\\|g2-\\|R-\\)?\\(UU\\+\\|U-\\)\\([0-9A-F]+\\);"
   (lambda ()
     (let ((code (string-to-int (match-string 3) 16)))
       (format "<img alt=\"UU+%04X\" src=\"https://www.unicode.org/cgi-bin/refglyph?24-%04X\"
style=\"vertical-align:middle\">"
               code
               code)))))

(defun www-format--expand-mcs-references ()
  "Try to replace `&MCS-XXXX;' references in the current buffer.
For each reference the character is looked up by its system char id.
If it has `->subsumptive' or `->denotational' variants, the first
variant whose own encoding contains no `&MCS-' reference is used.
Otherwise, if it has an `ideographic-combination' or
`ideographic-structure' value, the encoded elements of that value are
concatenated and used.  References for which neither applies are left
unchanged."
  (let (start end char variants ret)
    (goto-char (point-min))
    (while (re-search-forward "&MCS-\\([0-9A-F]+\\);" nil t)
      ;; The match bounds are saved because the recursive calls below run
      ;; their own searches and overwrite the match data.
      (setq start (match-beginning 0)
            end (match-end 0)
            char (decode-char 'system-char-id
                              (string-to-int (match-string 1) 16)))
      (cond
       ((setq variants
              (or (www-get-feature-value char '->subsumptive)
                  (www-get-feature-value char '->denotational)))
        (if (characterp variants)
            (setq variants (list variants)))
        ;; Walk the variants until one encodes without an MCS reference.
        (while (and variants
                    (setq ret (www-format-encode-string
                               (char-to-string (car variants))))
                    (string-match "&MCS-\\([0-9A-F]+\\);" ret))
          (setq variants (cdr variants)))
        (unless (string-match "&MCS-\\([0-9A-F]+\\);" ret)
          (goto-char start)
          (delete-region start end)
          (insert ret)))
       ((setq ret (or (www-get-feature-value char 'ideographic-combination)
                      (www-get-feature-value char 'ideographic-structure)))
        (setq ret
              (mapconcat
               (lambda (ch)
                 ;; A list element is resolved to a character when
                 ;; `find-char' can do so; anything still not a character
                 ;; is printed with %S.
                 (let (found)
                   (if (and (listp ch)
                            (characterp (setq found (find-char ch))))
                       (setq ch found)))
                 (if (characterp ch)
                     (www-format-encode-string (char-to-string ch))
                   (www-format-encode-string (format "%S" ch))))
               ret ""))
        (goto-char start)
        (delete-region start end)
        (insert ret))))))

(defun www-format-encode-string (string &optional without-tags as-body)
  "Return STRING encoded for inclusion in an HTML page.
\"<\" and \">\" are always replaced by \"&lt;\" and \"&gt;\".  When
AS-BODY is non-nil, every \"&\" is first replaced by \"&amp;\".  The
text is then encoded with the `utf-8-mcs-er' coding system.

When WITHOUT-TAGS is non-nil nothing more is done.  Otherwise the
encoding is performed with `coded-charset-entity-reference-alist' bound
to `est-coded-charset-entity-reference-alist', and the resulting glyph
entity references are replaced by <img> elements where a rule for them
exists."
  (with-temp-buffer
    (insert string)
    (when as-body
      (goto-char (point-min))
      (while (search-forward "&" nil t)
        ;; This used to insert a bare "&", which left the text unescaped.
        (replace-match "&amp;" nil t)))
    (goto-char (point-min))
    (while (search-forward "<" nil t)
      (replace-match "&lt;" nil t))
    (goto-char (point-min))
    (while (search-forward ">" nil t)
      (replace-match "&gt;" nil t))
    (if without-tags
        (encode-coding-region (point-min) (point-max) 'utf-8-mcs-er)
      ;; The binding stays in effect for the passes below, including the
      ;; recursive calls made while expanding MCS references.
      (let ((coded-charset-entity-reference-alist
             est-coded-charset-entity-reference-alist))
        (encode-coding-region (point-min) (point-max) 'utf-8-mcs-er)
        (www-format--expand-glyph-references)
        (www-format--expand-mcs-references)))
    ;; (goto-char (point-min))
    ;; (while (search-forward "&GT-" nil t)
    ;;   (replace-match "&amp;GT-" t 'literal))
    (buffer-string)))
546
;; Value placed in the style="..." attribute of many of the <img> tags
;; generated by `www-format-encode-string'.  This is a plain `setq', so
;; loading this file overwrites any value set earlier.
(setq www-format-char-img-style "vertical-align:middle;")
548
(defun decode-url-string (string &optional coding-system)
  "Decode the %XX escapes in STRING and decode the result with CODING-SYSTEM.
Each `%' followed by two hexadecimal digits is replaced by the character
with that code; the resulting string is then passed to
`decode-coding-string'.  Returns nil when STRING is empty."
  (when (> (length string) 0)
    (let ((pos 0)
          (pieces nil))
      ;; Collect the literal stretches and the unescaped characters in
      ;; reverse order, then join them once at the end.
      (while (string-match "%\\([0-9A-F][0-9A-F]\\)" string pos)
        (setq pieces (cons (substring string pos (match-beginning 0))
                           pieces))
        (setq pieces (cons (char-to-string
                            (int-char
                             (string-to-int (match-string 1 string) 16)))
                           pieces))
        (setq pos (match-end 0)))
      (setq pieces (cons (substring string pos) pieces))
      (decode-coding-string (apply 'concat (nreverse pieces))
                            coding-system))))
563
;; Version string for www-ids-find.
(defconst www-ids-find-version "0.100.2")

;; Path <chise-system-db-directory>/character/feature/ideographic-products.
(defvar www-ids-find-ideographic-products-file-name
  (expand-file-name "ideographic-products"
                    (expand-file-name
                     "feature"
                     (expand-file-name
                      "character"
                      chise-system-db-directory))))

;; URL prefix of the per-character links written by
;; `www-ids-find-format-char'; the URI-encoded character is appended.
(defvar www-ids-find-char-viewer-url
  "/est/view/character/")

;; URL prefix of the "(link map)" links written by
;; `www-ids-find-format-line'; the UCS code in hexadecimal is appended.
(defvar www-ids-find-chise-link-map-url-prefix
  "http://fonts.jp/chise_linkmap/map.cgi?code=")

;; File that `www-ids-find-format-line' searches for a line consisting of
;; the character's UCS code in decimal.
(defvar www-ids-find-tang-chars-file-name
  "~tomo/projects/chise/ids/www/tang-chars.udd")
582
(defun www-ids-find-format-char (c &optional code-desc)
  "Print an HTML link for character C with `princ'.
The link points to `www-ids-find-char-viewer-url' followed by the
URI-encoded character.  When C has a `=ucs' code point the link body is
a GlyphWiki <img>, followed by \" (C)\" encoded with `utf-8-mcs-er' if
CODE-DESC is non-nil; otherwise the body is C run through
`www-format-encode-string'."
  (let ((ucs-code (encode-char c '=ucs))
        target label)
    (setq target (www-uri-encode-object c))
    (setq label
          (cond
           ((null ucs-code)
            (www-format-encode-string (char-to-string c)))
           (t
            (format "<img alt=\"u%04x\" src=\"%s/u%04x.svg\" style=\"vertical-align:middle; width: 60px; height: 60px\"/>%s"
                    ucs-code
                    chise-wiki-glyphwiki-glyph-image-url
                    ucs-code
                    (if code-desc
                        (encode-coding-string (format " (%c)" c)
                                              'utf-8-mcs-er)
                      "")))))
    (princ (format "<a href=\"%s%s\">%s</a>"
                   www-ids-find-char-viewer-url
                   target
                   label))))
599
(defun www-ids-find-format-ids (ids &optional code-desc)
  "Print each element of the sequence IDS with `www-ids-find-format-char'.
When CODE-DESC is non-nil, also print \" (...)\" holding a textual form
of every element: if a UCS code point can be found for the element
\(directly or through one of the `=>ucs@...' mappings), that UCS
character encoded with `utf-8-mcs-er'; otherwise the element itself
encoded with `utf-8-mcs-er', with a leading \"&\" rewritten to
\"&amp;\"."
  (let ((len (length ids))
        (i 0))
    (while (< i len)
      (www-ids-find-format-char (aref ids i))
      (setq i (1+ i)))
    (when code-desc
      (princ
       (format " (%s)"
               (mapconcat
                (lambda (c)
                  (let ((ucs (or (char-ucs c)
                                 (encode-char c '=>ucs@iso)
                                 (encode-char c '=>ucs@unicode)
                                 (encode-char c '=>ucs@iwds-1)
                                 (encode-char c '=>ucs@iwds-1/normalized)
                                 (encode-char c '=>ucs@component)
                                 (encode-char c '=>ucs@cognate)))
                        ret)
                    (cond (ucs
                           (encode-coding-string
                            (char-to-string (decode-char '=ucs ucs))
                            'utf-8-mcs-er))
                          (t
                           (setq ret (encode-coding-string
                                      (char-to-string c) 'utf-8-mcs-er))
                           ;; The else arm was missing: text not starting
                           ;; with "&" produced nil and was dropped by
                           ;; `mapconcat'.  The length check keeps `aref'
                           ;; off an empty string.
                           (if (and (> (length ret) 0)
                                    (eq (aref ret 0) ?&))
                               (concat "&amp;" (substring ret 1))
                             ret)))))
                ids ""))))))
631
;; Print one result line for character C with `princ'.
;; IS is passed to `ideographic-structure-to-ids' when non-nil.
;; Output, in order: the character link inside <span class="entry">, a
;; Unihan link labelled with the UCS code (or ten spaces when no UCS code
;; is found), a "(link map)" link, the IDS inside <span class="ids">, an
;; optional djvuchar link, and a closing "<br>".
(defun www-ids-find-format-line (c is)
  (let (ucs ids)
    (princ "<span class=\"entry\">")
    (www-ids-find-format-char c 'code-desc)
    (princ "</span>")
    (princ
     (or (if (setq ucs (or (char-ucs c)
                           (encode-char c 'ucs)))
             (format
              " <a href=\"http://www.unicode.org/cgi-bin/GetUnihanData.pl?codepoint=%X\">%s</a>"
              ucs
              ;; Label is U+XXXX up to #xFFFF and U-XXXXXXXX up to
              ;; #x10FFFF; the cond has no clause for larger values.
              (cond ((<= ucs #xFFFF)
                     (format "U+%04X" ucs))
                    ((<= ucs #x10FFFF)
                     (format "U-%08X" ucs))))
           "          ")))
    (when ucs
      (princ
       (format " <a href=\"%s%X\">(link map)</a>"
               www-ids-find-chise-link-map-url-prefix ucs)))
    (princ " ")
    ;; (www-ids-find-format-ideographic-structure is 'code-desc)
    (when is
      (setq ids (ideographic-structure-to-ids is))
      ;; (setq i 0
      ;;       len (length ids))
      (princ "<span class=\"ids\">")      
      (www-ids-find-format-ids ids 'code-desc)
      ;; (while (< i len)
      ;;   (www-ids-find-format-char (aref ids i))
      ;;   (setq i (1+ i)))
      (princ "</span>"))
    ;; The extra link is printed only when the file named by
    ;; `www-ids-find-tang-chars-file-name' has a line consisting of the
    ;; UCS code in decimal.
    (when (and ucs
               (with-current-buffer
                   (find-file-noselect
                    www-ids-find-tang-chars-file-name)
                 (goto-char (point-min))
                 (re-search-forward (format "^%d$" ucs) nil t)))
      (princ
       (format " <a href=\"http://coe21.zinbun.kyoto-u.ac.jp/djvuchar?query=%s\">"
               ;; Query value: each element of C encoded as `utf-8-jp',
               ;; written as %XX.
               (mapconcat
                (lambda (c)
                  (format "%%%02X" (char-int c)))
                (encode-coding-string (char-to-string c)
                                      'utf-8-jp)
                "")))
      ;; NOTE(review): the literal below appears to be ISO-2022-JP text
      ;; shown with its escape sequences; verify against the file's real
      ;; encoding before editing it.
      (princ (encode-coding-string "\e$B"M\e(B[\e$BEbBeBsK\\e(B]</a>" 'utf-8-jp-er)))
    (princ "<br>\n")))
680
(defun www-ids-insert-chars-including-components* (components
                                                   &optional ignored-chars products)
  "Print a nested <ul> of the characters in PRODUCTS and return the skip list.
When PRODUCTS is nil it is computed with `ideograph-find-products' from
COMPONENTS and IGNORED-CHARS.  A sorted copy of PRODUCTS is walked; each
character not already in IGNORED-CHARS is printed with
`www-ids-find-format-line', and the function then recurses with that
character as the component, adding it to IGNORED-CHARS.  The sort order
depends on the number of products: 1024 or more sort by `char-int'
alone; 512 or more by total strokes, then `char-int'; fewer by total
strokes, then `ideograph-char<'.  Returns the updated IGNORED-CHARS."
  (unless products
    (setq products (ideograph-find-products components ignored-chars)))
  (let ((total (length products))
        ordered structure)
    (princ "<ul>\n")
    (setq ordered
          (sort (copy-list products)
                (cond
                 ((>= total 1024)
                  (lambda (a b)
                    (< (char-int a) (char-int b))))
                 ((>= total 512)
                  (lambda (a b)
                    (let ((sa (char-total-strokes a))
                          sb)
                      (cond
                       ((null sa) (< (char-int a) (char-int b)))
                       ((null (setq sb (char-total-strokes b))) t)
                       ((= sa sb) (< (char-int a) (char-int b)))
                       (t (< sa sb))))))
                 (t
                  (lambda (a b)
                    (let ((sa (char-total-strokes a))
                          sb)
                      (cond
                       ((null sa) (ideograph-char< a b))
                       ((null (setq sb (char-total-strokes b))) t)
                       ((= sa sb) (ideograph-char< a b))
                       (t (< sa sb)))))))))
    (dolist (c ordered)
      (unless (memq c ignored-chars)
        (setq structure (char-feature c 'ideographic-structure))
        (princ "<li>")
        (www-ids-find-format-line c structure)
        ;; The recursive call returns the enlarged skip list.
        (setq ignored-chars
              (www-ids-insert-chars-including-components*
               (char-to-string c) (cons c ignored-chars)))))
    (princ "</ul>\n"))
  ignored-chars)
728
(defun www-ids-insert-chars-including-components (components
                                                  &optional ignored-chars)
  "Print the search result for COMPONENTS as HTML and return IGNORED-CHARS.
The products are obtained from `ideograph-find-products'.

With 2048 or more products, each one is only printed with
`www-ids-find-format-char' and IGNORED-CHARS is returned unchanged.

Otherwise the output is produced in three steps:
 1. the products are printed as a nested list by
    `www-ids-insert-chars-including-components*';
 2. for every character printed so far, each of its
    `char-component-variants' that has an `ideographic-structure'
    attribute and was not printed yet is printed, followed by its own
    nested list;
 3. the result of `ideograph-find-products-with-variants' is printed:
    as a flat run of `www-ids-find-format-char' calls when it has 1024
    or more entries, otherwise sorted by stroke count (ties broken by
    `ideograph-char<') with one <li> entry per character.

A notice paragraph is printed when the first search returns 1024 or more
products and when the second returns 512 or more; in either case the
entries of step 3 are printed without their nested lists."
  (let ((products (ideograph-find-products components ignored-chars))
        is len ignore-children)
    (setq len (length products))
    (when (>= len 1024)
      (setq ignore-children t)
      ;; ISO-2022-JP text; appears to say "too many results, so the
      ;; recursive search was omitted" -- confirm against the deployed page.
      (princ
       (encode-coding-string
        "<p>\e$B7k2L$,B?$9$.$k$?$a!":F5"E*8!:w$r>JN,$7$^$7$?!#\e(B</p>"
        'utf-8-jp-er)))
    (if (>= len 2048)
        (dolist (c products)
          (www-ids-find-format-char c))
      ;; Step 1: nested list of the direct products.  The helper returns
      ;; the printed characters newest-first; reverse them so that the
      ;; variants below are visited in printing order.
      (setq ignored-chars
            (nreverse
             (www-ids-insert-chars-including-components*
              components ignored-chars products)))
      ;; Step 2: component variants of everything printed so far.
      ;; `dolist' walks the list as it was when the loop started; the
      ;; `setq' below only affects the `memq' test.
      (dolist (c ignored-chars)
        (dolist (vc (char-component-variants c))
          (unless (memq vc ignored-chars)
            (when (setq is (get-char-attribute vc 'ideographic-structure))
              (princ "<li>")
              (www-ids-find-format-line vc is)
              (setq ignored-chars
                    (www-ids-insert-chars-including-components*
                     (char-to-string vc)
                     (cons vc ignored-chars)))))))
      ;; Step 3: products found through variants.
      (setq products
            (ideograph-find-products-with-variants components ignored-chars))
      (setq len (length products))
      (when (>= len 512)
        (setq ignore-children t)
        ;; ISO-2022-JP text; appears to say "too many results, so the
        ;; recursive search of related characters was omitted" -- confirm.
        (princ
         (encode-coding-string
          "<p>\e$B7k2L$,B?$9$.$k$?$a!"4XO";z$N:F5"E*8!:w$r>JN,$7$^$7$?!#\e(B</p>"
          'utf-8-jp-er)))
      (if (>= len 1024)
          (dolist (c products)
            (www-ids-find-format-char c))
        (dolist (c (sort
                    ;; `sort' is destructive; keep PRODUCTS intact.
                    (copy-list products)
                    (lambda (a b)
                      ;; Characters with a stroke count come first, ordered
                      ;; by that count; characters without one follow.
                      ;; Treating the mixed case explicitly keeps the
                      ;; predicate antisymmetric, as `sort' requires.
                      (let ((a-strokes (char-total-strokes a))
                            (b-strokes (char-total-strokes b)))
                        (cond
                         ((and a-strokes b-strokes)
                          (if (= a-strokes b-strokes)
                              (ideograph-char< a b)
                            (< a-strokes b-strokes)))
                         (a-strokes t)
                         (b-strokes nil)
                         (t (ideograph-char< a b)))))))
          (unless (memq c ignored-chars)
            (setq is (get-char-attribute c 'ideographic-structure))
            (princ "<li>")
            (www-ids-find-format-line c is)
            (unless ignore-children
              (setq ignored-chars
                    (www-ids-insert-chars-including-components*
                     (char-to-string c)
                     (cons c ignored-chars)))))))))
  ignored-chars)
789
(defun www-ids-find-escape-attribute-value (string)
  "Return a copy of STRING that is safe inside a double-quoted HTML attribute.
The characters &, double quote, < and > are replaced by the character
references &amp;, &quot;, &lt; and &gt;; every other character is copied
unchanged."
  (mapconcat (lambda (ch)
               (cond ((eq ch ?&) "&amp;")
                     ((eq ch ?\") "&quot;")
                     ((eq ch ?<) "&lt;")
                     ((eq ch ?>) "&gt;")
                     (t (char-to-string ch))))
             string ""))

(defun www-batch-ids-find ()
  "Batch (CGI) entry point: print the complete IDS-find HTML page.
The first element of `command-line-args-left' is consumed as the query.
An optional leading \"components=\" is removed and the remainder is
URL-decoded with the `utf-8-er' coding system; an empty or missing query
leaves the component string nil.

The function prints, with `princ', a Content-Type header followed by the
page: title banner with version and the modification time of
`www-ids-find-ideographic-products-file-name', the search form (with the
current query echoed into the text field), and then either the search
result for the query or, when there is no query, a usage note and a list
of links.  A footer with copyright and XEmacs CHISE version closes the
page.

While this function runs, `coded-charset-entity-reference-alist' is
extended with the entity-reference prefixes listed below."
  (let ((components (car command-line-args-left))
        (coded-charset-entity-reference-alist
         (list*
          '(=cns11643-1         "C1-" 4 X)
          '(=cns11643-2         "C2-" 4 X)
          '(=cns11643-3         "C3-" 4 X)
          '(=cns11643-4         "C4-" 4 X)
          '(=cns11643-5         "C5-" 4 X)
          '(=cns11643-6         "C6-" 4 X)
          '(=cns11643-7         "C7-" 4 X)
          '(=gb2312             "G0-" 4 X)
          '(=gb12345            "G1-" 4 X)
          '(=jis-x0208@1990     "J90-" 4 X)
          '(=jis-x0212          "JSP-" 4 X)
          '(=cbeta              "CB" 5 d)
          '(=jef-china3         "JC3-" 4 X)
          '(=jis-x0208@1978     "J78-" 4 X)
          '(=jis-x0208@1983     "J83-" 4 X)
          '(=daikanwa           "M-" 5 d)
          coded-charset-entity-reference-alist))
        )
    (setq command-line-args-left (cdr command-line-args-left))
    ;; Normalize the query: strip the form-field name, decode the URL
    ;; escapes, and treat an empty value like a missing one.
    (cond
     ((stringp components)
      (if (string-match "^components=" components)
          (setq components (substring components (match-end 0))))
      (setq components
            (if (> (length components) 0)
                (decode-url-string components 'utf-8-er)
              nil))
      )
     (t
      (setq components nil)
      ))
    ;; HTTP header, document head and the opening of the title banner.
    (princ "Content-Type: text/html; charset=UTF-8

<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"
            \"http://www.w3.org/TR/html4/loose.dtd\">
<html lang=\"ja\">
<head>
<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">
<title>CHISE IDS Find</title>
<link href=\"/css/bootstrap-4.5.0.min.css\" rel=\"stylesheet\">
<style type=\"text/css\">
<!--
.entry { font-size: 36px; }
.entry a img { height: 36px; }
.ids { font-size: 24px; }
.ids a img { height: 24px; }
img { vertical-align:middle; }
a { text-decoration:none; }
ul { margin: 0 0; }
li { margin: 0 0 -0.2em; }
.tooltip {
    position: relative;
    display: inline-block;
}
.tooltip .tooltiptext {
    display: none;
}
-->
</style>
</head>

<body>

<div class=\"jumbotron jumbotron-fluid mb-0\">
<h1 class=\"display-4 text-center\">")
    (princ (encode-coding-string "CHISE IDS \e$B4A;z8!:w\e(B" 'utf-8-jp-er))
    (princ "</h1>")
    (princ "
<p class=\"text-center\">Version ")
    (princ www-ids-find-version)
    ;; Element 5 of `file-attributes' is the last-modification time.
    (princ (format-time-string
            " (Last-modified: %Y-%m-%d %H:%M:%S)</p>"
            (nth 5
                 (file-attributes
                  www-ids-find-ideographic-products-file-name))))
    ;; Search form.
    (princ "
</div>
<div class=\"container mt-0 mw-100 d-inline-block align-top bg-dark\">
<p />
<div class=\"input-group mb-3 h3 my-4\">
<div class=\"input-group-prepend mw-75 ml-3\">
<form action=\"/ids-find\" method=\"GET\">
<span class=\"input-group-text\" id=\"basic-addon1\">
")
    (princ (encode-coding-string "\e$BItIJJ8;zNs\e(B" 'utf-8-jp-er))
    (princ "</span>
</div>
<input type=\"text\" class=\"form-control\" aria-describedby=\"basic-addon1\" name=\"components\" size=\"30\" maxlength=\"256\" value=\"")
    ;; The query comes straight from the request, so it must be escaped
    ;; before it is placed inside the double-quoted attribute; otherwise a
    ;; double quote in the query would end the attribute and let the
    ;; request inject arbitrary markup into the page.
    (if (> (length components) 0)
        (princ (www-ids-find-escape-attribute-value
                (encode-coding-string components 'utf-8-er))))
    (princ "\">
<input class=\"mr-3\" type=\"submit\" value=\"")
    (princ (encode-coding-string "\e$B8!:w3+;O\e(B" 'utf-8-jp-er))
    (princ "\">
</form>
</div>
</div>

")
    ;; Print a notice unless the products file is newer than the running
    ;; executable.  The ISO-2022-JP text appears to say that the system is
    ;; currently being updated -- confirm against the deployed page.
    (unless (file-newer-than-file-p
             www-ids-find-ideographic-products-file-name
             (locate-file (car command-line-args) exec-path))
      (princ (encode-coding-string "<hr>
<p>
\e$B8=:_!"%7%9%F%`$N99?7:n6HCf$G$9!#$7$P$i$/$*BT$A$/$@$5$$!#\e(B
<hr>
" 'utf-8-jp-er))
      ;; (setq components nil)
      )
    (cond
     (components
      ;; A query was given: print the search result.
      (princ "<div class=\"container\">
")
      ;; (map-char-attribute
      ;;  (lambda (c v)
      ;;    (when (every (lambda (p)
      ;;                   (ideographic-structure-member p v))
      ;;                 components)
      ;;      (princ (encode-coding-string
      ;;              (ids-find-format-line c v)
      ;;              'utf-8-jp-er))
      ;;      (princ "<br>\n")
      ;;      )
      ;;    nil)
      ;;  'ideographic-structure)
      ;; For a single-character query, show that character itself first.
      (when (= (length components) 1)
        (www-ids-find-format-line (aref components 0)
                                  (char-feature (aref components 0)
                                                'ideographic-structure)))
      ;; (dolist (c (ideographic-products-find components))
      ;;   (setq is (char-feature c 'ideographic-structure))
      ;;   ;; to avoid problems caused by wrong indexes
      ;;   (when (every (lambda (c)
      ;;                  (ideographic-structure-member c is))
      ;;                components)
      ;;     (www-ids-find-format-line c is)))
      ;; (princ "<ul>\n")
      (www-ids-insert-chars-including-components components)
      ;; (princ "</ul>\n")
      (princ "</div>\n")
      )
     (t
      ;; No query: print the usage note and the list of links.
      (princ (encode-coding-string "<div class=\"container mt-4\">
<div class=\"ml-3\">
<p>
\e$B;XDj$7$?ItIJ$rA4$F4^$`4A;z$N0lMw$rI=<($7$^$9!#\e(B
</p>
<p>
CHISE \e$B$GMQ$$$i$l$k<BBV;2>H7A<0!JNc!'\e(B&amp;M-00256;\e$B!K$GItIJ$r;XDj$9$k;v$b$G$-$^$9!#\e(B
</p>
</div>
" 'utf-8-jp-er))
      (princ (encode-coding-string "
<p  class=\"ml-0\">
\[Links\]
<ul>
<li><a href=\"https://www.chise.org/ipns/ids-find.chise.org/index.ja.html\"
>IPFS \e$BHG\e(B CHISE IDS \e$B4A;z8!:w\e(B</a></li>
</ul>
<ul>
<li><a href=\"http://www.shuiren.org/chuden/toyoshi/syoseki/chise_ids.html\"
>\e$B!V\e(BCHISE IDS FIND\e$B$G4A;z$r8!:w!W\e(B</a> \e$B!=\e(B \e$B;3ED?r?N$5$s!J\e(B<a
href=\"http://www.shuiren.org/\">\e$B?g?MDb\e(B</a>\e$B!K$K$h$k2r@b\e(B
</ul>
<ul>
<li><a href=\"http://www.karitsu.org/tools/firefox_plugin.htm\"
>Firefox \e$BMQ\e(B plugin</a> by \e$B=);3M[0lO:$5$s!J\e(B<a href=\"http://www.karitsu.org/\"
>\e$B2aN)c7\e(B</a>\e$B!K\e(B
</ul>
<ul>
<li><a href=\"http://git.chise.org/gitweb/?p=chise/ids.git;a=blob;f=www/www-ids-find.el\"
>www-ids-find.el (source file (Emacs Lisp part))
<li><a href=\"http://www.chise.org/ids/\"
>\e$B!V\e(BCHISE \e$B4A;z9=B$>pJs%G!<%?%Y!<%9!W\e(B</a>
<li><a href=\"http://fonts.jp/chise_linkmap/\"
>\e$B!V\e(Bchise_linkmap : CHISE \e$B4A;zO"4D?^!W\e(B</a> by \e$B>eCO9(0l$5$s\e(B
<li><a href=\"http://www.chise.org/\"
>CHISE Project</a>
</ul>
<ul>
<li><a href=\"http://coe21.zinbun.kyoto-u.ac.jp/djvuchar\"
>\e$B!VBsK\J8;z%G!<%?%Y!<%9!W\e(B</a> by
<a href=\"http://coe21.zinbun.kyoto-u.ac.jp/\"
>\e$B5~ETBg3X\e(B21\e$B@$5*\e(BCOE\e$B!VEl%"%8%"@$3&$N?MJ8>pJs3X8&5f650i5rE@!W\e(B</a>
<li><a href=\"http://www.unicode.org/\"
>Unicode</a>
</ul>
</p>
</div>
"
 'utf-8-jp-er))

      ))
    ;; Footer.
    (princ "<hr>
<div class=\"container\">
")
    (princ "<div class=\"ml-0\">
Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2015, 2016, 2017, 2020, 2021, 2022, 2023 <a href=\"http://kanji.zinbun.kyoto-u.ac.jp/~tomo/\"
>MORIOKA Tomohiko</a></div>")
    (princ
     (format
      "<div>Powered by <a
href=\"http://www.chise.org/xemacs/\"
>XEmacs CHISE</a> %s.</div>"
      (encode-coding-string xemacs-chise-version 'utf-8-jp-er)))
    (princ "
</div>
</body>
</html>
")))