e397773e490f0a337f10f5219733b8e373459879
[chise/ids.git] / www / www-ids-find.el
1 (require 'ids-find)
2 (require 'cwiki-common)
3
(defun www-format-encode-string (string &optional without-tags as-body)
  "Return STRING converted to text suitable for an HTML page.

STRING is inserted into a temporary buffer and processed there:

 - when AS-BODY is non-nil, every `&' is escaped as `&amp;';
 - every `<' and `>' is escaped as `&lt;' and `&gt;';
 - the buffer is encoded with the coding system `utf-8-mcs-er'.

When WITHOUT-TAGS is non-nil nothing else is done.  Otherwise the
encoding is performed with `coded-charset-entity-reference-alist'
bound to `est-coded-charset-entity-reference-alist', and each
entity reference of a known form that is then present in the
buffer (such as `&CB01234;', `&J90-3021;' or `&GT-12345;') is
replaced by an <img> element pointing at a glyph image.  An
`&MCS-XXXX;' reference is replaced by the encoded form of a
variant character or of the character's ideographic structure when
one is available; otherwise it is left untouched.

The value is the resulting buffer contents."
  (with-temp-buffer
    (insert string)
    (let (plane code subcode start end char variants ret rret)
      (when as-body
        (goto-char (point-min))
        (while (search-forward "&" nil t)
          ;; Escape literal ampersands so they cannot be mistaken for
          ;; the start of an entity reference.
          (replace-match "&amp;" nil t)))
      (goto-char (point-min))
      (while (search-forward "<" nil t)
        (replace-match "&lt;" nil t))
      (goto-char (point-min))
      (while (search-forward ">" nil t)
        (replace-match "&gt;" nil t))
      (if without-tags
          (encode-coding-region (point-min)(point-max) 'utf-8-mcs-er)
        (let ((coded-charset-entity-reference-alist
               est-coded-charset-entity-reference-alist))
          (encode-coding-region (point-min)(point-max) 'utf-8-mcs-er)

          ;; Each pass below rescans the whole buffer for one family
          ;; of entity references and replaces every match by an
          ;; <img> element.  The newline inside each format string is
          ;; part of the emitted HTML.

          ;; CBETA gaiji bitmaps (decimal code).
          (goto-char (point-min))
          (while (re-search-forward "&\\(A-\\|G-\\|g2-\\|R-\\)?CB\\([0-9]+\\);" nil t)
            (setq code (string-to-int (match-string 2)))
            (replace-match
             (format "<img alt=\"CB%05d\" src=\"%s/cb-gaiji/%02d/CB%05d.gif\"
style=\"%s\">"
                     code
                     chise-wiki-legacy-bitmap-glyphs-url
                     (/ code 1000) code
                     www-format-char-img-style)
             t 'literal))

          ;; JIS bitmaps; the file name is built from the high and
          ;; low byte of the code, each minus 32.
          (goto-char (point-min))
          (while (re-search-forward "&\\(o-\\|G-\\|g2-\\|R-\\)?J\\(78\\|83\\|90\\|97\\|SP\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
            (setq plane (match-string 2)
                  code (string-to-int (match-string 3) 16))
            (replace-match
             (format "<img alt=\"J%s-%04X\" src=\"%s/JIS-%s/%02d-%02d.gif\"
style=\"%s\">"
                     plane code
                     chise-wiki-legacy-bitmap-glyphs-url
                     plane
                     (- (lsh code -8) 32)
                     (- (logand code 255) 32)
                     www-format-char-img-style)
             t 'literal))

          ;; J0-XXXX uses the JIS-90 image directory.
          (goto-char (point-min))
          (while (re-search-forward "&\\(o-\\|G-\\|g2-\\|R-\\)?J0-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
            (setq code (string-to-int (match-string 2) 16))
            (replace-match
             (format "<img alt=\"J0-%04X\" src=\"%s/JIS-90/%02d-%02d.gif\"
style=\"%s\">"
                     code
                     chise-wiki-legacy-bitmap-glyphs-url
                     (- (lsh code -8) 32)
                     (- (logand code 255) 32)
                     www-format-char-img-style)
             t 'literal))

          ;; Hanyo-Denshi sets addressed by row/cell numbers.
          (goto-char (point-min))
          (while (re-search-forward "&\\(o-\\|G-\\|g2-\\|R-\\)?HD-\\(JA\\|JB\\|JC\\|JD\\|FT\\|IA\\|IB\\|HG\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
            (setq plane (match-string 2)
                  code (string-to-int (match-string 3) 16))
            (replace-match
             (format "<img alt=\"HD-%s-%04X\" src=\"%s/IVD/HanyoDenshi/%s%02d%02d.png\"
style=\"%s\">"
                     plane code
                     chise-wiki-legacy-bitmap-glyphs-url
                     plane
                     (- (lsh code -8) 32)
                     (- (logand code 255) 32)
                     www-format-char-img-style)
             t 'literal))

          ;; Hanyo-Denshi sets addressed by the hexadecimal code.
          (goto-char (point-min))
          (while (re-search-forward "&\\(o-\\|G-\\|g2-\\|R-\\)?HD-\\(IP\\|JT\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
            (setq plane (match-string 2)
                  code (string-to-int (match-string 3) 16))
            (replace-match
             (format "<img alt=\"HD-%s-%04X\" src=\"%s/IVD/HanyoDenshi/%s%04X.png\"
style=\"%s\">"
                     plane code
                     chise-wiki-legacy-bitmap-glyphs-url
                     plane code
                     www-format-char-img-style)
             t 'literal))

          (goto-char (point-min))
          (while (re-search-forward "&\\(o-\\|G-\\|g2-\\|R-\\)?HD-KS-\\([0-9]+\\);" nil t)
            (setq code (string-to-int (match-string 2)))
            (replace-match
             (format "<img alt=\"HD-KS%06d\" src=\"%s/IVD/HanyoDenshi/KS%06d.png\"
style=\"vertical-align:middle\">"
                     code
                     chise-wiki-legacy-bitmap-glyphs-url
                     code)
             t 'literal))

          (goto-char (point-min))
          (while (re-search-forward "&\\(o-\\|G-\\|g2-\\|R-\\)?HD-TK-\\([0-9]+\\);" nil t)
            (setq code (string-to-int (match-string 2)))
            (replace-match
             ;; The alt text names the TK set, in step with the image
             ;; file name.
             (format "<img alt=\"HD-TK%08d\" src=\"%s/IVD/HanyoDenshi/TK%08d.png\"
style=\"vertical-align:middle\">"
                     code
                     chise-wiki-legacy-bitmap-glyphs-url
                     code)
             t 'literal))

          (goto-char (point-min))
          (while (re-search-forward "&G\\([01]\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
            (setq plane (string-to-int (match-string 1))
                  code (string-to-int (match-string 2) 16))
            (replace-match
             (format "<img alt=\"GB%d-%04X\" src=\"%s/GB%d/%02d-%02d.gif\"
style=\"%s\">"
                     plane code
                     chise-wiki-legacy-bitmap-glyphs-url
                     plane
                     (- (lsh code -8) 32)
                     (- (logand code 255) 32)
                     www-format-char-img-style)
             t 'literal))

          (goto-char (point-min))
          (while (re-search-forward "&\\(R-\\)?C\\([1-7]\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
            (setq plane (string-to-int (match-string 2))
                  code (string-to-int (match-string 3) 16))
            (replace-match
             (format "<img alt=\"CNS%d-%04X\" src=\"%s/CNS%d/%04X.gif\"
style=\"%s\">"
                     plane code
                     chise-wiki-legacy-bitmap-glyphs-url
                     plane code
                     www-format-char-img-style)
             t 'literal))

          (goto-char (point-min))
          (while (re-search-forward "&\\(R-\\)?JC3-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
            (setq code (string-to-int (match-string 2) 16))
            (replace-match
             (format "<img alt=\"JC3-%04X\" src=\"%s/JEF-CHINA3/%04X.png\">"
                     code chise-wiki-bitmap-glyph-image-url code)
             t 'literal))

          (goto-char (point-min))
          (while (re-search-forward "&\\(A-\\)?ZOB-\\([0-9]+\\);" nil t)
            (setq code (string-to-int (match-string 2)))
            (replace-match
             (format "<img alt=\"ZOB-%04d\" src=\"%s/ZOB-1968/%04d.png\"
style=\"vertical-align:middle\">"
                     code
                     chise-wiki-legacy-bitmap-glyphs-url
                     code)
             t 'literal))

          (goto-char (point-min))
          (while (re-search-forward "&\\(A2-\\|g2-\\|R-\\)?DJT-\\([0-9]+\\);" nil t)
            (setq code (string-to-int (match-string 2)))
            (replace-match
             (format "<img alt=\"DJT-%05d\" src=\"%s/%05d.png\"
style=\"vertical-align:middle; width: auto; max-height: 60px\">"
                     code
                     chise-wiki-daijiten-bitmap-glyphs-url
                     code)
             t 'literal))

          (goto-char (point-min))
          (while (re-search-forward "&SW-JIGUGE\\([45]?\\)-\\([0-9]+\\);" nil t)
            (setq subcode (match-string 1)
                  code (string-to-int (match-string 2)))
            ;; A reference without an explicit digit is treated as "5".
            (setq plane
                  (if (string= subcode "")
                      "5"
                    subcode))
            (replace-match
             (format "<img alt=\"SW-JIGUGE%s-%05d\" src=\"%s/ShuoWen/Jiguge%s/%05d.png\"
style=\"vertical-align:middle; width: auto; max-height: 80px\">"
                     plane code
                     chise-wiki-legacy-bitmap-glyphs-url
                     plane code)
             t 'literal))

          (goto-char (point-min))
          (while (re-search-forward "&HNG\\([0-9]+\\)-\\([0-9][0-9][0-9][0-9]\\)\\([0-9]\\);" nil t)
            (setq plane (match-string 1)
                  code (string-to-int (match-string 2))
                  subcode (string-to-int (match-string 3)))
            ;; The final digit selects a suffix: 0 means none, 1..9
            ;; become the letters a..i.
            (setq subcode
                  (if (eq subcode 0)
                      ""
                    (char-to-string (decode-char 'ascii (+ 96 subcode)))))
            (replace-match
             (format
              "<img alt=\"HNG%s-%04d%s\" src=\"%s/%s/%04d%s.png\" style=\"
vertical-align:middle; width: auto; max-height: 60px\">"
              plane code subcode
              chise-wiki-hng-bitmap-glyphs-url
              plane code subcode
              )
             t 'literal))

          ;; HDIC references are replaced only when the character
          ;; carries the attribute that names its image.
          (goto-char (point-min))
          (while (re-search-forward "&\\(R-\\)?CHISE-HDIC-TSJ\\([0-9A-F]+\\);" nil t)
            (setq code (string-to-int (match-string 2) 16))
            (setq char (decode-char '===chise-hdic-tsj code))
            (when (setq ret (get-char-attribute char '=hdic-tsj-glyph-id))
              (replace-match
               (format
                "<img alt=\"HDIC-TSJ-%s\" src=\"https://viewer.hdic.jp/img/tsj/%s.jpg\" style=\"
vertical-align:middle; width: auto; max-height: 60px\">"
                ret ret)
               t 'literal)))

          (goto-char (point-min))
          (while (re-search-forward "&\\(R-\\)?CHISE-HDIC-SYP\\([0-9A-F]+\\);" nil t)
            (setq code (string-to-int (match-string 2) 16))
            (setq char (decode-char '===chise-hdic-syp code))
            (when (setq ret (get-char-attribute char '=hdic-syp-entry-id))
              (replace-match
               (format
                "<img alt=\"HDIC-SYP-%s\" src=\"https://viewer.hdic.jp/img/syp/%s\" style=\"
vertical-align:middle; width: auto; max-height: 60px\">"
                ret ret)
               t 'literal)))

          (goto-char (point-min))
          (while (re-search-forward "&\\(R-\\)?CHISE-HDIC-KTB\\([0-9A-F]+\\);" nil t)
            (setq code (string-to-int (match-string 2) 16))
            (setq char (decode-char '===chise-hdic-ktb code))
            (when (setq ret (get-char-attribute char '=hdic-ktb-entry-id))
              (replace-match
               (format
                "<img alt=\"HDIC-KTB-%s\" src=\"https://hdic.chise.org/img/ktb/%s.jpg\" style=\"
vertical-align:middle; width: auto; max-height: 60px\">"
                ret ret)
               t 'literal)))

          (goto-char (point-min))
          (while (re-search-forward "&\\(o-\\|G-\\|g2-\\|R-\\)?AJ1-\\([0-9]+\\);" nil t)
            (setq code (string-to-int (match-string 2)))
            (replace-match
             (format "<img alt=\"AJ1-%05d\" src=\"%s/IVD/AdobeJapan1/CID+%d.png\"
style=\"vertical-align:middle\">"
                     code
                     chise-wiki-legacy-bitmap-glyphs-url
                     code)
             t 'literal))

          (goto-char (point-min))
          (while (re-search-forward "&\\(A-\\|o-\\|G-\\|g2-\\|R-\\)?MJ\\([0-9]+\\);" nil t)
            (setq code (string-to-int (match-string 2)))
            (replace-match
             (format "<img alt=\"MJ%06d\" src=\"https://moji.or.jp/mojikibansearch/img/MJ/MJ%06d.png\"
style=\"vertical-align:middle; width: 48px; height: 48px\">"
                     code
                     code)
             t 'literal))

          ;; The passes from here to the U-v pass point at SVG images
          ;; under `chise-wiki-glyphwiki-glyph-image-url'.
          (goto-char (point-min))
          (while (re-search-forward "&\\(o-\\|G-\\|g2-\\)?IU[+-]\\([0-9A-F]+\\);" nil t)
            (setq code (string-to-int (match-string 2) 16))
            (replace-match
             (format "<img alt=\"u%04x\" src=\"%s/u%04x.svg\"
style=\"vertical-align:middle; width: 48px; height: 48px\">"
                     code
                     chise-wiki-glyphwiki-glyph-image-url
                     code)
             t 'literal))

          (goto-char (point-min))
          (while (re-search-forward "&\\(o-\\|G-\\|g2-\\|R-\\)?KU[+-]\\([0-9A-F]+\\);" nil t)
            (setq code (string-to-int (match-string 2) 16))
            (replace-match
             (format "<img alt=\"u%04x-k\" src=\"%s/u%04x-k.svg\"
style=\"vertical-align:middle; width: 48px; height: 48px\">"
                     code
                     chise-wiki-glyphwiki-glyph-image-url
                     code)
             t 'literal))

          (goto-char (point-min))
          (while (re-search-forward "&A-\\(comp\\|cgn\\)U[+-]\\([0-9A-F]+\\);" nil t)
            (setq code (string-to-int (match-string 2) 16))
            (replace-match
             (format "<img alt=\"u%04x\" src=\"%s/u%04x.svg\"
style=\"vertical-align:middle; width: 48px; height: 48px\">"
                     code
                     chise-wiki-glyphwiki-glyph-image-url
                     code)
             t 'literal))

          (goto-char (point-min))
          (while (re-search-forward
                  "&\\(A-\\|g2-\\)?U-i\\([0-9]+\\)\\+\\([0-9A-F]+\\);"
                  nil t)
            (setq plane (string-to-int (match-string 2))
                  code (string-to-int (match-string 3) 16))
            (replace-match
             (format "<img alt=\"u%04x-itaiji-%03d\" src=\"%s/u%04x-itaiji-%03d.svg\"
style=\"vertical-align:middle; width: 48px; height: 48px\">"
                     code
                     plane
                     chise-wiki-glyphwiki-glyph-image-url
                     code
                     plane)
             t 'literal))

          (goto-char (point-min))
          (while (re-search-forward "&A-IWDSU\\+\\([0-9A-F]+\\);" nil t)
            (setq code (string-to-int (match-string 1) 16))
            (replace-match
             (format "<img alt=\"A-IWDSU+%04x\" src=\"%s/u%04x.svg\"
style=\"vertical-align:middle; width: 48px; height: 48px\">"
                     code
                     chise-wiki-glyphwiki-glyph-image-url
                     code)
             t 'literal))

          (goto-char (point-min))
          (while (re-search-forward
                  "&\\(A-\\)?CDP-i\\([0-9]+\\)-\\([0-9A-F]+\\);"
                  nil t)
            (setq plane (string-to-int (match-string 2))
                  code (string-to-int (match-string 3) 16))
            (replace-match
             (format "<img alt=\"cdp-%04x-itaiji-%03d\" src=\"%s/cdp-%04x-itaiji-%03d.svg\"
style=\"vertical-align:middle; width: 48px; height: 48px\">"
                     code
                     plane
                     chise-wiki-glyphwiki-glyph-image-url
                     code
                     plane)
             t 'literal))

          (goto-char (point-min))
          (while (re-search-forward
                  "&\\(A-\\)?CDP-v\\([0-9]+\\)-\\([0-9A-F]+\\);"
                  nil t)
            (setq plane (string-to-int (match-string 2))
                  code (string-to-int (match-string 3) 16))
            (replace-match
             (format "<img alt=\"cdp-%04x-var-%03d\" src=\"%s/cdp-%04x-var-%03d.svg\"
style=\"vertical-align:middle; width: 48px; height: 48px\">"
                     code
                     plane
                     chise-wiki-glyphwiki-glyph-image-url
                     code
                     plane)
             t 'literal))

          (goto-char (point-min))
          (while (re-search-forward
                  "&\\(A-\\|G-\\|g2-\\|R-\\)?M-\\([0-9]+\\);"
                  nil t)
            (setq code (string-to-int (match-string 2)))
            (replace-match
             (format "<img alt=\"dkw-%05d\" src=\"%s/dkw-%05d.svg\"
style=\"vertical-align:middle; width: 48px; height: 48px\">"
                     code
                     chise-wiki-glyphwiki-glyph-image-url
                     code)
             t 'literal))

          (goto-char (point-min))
          (while (re-search-forward "&\\(g2-\\)?U-v\\([0-9]+\\)\\+\\([0-9A-F]+\\);" nil t)
            (setq plane (string-to-int (match-string 2))
                  code (string-to-int (match-string 3) 16))
            (replace-match
             (format "<img alt=\"u%04x-var-%03d\" src=\"%s/u%04x-var-%03d.svg\"
style=\"vertical-align:middle; width: 48px; height: 48px\">"
                     code
                     plane
                     chise-wiki-glyphwiki-glyph-image-url
                     code
                     plane)
             t 'literal))

          ;; The passes from here to the hanaJU pass request the glyph
          ;; through `chise-wiki-glyph-cgi-url'.
          (goto-char (point-min))
          (while (re-search-forward "&\\(A-\\|G-\\|R-\\|g2-\\)?GT-\\([0-9]+\\);" nil t)
            (setq code (string-to-int (match-string 2)))
            (replace-match
             (format "<img alt=\"GT-%05d\" src=\"%s?char=GT-%05d\"
style=\"%s\">"
                     code
                     chise-wiki-glyph-cgi-url
                     code
                     www-format-char-img-style)
             t 'literal))

          (goto-char (point-min))
          (while (re-search-forward "&\\(A-\\|G-\\|g2-\\)?GT-K\\([0-9]+\\);" nil t)
            (setq code (string-to-int (match-string 2)))
            (replace-match
             (format "<img alt=\"GT-K%05d\" src=\"%s?char=GT-K%05d\"
style=\"%s\">"
                     code
                     chise-wiki-glyph-cgi-url
                     code
                     www-format-char-img-style)
             t 'literal))

          (goto-char (point-min))
          (while (re-search-forward "&B-\\([0-9A-F]+\\);" nil t)
            (setq code (string-to-int (match-string 1) 16))
            (replace-match
             (format "<img alt=\"B-%04X\" src=\"%s?char=B-%04X\"
style=\"%s\">"
                     code
                     chise-wiki-glyph-cgi-url
                     code
                     www-format-char-img-style)
             t 'literal))

          (goto-char (point-min))
          (while (re-search-forward
                  "&\\(A-\\|G-\\|g2-\\|R-\\)?CDP-\\([0-9A-F]+\\);" nil t)
            (setq code (string-to-int (match-string 2) 16))
            (replace-match
             (format "<img alt=\"CDP-%04X\" src=\"%s?char=CDP-%04X\"
style=\"%s\">"
                     code
                     chise-wiki-glyph-cgi-url
                     code
                     www-format-char-img-style)
             t 'literal))

          (goto-char (point-min))
          (while (re-search-forward
                  "&\\(I-\\)?HZK\\(0[1-9]\\|1[0-2]\\)-\\([0-9A-F]+\\);" nil t)
            (setq plane (match-string 2)
                  code (string-to-int (match-string 3) 16))
            (replace-match
             (format "<img alt=\"HZK%s-%04X\" src=\"%s?char=HZK%s-%04X\"
style=\"%s\">"
                     plane
                     code
                     chise-wiki-glyph-cgi-url
                     plane
                     code
                     www-format-char-img-style)
             t 'literal))

          (goto-char (point-min))
          (while (re-search-forward "&\\(A-\\|G-\\|g2-\\|R-\\)?RUI6-\\([0-9A-F]+\\);" nil t)
            (setq code (string-to-int (match-string 2) 16))
            (replace-match
             (format "<img alt=\"RUI6-%04X\" src=\"%s?char=RUI6-%04X\"
style=\"vertical-align:middle\">"
                     code
                     chise-wiki-glyph-cgi-url
                     code)
             t 'literal))

          (goto-char (point-min))
          (while (re-search-forward "&hanaJU\\+\\([0-9A-F]+\\);" nil t)
            (setq code (string-to-int (match-string 1) 16))
            (replace-match
             (format "<img alt=\"hanaJU+%04X\" src=\"%s?char=hana-JU+%04X\"
style=\"vertical-align:middle\">"
                     code
                     chise-wiki-glyph-cgi-url
                     code)
             t 'literal))

          (goto-char (point-min))
          (while (re-search-forward "&\\(A-\\|G-\\|g2-\\|R-\\)?\\(UU\\+\\|U-\\)\\([0-9A-F]+\\);" nil t)
            (setq code (string-to-int (match-string 3) 16))
            (replace-match
             (format "<img alt=\"UU+%04X\" src=\"https://www.unicode.org/cgi-bin/refglyph?24-%04X\"
style=\"vertical-align:middle\">"
                     code
                     code)
             t 'literal))

          ;; &MCS-XXXX; references: try a variant character first,
          ;; then the character's structure description.
          (goto-char (point-min))
          (while (re-search-forward "&MCS-\\([0-9A-F]+\\);" nil t)
            (setq code (string-to-int (match-string 1) 16))
            ;; Remember the match bounds now; the recursive calls
            ;; below clobber the match data.
            (setq start (match-beginning 0)
                  end (match-end 0))
            (setq char (decode-char 'system-char-id code))
            (cond
             ((and (setq variants
                         (or (www-get-feature-value char '->subsumptive)
                             (www-get-feature-value char '->denotational)))
                   (progn
                     (if (characterp variants)
                         (setq variants (list variants)))
                     ;; Walk the variants until one encodes to
                     ;; something other than another &MCS-...;
                     ;; reference.
                     (while (and variants
                                 (setq ret (www-format-encode-string
                                            (char-to-string (car variants))))
                                 (string-match "&MCS-\\([0-9A-F]+\\);" ret))
                       (setq variants (cdr variants)))
                     ret))
              (unless (string-match "&MCS-\\([0-9A-F]+\\);" ret)
                (goto-char start)
                (delete-region start end)
                (insert ret))
              )
             ((setq ret (or (www-get-feature-value char 'ideographic-combination)
                            (www-get-feature-value char 'ideographic-structure)))
              (setq ret
                    (mapconcat
                     (lambda (ch)
                       ;; A list element is looked up with `find-char'
                       ;; and used as a character when that succeeds.
                       (if (listp ch)
                           (if (characterp (setq rret (find-char ch)))
                               (setq ch rret)))
                       (if (characterp ch)
                           (www-format-encode-string
                            (char-to-string ch) without-tags)
                         (www-format-encode-string
                          (format "%S" ch) without-tags)))
                     ret ""))
              (when ret
                (goto-char start)
                (delete-region start end)
                (insert ret))
              )))
          ))
      ;; (goto-char (point-min))
      ;; (while (search-forward "&GT-" nil t)
      ;;   (replace-match "&amp;GT-" t 'literal))
      (buffer-string))))
546
;; Declare the variable before assigning it so that it is known as a
;; special variable and carries documentation.  The `setq' is kept so
;; the value is installed even when the variable is already bound.
(defvar www-format-char-img-style nil
  "Inline CSS inserted into the style attribute of glyph <img> tags.
`www-format-encode-string' substitutes it for the style placeholder
of the image tags it generates.")
(setq www-format-char-img-style "vertical-align:middle;")
548
(defun decode-url-string (string &optional coding-system)
  "Decode the percent-escapes in STRING, then decode it with CODING-SYSTEM.

Every `%XX' sequence, where XX are two hexadecimal digits in either
case, is replaced by the character whose code is XX.  The resulting
string is passed to `decode-coding-string' together with
CODING-SYSTEM and that result is returned.

Return nil when STRING is empty or nil."
  (if (> (length string) 0)
      (let ((i 0)
            ;; Match hex digits by the explicit character class only,
            ;; independent of the caller's `case-fold-search'.
            (case-fold-search nil)
            pieces)
        (while (string-match "%\\([0-9A-Fa-f][0-9A-Fa-f]\\)" string i)
          ;; Collect the literal text before the escape and the
          ;; decoded character (in reverse order); they are joined
          ;; once after the loop.
          (setq pieces
                (cons (char-to-string
                       (int-char
                        (string-to-int (match-string 1 string) 16)))
                      (cons (substring string i (match-beginning 0))
                            pieces))
                i (match-end 0)))
        (decode-coding-string
         (apply 'concat (nreverse (cons (substring string i) pieces)))
         coding-system))))
563
(defconst www-ids-find-version "0.100"
  "Version identifier of www-ids-find.")

(defvar www-ids-find-ideographic-products-file-name
  ;; Descend from `chise-system-db-directory' one path component at a
  ;; time: character/feature/ideographic-products.
  (let ((path chise-system-db-directory))
    (dolist (component '("character" "feature" "ideographic-products") path)
      (setq path (expand-file-name component path))))
  "File name character/feature/ideographic-products, expanded
relative to `chise-system-db-directory'.")

(defvar www-ids-find-char-viewer-url
  "/est/view/character/"
  "URL prefix of the character links made by `www-ids-find-format-char'.
The URI-encoded character is appended to it.")

(defvar www-ids-find-chise-link-map-url-prefix
  "http://fonts.jp/chise_linkmap/map.cgi?code="
  "URL prefix of the link-map anchor made by `www-ids-find-format-line'.
The UCS code in hexadecimal is appended to it.")

(defvar www-ids-find-tang-chars-file-name
  "~tomo/projects/chise/ids/www/tang-chars.udd"
  "File that `www-ids-find-format-line' searches for a line holding
the decimal UCS code of the character being formatted.")
582
(defun www-ids-find-format-char (c &optional code-desc)
  "Print, with `princ', an HTML link for the character C.

The link target is `www-ids-find-char-viewer-url' followed by the
result of `www-uri-encode-object' applied to C.  The link body is
an <img> element for the SVG image named after the code returned by
\(encode-char C '=ucs) when that code exists; otherwise it is C
encoded by `www-format-encode-string'.

CODE-DESC is accepted but not used."
  (let* ((ucs-code (encode-char c '=ucs))
         (target (www-uri-encode-object c))
         (body
          (if (null ucs-code)
              (www-format-encode-string (char-to-string c))
            (format "<img alt=\"u%04x\" src=\"%s/u%04x.svg\" style=\"vertical-align:middle; width: 60px; height: 60px\"/>"
                    ucs-code
                    chise-wiki-glyphwiki-glyph-image-url
                    ucs-code))))
    (princ
     (format "<a href=\"%s%s\">%s</a>"
             www-ids-find-char-viewer-url
             target
             body))))
596
(defun www-ids-find-format-line (c is)
  "Print, with `princ', one HTML result line for the character C.

The line consists of, in order:
 - C itself, formatted by `www-ids-find-format-char' inside a span
   of class entry;
 - when C has a UCS code, a link to the Unihan database labelled
   U+XXXX or U-XXXXXXXX, followed by a link built from
   `www-ids-find-chise-link-map-url-prefix'; otherwise ten spaces;
 - when IS is non-nil, the characters of the IDS obtained from IS
   with `ideographic-structure-to-ids', inside a span of class ids;
 - when the decimal UCS code occurs as a whole line in the file
   `www-ids-find-tang-chars-file-name', a link to the djvuchar
   service whose query is the percent-encoded utf-8-jp form of C;
 - a <br> element and a newline."
  (let (ucs len i ids)
    (princ "<span class=\"entry\">")
    (www-ids-find-format-char c 'code-desc)
    (princ "</span>")
    (princ
     (if (setq ucs (or (char-ucs c)
                       (encode-char c 'ucs)))
         (format
          " <a href=\"http://www.unicode.org/cgi-bin/GetUnihanData.pl?codepoint=%X\">%s</a>"
          ucs
          (cond ((<= ucs #xFFFF)
                 (format "U+%04X" ucs))
                ((<= ucs #x10FFFF)
                 (format "U-%08X" ucs))))
       "          "))
    (when ucs
      (princ
       (format " <a href=\"%s%X\">(link map)</a>"
               www-ids-find-chise-link-map-url-prefix ucs)))
    (princ " ")
    (when is
      (setq ids (ideographic-structure-to-ids is))
      (setq i 0
            len (length ids))
      (princ "<span class=\"ids\">")
      (while (< i len)
        (www-ids-find-format-char (aref ids i))
        (setq i (1+ i)))
      (princ "</span>"))
    (when (and ucs
               (with-current-buffer
                   (find-file-noselect
                    www-ids-find-tang-chars-file-name)
                 (goto-char (point-min))
                 (re-search-forward (format "^%d$" ucs) nil t)))
      (princ
       (format " <a href=\"http://coe21.zinbun.kyoto-u.ac.jp/djvuchar?query=%s\">"
               (mapconcat
                (lambda (c)
                  (format "%%%02X" (char-int c)))
                (encode-coding-string (char-to-string c)
                                      'utf-8-jp)
                "")))
      ;; The link text is Japanese.  It is written here as an
      ;; ASCII-only ISO-2022-JP byte sequence (ESC spelled \e, the
      ;; embedded double quote and backslash escaped) and decoded at
      ;; run time, so reading this file does not depend on the coding
      ;; system used to load it.
      (princ (encode-coding-string
              (decode-coding-string
               "\e$B\"M\e(B[\e$BEbBeBsK\\\e(B]</a>"
               'iso-2022-jp)
              'utf-8-jp-er)))
    (princ "<br>\n")))
643
(defun www-ids-insert-chars-including-components* (components
                                                   &optional ignored-chars products)
  "Print a nested HTML list of the characters that include COMPONENTS.

PRODUCTS is the list of characters to show; when it is nil the list
is obtained from `ideograph-find-products' called with COMPONENTS
and IGNORED-CHARS.  A sorted copy of the list is walked:

 - 1024 or more entries: ordered by `char-int';
 - 512 to 1023 entries: ordered by `char-total-strokes', ties
   broken by `char-int';
 - fewer entries: ordered by `char-total-strokes', ties broken by
   `ideograph-char<'.

In the two stroke-based orders the tie-breaker alone decides when
the first character has no stroke count, and a character with a
stroke count sorts before one without.

Every character not in IGNORED-CHARS is printed as an <li> item
with `www-ids-find-format-line', and this function is then called
recursively with that character as the component string and the
character added to IGNORED-CHARS.  The whole output is wrapped in
<ul> ... </ul>.

Return the updated IGNORED-CHARS."
  (let* ((prods (or products
                    (ideograph-find-products components ignored-chars)))
         (n-products (length prods))
         (sorter
          (cond
           ((>= n-products 1024)
            (lambda (a b)
              (< (char-int a) (char-int b))))
           ((>= n-products 512)
            (lambda (a b)
              (let ((strokes-a (char-total-strokes a))
                    strokes-b)
                (cond
                 ((null strokes-a)
                  (< (char-int a) (char-int b)))
                 ((null (setq strokes-b (char-total-strokes b)))
                  t)
                 ((= strokes-a strokes-b)
                  (< (char-int a) (char-int b)))
                 (t
                  (< strokes-a strokes-b))))))
           (t
            (lambda (a b)
              (let ((strokes-a (char-total-strokes a))
                    strokes-b)
                (cond
                 ((null strokes-a)
                  (ideograph-char< a b))
                 ((null (setq strokes-b (char-total-strokes b)))
                  t)
                 ((= strokes-a strokes-b)
                  (ideograph-char< a b))
                 (t
                  (< strokes-a strokes-b)))))))))
    (princ "<ul>\n")
    ;; Sort a copy: `sort' is destructive and the caller's list must
    ;; stay intact.
    (dolist (c (sort (copy-list prods) sorter))
      (unless (memq c ignored-chars)
        (let ((structure (char-feature c 'ideographic-structure)))
          (princ "<li>")
          (www-ids-find-format-line c structure)
          ;; Descend into the characters that contain C, marking C as
          ;; already shown.
          (setq ignored-chars
                (www-ids-insert-chars-including-components*
                 (char-to-string c) (cons c ignored-chars))))))
    (princ "</ul>\n"))
  ignored-chars)
691
(defun www-ids-insert-chars-including-components (components
                                                  &optional ignored-chars)
  "Print the HTML search result for the characters including COMPONENTS.

The products are obtained from `ideograph-find-products' called
with COMPONENTS and IGNORED-CHARS.

 - With 1024 or more products a notice (in Japanese) is printed and
   the recursive descent of the final pass is switched off.
 - With 2048 or more products each product is only printed with
   `www-ids-find-format-char' and nothing else is done.
 - Otherwise the products are printed as a nested list by
   `www-ids-insert-chars-including-components*'.  Then, for every
   character shown so far, each of its `char-component-variants'
   that has an ideographic-structure attribute and was not shown
   yet is printed together with its own nested list.  Finally the
   products of `ideograph-find-products-with-variants' are handled:
   with 512 or more a second notice is printed and recursion is
   switched off; with 1024 or more they are only printed with
   `www-ids-find-format-char'; otherwise they are printed in
   stroke-count order, each with a nested list unless recursion was
   switched off.

Return the list of characters that have been shown or ignored."
  (let ((products (ideograph-find-products components ignored-chars))
        is as bs len ignore-children)
    (setq len (length products))
    (when (>= len 1024)
      (setq ignore-children t)
      ;; The notice is Japanese text.  It is written as an ASCII-only
      ;; ISO-2022-JP byte sequence (ESC spelled \e, embedded double
      ;; quotes escaped) and decoded at run time, so reading this
      ;; file does not depend on the coding system used to load it.
      (princ
       (encode-coding-string
        (decode-coding-string
         "<p>\e$B7k2L$,B?$9$.$k$?$a!\":F5\"E*8!:w$r>JN,$7$^$7$?!#\e(B</p>"
         'iso-2022-jp)
        'utf-8-jp-er)))
    (if (>= len 2048)
        (dolist (c products)
          (www-ids-find-format-char c))
      (setq ignored-chars
            (nreverse
             (www-ids-insert-chars-including-components* components ignored-chars products)))
      ;; Second pass: component variants of everything shown so far.
      (dolist (c ignored-chars)
        (dolist (vc (char-component-variants c))
          (unless (memq vc ignored-chars)
            (when (setq is (get-char-attribute vc 'ideographic-structure))
              (princ "<li>")
              (www-ids-find-format-line vc is)
              (setq ignored-chars
                    (www-ids-insert-chars-including-components*
                     (char-to-string vc)
                     (cons vc ignored-chars)))))))
      ;; Third pass: products found when variants are considered.
      (setq products (ideograph-find-products-with-variants components ignored-chars))
      (setq len (length products))
      (when (>= len 512)
        (setq ignore-children t)
        ;; Same ASCII-only ISO-2022-JP spelling as the notice above.
        (princ
         (encode-coding-string
          (decode-coding-string
           "<p>\e$B7k2L$,B?$9$.$k$?$a!\"4XO\";z$N:F5\"E*8!:w$r>JN,$7$^$7$?!#\e(B</p>"
           'iso-2022-jp)
          'utf-8-jp-er)))
      (if (>= len 1024)
          (dolist (c products)
            (www-ids-find-format-char c))
        (dolist (c (sort (copy-tree products)
                         (lambda (a b)
                           ;; Order by stroke count.  The tie-breaker
                           ;; alone decides when A has no count; a
                           ;; character with a count sorts before one
                           ;; without.
                           (if (setq as (char-total-strokes a))
                               (if (setq bs (char-total-strokes b))
                                   (if (= as bs)
                                       (ideograph-char< a b)
                                     (< as bs))
                                 t)
                             (ideograph-char< a b)))))
          (unless (memq c ignored-chars)
            (setq is (get-char-attribute c 'ideographic-structure))
            (princ "<li>")
            (www-ids-find-format-line c is)
            (unless ignore-children
              (setq ignored-chars
                    (www-ids-insert-chars-including-components*
                     (char-to-string c)
                     (cons c ignored-chars))))
            ))
        ))
    )
  ignored-chars)
752
;; Private helper for `www-batch-ids-find'.
(defun www-ids-find-escape-attribute-value (string)
  "Return a copy of STRING that is safe inside a double-quoted HTML attribute.
The characters `\"', `<' and `>' are replaced by `&quot;', `&lt;' and
`&gt;'.  `&' is deliberately left untouched: it cannot terminate a
quoted attribute value, and the search field accepts entity references
such as &M-00256; that must survive being echoed back."
  (with-temp-buffer
    (insert string)
    (goto-char (point-min))
    (while (search-forward "\"" nil t)
      (replace-match "&quot;" nil t))
    (goto-char (point-min))
    (while (search-forward "<" nil t)
      (replace-match "&lt;" nil t))
    (goto-char (point-min))
    (while (search-forward ">" nil t)
      (replace-match "&gt;" nil t))
    (buffer-string)))

;; Batch (CGI-style) entry point: print a complete HTTP response for one
;; IDS search request.
;;
;; The query is taken from the first element of `command-line-args-left'
;; (which is then popped).  An optional leading "components=" is removed
;; and a non-empty remainder is decoded with `decode-url-string' using
;; `utf-8-er'; anything else leaves COMPONENTS nil.
;;
;; Output, all through `princ':
;;   - Content-Type header, HTML head and page title
;;   - version and last-modified time of
;;     `www-ids-find-ideographic-products-file-name'
;;   - the search form, with the current query echoed into the text field
;;   - a notice unless the products file is newer than the running
;;     executable (NOTE(review): the JIS-encoded text appears to announce
;;     that the system is being updated -- confirm)
;;   - the results for COMPONENTS, or help text and links when it is nil
;;   - copyright / "Powered by" footer
753 (defun www-batch-ids-find ()
754   (let ((components (car command-line-args-left))
        ;; Prepend extra charset -> entity-reference rules to the
        ;; existing alist for the duration of this function.
755         (coded-charset-entity-reference-alist
756          (list*
757           '(=cns11643-1         "C1-" 4 X)
758           '(=cns11643-2         "C2-" 4 X)
759           '(=cns11643-3         "C3-" 4 X)
760           '(=cns11643-4         "C4-" 4 X)
761           '(=cns11643-5         "C5-" 4 X)
762           '(=cns11643-6         "C6-" 4 X)
763           '(=cns11643-7         "C7-" 4 X)
764           '(=gb2312             "G0-" 4 X)
765           '(=gb12345            "G1-" 4 X)
766           '(=jis-x0208@1990     "J90-" 4 X)
767           '(=jis-x0212          "JSP-" 4 X)
768           '(=cbeta              "CB" 5 d)
769           '(=jef-china3         "JC3-" 4 X)
770           '(=jis-x0208@1978     "J78-" 4 X)
771           '(=jis-x0208@1983     "J83-" 4 X)
772           '(=daikanwa           "M-" 5 d)
773           coded-charset-entity-reference-alist))
774         )
775     (setq command-line-args-left (cdr command-line-args-left))
    ;; Normalise COMPONENTS to either a decoded, non-empty string or nil.
776     (cond
777      ((stringp components)
778       (if (string-match "^components=" components)
779           (setq components (substring components (match-end 0))))
780       (setq components
781             (if (> (length components) 0)
782                 (decode-url-string components 'utf-8-er)
783               nil))
784       )
785      (t
786       (setq components nil)
787       ))
788     (princ "Content-Type: text/html; charset=UTF-8
789
790 <!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"
791             \"http://www.w3.org/TR/html4/loose.dtd\">
792 <html lang=\"ja\">
793 <head>
794 <meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">
795 <title>CHISE IDS Find</title>
796 <link href=\"/css/bootstrap-4.5.0.min.css\" rel=\"stylesheet\">
797 <style type=\"text/css\">
798 <!--
799 .entry { font-size: 36px; }
800 .entry a img { height: 36px; }
801 .ids { font-size: 24px; }
802 .ids a img { height: 24px; }
803 img { vertical-align:middle; }
804 a { text-decoration:none; }
805 ul { margin: 0 0; }
806 li { margin: 0 0 -0.2em; }
807 .tooltip {
808     position: relative;
809     display: inline-block;
810 }
811 .tooltip .tooltiptext {
812     display: none;
813 }
814 -->
815 </style>
816 </head>
817
818 <body>
819
820 <div class=\"jumbotron jumbotron-fluid mb-0\">
821 <h1 class=\"display-4 text-center\">")
822     (princ (encode-coding-string "CHISE IDS \e$B4A;z8!:w\e(B" 'utf-8-jp-er))
823     (princ "</h1>")
824     (princ "
825 <p class=\"text-center\">Version ")
826     (princ www-ids-find-version)
    ;; Element 5 of `file-attributes' is the modification time.
827     (princ (format-time-string
828             " (Last-modified: %Y-%m-%d %H:%M:%S)</p>"
829             (nth 5
830                  (file-attributes
831                   www-ids-find-ideographic-products-file-name))))
832     (princ "
833 </div>
834 <div class=\"container mt-0 mw-100 d-inline-block align-top bg-dark\">
835 <p />
836 <div class=\"input-group mb-3 h3 my-4\">
837 <div class=\"input-group-prepend mw-75 ml-3\">
838 <form action=\"/ids-find\" method=\"GET\">
839 <span class=\"input-group-text\" id=\"basic-addon1\">
840 ")
841     (princ (encode-coding-string "\e$BItIJJ8;zNs\e(B" 'utf-8-jp-er))
842     (princ "</span>
843 </div>
844 <input type=\"text\" class=\"form-control\" aria-describedby=\"basic-addon1\" name=\"components\" size=\"30\" maxlength=\"256\" value=\"")
    ;; COMPONENTS is request-supplied text printed inside value="...".
    ;; Escape it first, otherwise a `"' in the query closes the attribute
    ;; and lets the request inject arbitrary markup into the page.
845     (if (> (length components) 0)
846         (princ (encode-coding-string (www-ids-find-escape-attribute-value components) 'utf-8-er)))
847     (princ "\">
848 <input class=\"mr-3\" type=\"submit\" value=\"")
849     (princ (encode-coding-string "\e$B8!:w3+;O\e(B" 'utf-8-jp-er))
850     (princ "\">
851 </form>
852 </div>
853 </div>
854
855 ")
    ;; Print a notice unless the products file is newer than the
    ;; executable found for (car command-line-args) on `exec-path'.
856     (unless (file-newer-than-file-p
857              www-ids-find-ideographic-products-file-name
858              (locate-file (car command-line-args) exec-path))
859       (princ (encode-coding-string "<hr>
860 <p>
861 \e$B8=:_!"%7%9%F%`$N99?7:n6HCf$G$9!#$7$P$i$/$*BT$A$/$@$5$$!#\e(B
862 <hr>
863 " 'utf-8-jp-er))
864       ;; (setq components nil)
865       )
866     (cond
867      (components
868       (princ "<div class=\"container\">
869 ")
870       ;; (map-char-attribute
871       ;;  (lambda (c v)
872       ;;    (when (every (lambda (p)
873       ;;                   (ideographic-structure-member p v))
874       ;;                 components)
875       ;;      (princ (encode-coding-string
876       ;;              (ids-find-format-line c v)
877       ;;              'utf-8-jp-er))
878       ;;      (princ "<br>\n")
879       ;;      )
880       ;;    nil)
881       ;;  'ideographic-structure)
      ;; A single-character query: print that character's own line first.
882       (when (= (length components) 1)
883         (www-ids-find-format-line (aref components 0)
884                                   (char-feature (aref components 0)
885                                                 'ideographic-structure)))
886       ;; (dolist (c (ideographic-products-find components))
887       ;;   (setq is (char-feature c 'ideographic-structure))
888       ;;   ;; to avoid problems caused by wrong indexes
889       ;;   (when (every (lambda (c)
890       ;;                  (ideographic-structure-member c is))
891       ;;                components)
892       ;;     (www-ids-find-format-line c is)))
893       ;; (princ "<ul>\n")
894       (www-ids-insert-chars-including-components components)
895       ;; (princ "</ul>\n")
896       (princ "</div>\n")
897       )
     ;; No query: print the usage text and the list of related links.
898      (t
899       (princ (encode-coding-string "<div class=\"container mt-4\">
900 <div class=\"ml-3\">
901 <p>
902 \e$B;XDj$7$?ItIJ$rA4$F4^$`4A;z$N0lMw$rI=<($7$^$9!#\e(B
903 </p>
904 <p>
905 CHISE \e$B$GMQ$$$i$l$k<BBV;2>H7A<0!JNc!'\e(B&amp;M-00256;\e$B!K$GItIJ$r;XDj$9$k;v$b$G$-$^$9!#\e(B
906 </p>
907 </div>
908 " 'utf-8-jp-er))
909       (princ (encode-coding-string "
910 <p  class=\"ml-0\">
911 \[Links\]
912 <ul>
913 <li><a href=\"http://www.shuiren.org/chuden/toyoshi/syoseki/chise_ids.html\"
914 >\e$B!V\e(BCHISE IDS FIND\e$B$G4A;z$r8!:w!W\e(B</a> \e$B!=\e(B \e$B;3ED?r?N$5$s!J\e(B<a
915 href=\"http://www.shuiren.org/\">\e$B?g?MDb\e(B</a>\e$B!K$K$h$k2r@b\e(B
916 </ul>
917 <ul>
918 <li><a href=\"http://www.karitsu.org/tools/firefox_plugin.htm\"
919 >Firefox \e$BMQ\e(B plugin</a> by \e$B=);3M[0lO:$5$s!J\e(B<a href=\"http://www.karitsu.org/\"
920 >\e$B2aN)c7\e(B</a>\e$B!K\e(B
921 </ul>
922 <ul>
923 <li><a href=\"http://git.chise.org/gitweb/?p=chise/ids.git;a=blob;f=www/www-ids-find.el\"
924 >www-ids-find.el (source file (Emacs Lisp part))</a>
925 <li><a href=\"http://www.chise.org/ids/\"
926 >\e$B!V\e(BCHISE \e$B4A;z9=B$>pJs%G!<%?%Y!<%9!W\e(B</a>
927 <li><a href=\"http://fonts.jp/chise_linkmap/\"
928 >\e$B!V\e(Bchise_linkmap : CHISE \e$B4A;zO"4D?^!W\e(B</a> by \e$B>eCO9(0l$5$s\e(B
929 <li><a href=\"http://www.chise.org/\"
930 >CHISE Project</a>
931 </ul>
932 <ul>
933 <li><a href=\"http://coe21.zinbun.kyoto-u.ac.jp/djvuchar\"
934 >\e$B!VBsK\J8;z%G!<%?%Y!<%9!W\e(B</a> by
935 <a href=\"http://coe21.zinbun.kyoto-u.ac.jp/\"
936 >\e$B5~ETBg3X\e(B21\e$B@$5*\e(BCOE\e$B!VEl%"%8%"@$3&$N?MJ8>pJs3X8&5f650i5rE@!W\e(B</a>
937 <li><a href=\"http://www.unicode.org/\"
938 >Unicode</a>
939 </ul>
940 </p>
941 </div>
942 "
943  'utf-8-jp-er))
944
945       ))
    ;; Footer: copyright line and the running XEmacs CHISE version.
946     (princ "<hr>
947 <div class=\"container\">
948 ")
949     (princ "<div class=\"ml-0\">
950 Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2015, 2016, 2017, 2020, 2021, 2022 <a href=\"http://kanji.zinbun.kyoto-u.ac.jp/~tomo/\"
951 >MORIOKA Tomohiko</a></div>")
952     (princ
953      (format
954       "<div>Powered by <a
955 href=\"http://www.chise.org/xemacs/\"
956 >XEmacs CHISE</a> %s.</div>"
957       (encode-coding-string xemacs-chise-version 'utf-8-jp-er)))
958     (princ "
959 </div>
960 </body>
961 </html>
962 ")))