(www-ids-insert-chars-including-components): New function.
[chise/ids.git] / www / www-ids-find.el
index 1de3070..81fca1b 100644 (file)
   "~tomo/projects/chise/ids/www/tang-chars.udd")
 
 (defun www-ids-find-format-line (c is)
-  (let ((str (encode-coding-string (format "%c" c) 'utf-8-jp-er))
-       code ucs)
-    (cond
-     ((string-match "&CB\\([0-9]+\\);" str)
-      (setq code (string-to-int (match-string 1 str)))
-      (princ (format "<img alt=\"CB%05d\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/cb-gaiji/%02d/CB%05d.gif\">\n"
-                    code (/ code 1000) code))
-      (princ (format "CB%05d" code))
-      )
-     (t
-      (princ str)))
+  (let ((str (encode-coding-string (format "%c" c) 'utf-8-er))
+       plane code ucs)
+    (princ
+     (with-temp-buffer
+       (cond
+       ((string-match "&CB\\([0-9]+\\);" str)
+        (setq code (string-to-int (match-string 1 str)))
+        (insert "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=")
+        (insert str)
+        (insert (format "\"><img alt=\"CB%05d\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/cb-gaiji/%02d/CB%05d.gif\">\n"
+                        code (/ code 1000) code))
+        (insert (format "CB%05d</a>" code))
+        )
+       ((string-match "&JC3-\\([0-9A-F]+\\);" str)
+        (setq code (string-to-int (match-string 1 str) 16))
+        (insert "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=")
+        (insert str)
+        (insert (format "\"><img alt=\"JC3-%04X\" src=\"http://kanji.zinbun.kyoto-u.ac.jp/db/CHINA3/Gaiji/%04x.gif\">\n"
+                        code code))
+        (insert (format "JC3-%04X</a>" code))
+        )
+       ((string-match "&J\\(78\\|83\\|90\\|SP\\)-\\([0-9A-F]+\\);" str)
+        (setq plane (match-string 1 str)
+              code (string-to-int (match-string 2 str) 16))
+        (insert "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=")
+        (insert str)
+        (insert (format "\"><img alt=\"J%s-%04X\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/JIS-%s/%02d-%02d.gif\">\n"
+                        plane code plane
+                        (- (lsh code -8) 32)
+                        (- (logand code 255) 32)))
+        (insert (format "J%s-%04X</a>" plane code))
+        )
+       ((string-match "&G\\([01]\\)-\\([0-9A-F]+\\);" str)
+        (setq plane (string-to-int (match-string 1 str))
+              code (string-to-int (match-string 2 str) 16))
+        (insert "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=")
+        (insert str)
+        (insert (format "\"><img alt=\"G%d-%04X\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/GB%d/%02d-%02d.gif\">\n"
+                        plane code plane
+                        (- (lsh code -8) 32)
+                        (- (logand code 255) 32)))
+        (insert (format "G%d-%04X</a>" plane code))
+        )
+       ((string-match "&C\\([1-7]\\)-\\([0-9A-F]+\\);" str)
+        (setq plane (string-to-int (match-string 1 str))
+              code (string-to-int (match-string 2 str) 16))
+        (insert "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=")
+        (insert str)
+        (insert (format "\"><img alt=\"C%d-%04X\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/CNS%d/%04X.gif\">\n"
+                        plane code plane code))
+        (insert (format "C%d-%04X</a>" plane code))
+        )
+       ((string-match "&ZOB-\\([0-9]+\\);" str)
+        (setq code (string-to-int (match-string 1 str)))
+        (insert "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=")
+        (insert str)
+        (insert (format "\"><img alt=\"ZOB-%04d\" src=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/glyphs/ZOB-1968/%04d.png\">\n"
+                        code code))
+        (insert (format "ZOB-%04d</a>" code))
+        )
+       (t
+        (insert "<a href=\"http://mousai.kanji.zinbun.kyoto-u.ac.jp/char-desc?char=")
+        (insert str)
+        (insert "\">")
+        (insert str)
+        (insert "</a>")
+        ))
+       (goto-char (point-min))
+       (while (search-forward "&" nil t)
+        (replace-match "&amp;" t 'literal))
+       (buffer-string)
+       ))
     (princ
      (or (if (setq ucs (or (char-ucs c)
                           (encode-char c 'ucs)))
       (princ (encode-coding-string "\e$B"M\e(B[\e$BEbBeBsK\\e(B]</a>" 'utf-8-jp-er)))
     (princ "<br>\n")))
 
+(defun www-ids-insert-chars-including-components (components) ; Print (via `princ') nested HTML list items for the characters found for COMPONENTS.
+  (let (is)                            ; IS: `ideographic-structure' feature of the character being examined
+    (dolist (c (ideographic-products-find components)) ; candidate characters for COMPONENTS
+      (setq is (char-feature c 'ideographic-structure))
+      ;; to avoid problems caused by wrong indexes: keep C only when every element of COMPONENTS passes `ideographic-structure-member' against IS
+      (when (every (lambda (cc)
+                    (ideographic-structure-member cc is))
+                  components)
+       (princ "<li>")                  ; open one list item per accepted character; no closing </li> is printed
+       (www-ids-find-format-line c is) ; print the line describing C
+       (princ "<ul>\n")                ; nested list, printed even when the recursive call prints nothing
+       (www-ids-insert-chars-including-components (char-to-string c)) ; recurse with C as the only component; NOTE(review): no cycle guard is visible here -- confirm the product index cannot lead back to C
+       (princ "</ul>\n")
+       )
+      )))
+
 (defun www-batch-ids-find ()
   (let ((components (car command-line-args-left))
        (coded-charset-entity-reference-alist
         (list*
-         '((=cbeta      "CB" 5 d)
-           (=jef-china3 "JC3-" 4 X))
+         '(=cns11643-1         "C1-" 4 X)
+         '(=cns11643-2         "C2-" 4 X)
+         '(=cns11643-3         "C3-" 4 X)
+         '(=cns11643-4         "C4-" 4 X)
+         '(=cns11643-5         "C5-" 4 X)
+         '(=cns11643-6         "C6-" 4 X)
+         '(=cns11643-7         "C7-" 4 X)
+         '(=gb2312             "G0-" 4 X)
+         '(=gb12345            "G1-" 4 X)
+         '(=jis-x0208@1990     "J90-" 4 X)
+         '(=jis-x0212          "JSP-" 4 X)
+         '(=cbeta              "CB" 5 d)
+         '(=jef-china3         "JC3-" 4 X)
+         '(=jis-x0208@1978     "J78-" 4 X)
+         '(=jis-x0208@1983     "J83-" 4 X)
+         '(=daikanwa           "M-" 5 d)
          coded-charset-entity-reference-alist))
        is)
     (setq command-line-args-left (cdr command-line-args-left))
        (www-ids-find-format-line (aref components 0)
                                  (char-feature (aref components 0)
                                                'ideographic-structure)))
-      (dolist (c (ideographic-products-find components))
-       (setq is (char-feature c 'ideographic-structure))
-       ;; to avoid problems caused by wrong indexes
-       (when (every (lambda (c)
-                      (ideographic-structure-member c is))
-                    components)
-         (www-ids-find-format-line c is)))
+      ;; (dolist (c (ideographic-products-find components))
+      ;;   (setq is (char-feature c 'ideographic-structure))
+      ;;   ;; to avoid problems caused by wrong indexes
+      ;;   (when (every (lambda (c)
+      ;;                  (ideographic-structure-member c is))
+      ;;                components)
+      ;;     (www-ids-find-format-line c is)))
+      (princ "<ul>\n")
+      (www-ids-insert-chars-including-components components)
+      (princ "</ul>\n")
       )
      (t
       (princ (encode-coding-string "<hr>