From 258bd93ccf69bb8b9271ad59cd306c50c5602d3e Mon Sep 17 00:00:00 2001 From: MORIOKA Tomohiko Date: Mon, 30 Nov 2015 19:16:54 +0900 Subject: [PATCH] New file. --- www/www-hng-ids-find.el | 434 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 434 insertions(+) create mode 100644 www/www-hng-ids-find.el diff --git a/www/www-hng-ids-find.el b/www/www-hng-ids-find.el new file mode 100644 index 0000000..011aecb --- /dev/null +++ b/www/www-hng-ids-find.el @@ -0,0 +1,434 @@ +(require 'ids-find) +(require 'cwiki-common) + +(defvar hng-ccs-list + (let (dest) + (dolist (ccs (charset-list)) + (when (string-match "^===hng-" (symbol-name ccs)) + (setq dest (cons ccs dest)))) + dest)) + +(defun char-hng-p (char) + (or (get-char-attribute char '->HNG) + (char-have-hng-p char))) + +(defun char-have-hng-p (char) + (or (some (lambda (ccs) + (and (encode-char char ccs) + char)) + hng-ccs-list) + (some #'char-have-hng-p + (get-char-attribute char '->subsumptive)) + (some #'char-have-hng-p + (get-char-attribute char '->denotational)))) + +(defun decode-url-string (string &optional coding-system) + (if (> (length string) 0) + (let ((i 0) + dest) + (while (string-match "%\\([0-9A-F][0-9A-F]\\)" string i) + (setq dest (concat dest + (substring string i (match-beginning 0)) + (char-to-string + (int-char + (string-to-int (match-string 1 string) 16)))) + i (match-end 0))) + (decode-coding-string + (concat dest (substring string i)) + coding-system)))) + +(defconst www-hng-ids-find-version "0.26") + +(defvar www-ids-find-ideographic-products-file-name + (expand-file-name "ideographic-products" + (expand-file-name + "feature" + (expand-file-name + "character" + chise-system-db-directory)))) + +(defvar www-ids-find-char-viewer-url + "/est/view/character/") + +(defvar www-ids-find-chise-link-map-url-prefix + "http://fonts.jp/chise_linkmap/map.cgi?code=") + +(defvar www-ids-find-tang-chars-file-name + "~tomo/projects/chise/ids/www/tang-chars.udd") + +(defun www-ids-find-format-char (c &optional code-desc) + (princ + (format "%s" + www-ids-find-char-viewer-url + (www-uri-encode-object c) + (www-format-encode-string (char-to-string c)))) + ;; (let ((str (encode-coding-string (format "%c" c) 'utf-8-er)) + ;; plane code) + ;; (princ + ;; (with-temp-buffer + ;; (cond + ;; ((string-match "&CB\\([0-9]+\\);" str) + ;; (setq code (string-to-int (match-string 1 str))) + ;; (insert (format "\"CB%05d\"\n" + ;; code (/ code 1000) code)) + ;; (when code-desc + ;; (insert (format "CB%05d" code))) + ;; ) + ;; ((string-match "&JC3-\\([0-9A-F]+\\);" str) + ;; (setq code (string-to-int (match-string 1 str) 16)) + ;; (insert (format "\"JC3-%04X\"\n" + ;; code code)) + ;; (when code-desc + ;; (insert (format "JC3-%04X" code))) + ;; ) + ;; ((string-match "&J\\(78\\|83\\|90\\|SP\\)-\\([0-9A-F]+\\);" str) + ;; (setq plane (match-string 1 str) + ;; code (string-to-int (match-string 2 str) 16)) + ;; (insert (format "\"J%s-%04X\"\n" + ;; plane code plane + ;; (- (lsh code -8) 32) + ;; (- (logand code 255) 32))) + ;; (when code-desc + ;; (insert (format "J%s-%04X" plane code))) + ;; ) + ;; ((string-match "&G\\([01]\\)-\\([0-9A-F]+\\);" str) + ;; (setq plane (string-to-int (match-string 1 str)) + ;; code (string-to-int (match-string 2 str) 16)) + ;; (insert (format "\"G%d-%04X\"\n" + ;; plane code plane + ;; (- (lsh code -8) 32) + ;; (- (logand code 255) 32))) + ;; (when code-desc + ;; (insert (format "G%d-%04X" plane code))) + ;; ) + ;; ((string-match "&C\\([1-7]\\)-\\([0-9A-F]+\\);" str) + ;; (setq plane (string-to-int (match-string 1 str)) + ;; code (string-to-int (match-string 2 str) 16)) + ;; (insert (format "\"C%d-%04X\"\n" + ;; plane code plane code)) + ;; (when code-desc + ;; (insert (format "C%d-%04X" plane code))) + ;; ) + ;; ((string-match "&ZOB-\\([0-9]+\\);" str) + ;; (setq code (string-to-int (match-string 1 str))) + ;; (insert (format "\"ZOB-%04d\"\n" + ;; code code)) + ;; (when code-desc + ;; (insert (format "ZOB-%04d" code))) + ;; ) + ;; (t + ;; (insert (format "") + ;; (insert str) + ;; (insert "") + ;; )) + ;; (goto-char (point-min)) + ;; (while (search-forward "&" nil t) + ;; (replace-match "&" t 'literal)) + ;; (buffer-string)))) + ) + +(defun www-ids-find-format-line (c is) + (let (ucs len i ids) + (www-ids-find-format-char c 'code-desc) + (princ + (or (if (setq ucs (or (char-ucs c) + (encode-char c 'ucs))) + (format + " %s" + ucs + (cond ((<= ucs #xFFFF) + (format "U+%04X" ucs)) + ((<= ucs #x10FFFF) + (format "U-%08X" ucs)))) + " "))) + (when ucs + (princ + (format " (link map)" + www-ids-find-chise-link-map-url-prefix ucs))) + (princ " ") + (when is + (setq ids (ideographic-structure-to-ids is)) + (setq i 0 + len (length ids)) + (while (< i len) + (www-ids-find-format-char (aref ids i)) + (setq i (1+ i)))) + (when (and ucs + (with-current-buffer + (find-file-noselect + www-ids-find-tang-chars-file-name) + (goto-char (point-min)) + (re-search-forward (format "^%d$" ucs) nil t))) + (princ + (format " " + (mapconcat + (lambda (c) + (format "%%%02X" (char-int c))) + (encode-coding-string (char-to-string c) + 'utf-8-jp) + ""))) + (princ (encode-coding-string "⇒[唐代拓本]" 'utf-8-jp-er))) + (princ "
\n"))) + +(defun www-ids-insert-chars-including-components (components + &optional ignored-chars) + (let ((ret (ideographic-products-find components)) + products + is as bs len ignore-children) + (dolist (char ret) + (if (char-hng-p char) + (setq products (cons char products)))) + (setq len (length products)) + (when (>= len 1024) + (setq ignore-children t) + (princ + (encode-coding-string + "

結果が多すぎるため、再帰的検索を省略しました。

" + 'utf-8-jp-er))) + (if (>= len 2048) + (dolist (c products) + (www-ids-find-format-char c)) + (princ "\n") + )) + ignored-chars) + +(defun www-batch-ids-find () + (let ((components (car command-line-args-left)) + (coded-charset-entity-reference-alist + (list* + '(=cns11643-1 "C1-" 4 X) + '(=cns11643-2 "C2-" 4 X) + '(=cns11643-3 "C3-" 4 X) + '(=cns11643-4 "C4-" 4 X) + '(=cns11643-5 "C5-" 4 X) + '(=cns11643-6 "C6-" 4 X) + '(=cns11643-7 "C7-" 4 X) + '(=gb2312 "G0-" 4 X) + '(=gb12345 "G1-" 4 X) + '(=jis-x0208@1990 "J90-" 4 X) + '(=jis-x0212 "JSP-" 4 X) + '(=cbeta "CB" 5 d) + '(=jef-china3 "JC3-" 4 X) + '(=jis-x0208@1978 "J78-" 4 X) + '(=jis-x0208@1983 "J83-" 4 X) + '(=daikanwa "M-" 5 d) + coded-charset-entity-reference-alist)) + ) + (setq command-line-args-left (cdr command-line-args-left)) + (cond + ((stringp components) + (if (string-match "^components=" components) + (setq components (substring components (match-end 0)))) + (setq components + (if (> (length components) 0) + (decode-url-string components 'utf-8-er) + nil)) + ) + (t + (setq components nil) + )) + (princ "Content-Type: text/html; charset=UTF-8 + + + + +CHISE IDS Find + + + + +

") + (princ (encode-coding-string "CHISE-IDS HNG 漢字検索" 'utf-8-jp-er)) + (princ "

") + (princ " +

Version ") + (princ www-hng-ids-find-version) + (princ (format-time-string + " (Last-modified: %Y-%m-%d %H:%M:%S)" + (nth 5 + (file-attributes + www-ids-find-ideographic-products-file-name)))) + (princ " +


+

+

+") + (princ (encode-coding-string "部品文字列" 'utf-8-jp-er)) + (princ " (length components) 0) + (princ (encode-coding-string components 'utf-8-er))) + (princ "\"> + +
+ +") + (unless (file-newer-than-file-p + www-ids-find-ideographic-products-file-name + (locate-file (car command-line-args) exec-path)) + (princ (encode-coding-string "
+

+現在、システムの更新作業中です。しばらくお待ちください。 +


+" 'utf-8-jp-er)) + ;; (setq components nil) + ) + (cond + (components + ;; (map-char-attribute + ;; (lambda (c v) + ;; (when (every (lambda (p) + ;; (ideographic-structure-member p v)) + ;; components) + ;; (princ (encode-coding-string + ;; (ids-find-format-line c v) + ;; 'utf-8-jp-er)) + ;; (princ "
\n") + ;; ) + ;; nil) + ;; 'ideographic-structure) + (when (= (length components) 1) + (www-ids-find-format-line (aref components 0) + (char-feature (aref components 0) + 'ideographic-structure))) + ;; (dolist (c (ideographic-products-find components)) + ;; (setq is (char-feature c 'ideographic-structure)) + ;; ;; to avoid problems caused by wrong indexes + ;; (when (every (lambda (c) + ;; (ideographic-structure-member c is)) + ;; components) + ;; (www-ids-find-format-line c is))) + ;; (princ "\n") + ) + (t + (princ (encode-coding-string "
+

+指定した部品を全て含む漢字の一覧を表示します。 +

+CHISE で用いられる実態参照形式(例:&M-00256;)で部品を指定する事もできます。" 'utf-8-jp-er)) + (princ (encode-coding-string " +

+\[Links\] +

+ + +" + 'utf-8-jp-er)) + + )) + (princ "
") + (princ "

+Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2015 MORIOKA Tomohiko") + (princ + (format + "

Powered by XEmacs CHISE %s." + (encode-coding-string xemacs-chise-version 'utf-8-jp-er))) + (princ " + + +"))) -- 1.7.10.4