X-Git-Url: http://git.chise.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=ids-find.el;h=eeafbf902e414f7ef7284a9361506b08019d9ac8;hb=c0fc2d6c9dacce8207ab01400b99a3b2efd66406;hp=e68d8a1fe344b47d07d24c5c2625d0576f6837a2;hpb=854d4ec4f71e835a0a1530afd60ca434394da401;p=chise%2Fids.git diff --git a/ids-find.el b/ids-find.el index e68d8a1..eeafbf9 100644 --- a/ids-find.el +++ b/ids-find.el @@ -1,11 +1,11 @@ ;;; ids-find.el --- search utility based on Ideographic-structures -;; Copyright (C) 2002 MORIOKA Tomohiko +;; Copyright (C) 2002, 2003, 2005, 2006, 2007, 2017, 2020 MORIOKA Tomohiko ;; Author: MORIOKA Tomohiko -;; Keywords: Kanji, Ideographs, search, IDS +;; Keywords: Kanji, Ideographs, search, IDS, CHISE, UCS, Unicode -;; This file is a part of Tomoyo-Tools. +;; This file is a part of CHISE-IDS. ;; This program is free software; you can redistribute it and/or ;; modify it under the terms of the GNU General Public License as @@ -24,54 +24,591 @@ ;;; Code: -(defun ideographic-structure-member-compare-parts (part s-part) +(defun ids-index-store-char (product component) + (let ((ret (get-char-attribute component 'ideographic-products))) + (unless (memq product ret) + (put-char-attribute component 'ideographic-products + (cons product ret)) + (when (setq ret (char-feature component 'ideographic-structure)) + (ids-index-store-structure product ret))) + )) + +(defun ids-index-store-structure (product structure) + (let (ret) + (dolist (cell (cdr structure)) + (if (char-ref-p cell) + (setq cell (plist-get cell :char))) + (cond ((characterp cell) + (ids-index-store-char product cell)) + ((setq ret (assq 'ideographic-structure cell)) + (ids-index-store-structure product (cdr ret))) + ((setq ret (find-char cell)) + (ids-index-store-char product ret)) + )))) + +;;;###autoload +(defun ids-update-index () + (interactive) + (map-char-attribute + (lambda (c v) + (ids-index-store-structure c v) + nil) + 'ideographic-structure) + (map-char-attribute + (lambda (c v) + (ids-index-store-structure c v) + nil) + 'ideographic-structure@apparent) + (save-char-attribute-table 'ideographic-products)) + + +(mount-char-attribute-table 'ideographic-products) + +;;;###autoload +(defun ids-find-all-products (char) + (let (dest) + (dolist (cell (char-feature char 'ideographic-products)) + (unless (memq cell dest) + (setq dest (cons cell dest))) + (setq dest (union dest (ids-find-all-products cell)))) + dest)) + +(defun of-component-features () + (let (dest) + (dolist (feature (char-attribute-list)) + (when (string-match "^<-.*[@/]component\\(/[^*/]+\\)*$" + (symbol-name feature)) + (push feature dest))) + (list* '<-mistakable '->mistakable + '<-formed '->formed + '<-same '->same + '<-original '->original + '<-ancient '->ancient + dest))) + +(defun to-component-features () + (let (dest) + (dolist (feature (char-attribute-list)) + (when (string-match "^->.*[@/]component\\(/[^*/]+\\)*$" + (symbol-name feature)) + (push feature dest))) + dest)) + +;;;###autoload +(defun char-component-variants (char) + (let ((dest (list char)) + ret uchr) + (dolist (feature (to-component-features)) + (if (setq ret (get-char-attribute char feature)) + (dolist (c ret) + (setq dest (union dest (char-component-variants c)))))) + (cond + ;; ((setq ret (some (lambda (feature) + ;; (get-char-attribute char feature)) + ;; (to-component-features))) + ;; (dolist (c ret) + ;; (setq dest (union dest (char-component-variants c)))) + ;; ) + ((setq ret (get-char-attribute char '->ucs-unified)) + (setq dest (cons char ret)) + (dolist (c dest) + (setq dest (union dest + (some (lambda (feature) + (get-char-attribute c feature)) + (of-component-features)) + ))) + ) + ((and (setq ret (get-char-attribute char '=>ucs)) + (setq uchr (decode-char '=ucs ret))) + (setq dest (cons uchr (char-variants uchr))) + (dolist (c dest) + (setq dest (union dest + (some (lambda (feature) + (get-char-attribute c feature)) + (of-component-features)) + ))) + ) + (t + (map-char-family + (lambda (c) + (unless (memq c dest) + (setq dest (cons c dest))) + (setq dest + (union dest + (some (lambda (feature) + (char-feature c feature)) + (of-component-features)) + )) + nil) + char) + )) + dest)) + +;;;###autoload +(defun ideographic-products-find (&rest components) + (if (stringp (car components)) + (setq components (string-to-char-list (car components)))) + (let (dest products) + (dolist (variant (char-component-variants (car components))) + (setq products + (union products + (get-char-attribute variant 'ideographic-products)))) + (setq dest products) + (while (and dest + (setq components (cdr components))) + (setq products nil) + (dolist (variant (char-component-variants (car components))) + (setq products + (union products + (get-char-attribute variant 'ideographic-products)))) + (setq dest (intersection dest products))) + dest)) + +(defun ideograph-find-products-with-variants (components &optional ignored-chars) + (if (stringp components) + (setq components (string-to-char-list components))) + (let (dest products) + (dolist (variant (char-component-variants (car components))) + (setq products + (union products + (set-difference + (get-char-attribute variant 'ideographic-products) + ignored-chars)))) + (setq dest products) + (while (and dest + (setq components (cdr components))) + (setq products nil) + (dolist (variant (char-component-variants (car components))) + (setq products + (union products + (set-difference + (get-char-attribute variant 'ideographic-products) + ignored-chars)))) + (setq dest (intersection dest products))) + dest)) + +(defun ideograph-find-products (components &optional ignored-chars) + (if (stringp components) + (setq components (string-to-char-list components))) + (let (dest products) + ;; (dolist (variant (char-component-variants (car components))) + ;; (setq products + ;; (union products + ;; (get-char-attribute variant 'ideographic-products)))) + ;; (setq dest products) + (setq dest (get-char-attribute (car components) 'ideographic-products)) + (while (and dest + (setq components (cdr components))) + ;; (setq products nil) + ;; (dolist (variant (char-component-variants (car components))) + ;; (setq products + ;; (union products + ;; (get-char-attribute variant 'ideographic-products)))) + (setq products (get-char-attribute (car components) 'ideographic-products)) + (setq dest (intersection dest products))) + dest)) + + +(defun ideographic-structure-char= (c1 c2) + (or (eq c1 c2) + (and c1 c2 + (let ((m1 (char-ucs c1)) + (m2 (char-ucs c2))) + (or (and m1 m2 + (eq m1 m2)) + (memq c1 (char-component-variants c2))))))) + +(defun ideographic-structure-member-compare-components (component s-component) (let (ret) - (cond ((char-ref= part s-part - (lambda (c1 c2) - (or (eq c1 c2) - (and c1 c2 - (eq (char-ucs c1)(char-ucs c2))))))) - ((listp s-part) - (if (setq ret (assq 'ideographic-structure s-part)) - (ideographic-structure-member part (cdr ret)))) - ((setq ret (get-char-attribute s-part 'ideographic-structure)) - (ideographic-structure-member part ret))))) - -(defun ideographic-structure-member (part structure) - (or (progn + (cond ((char-ref= component s-component #'ideographic-structure-char=)) + ((listp s-component) + (if (setq ret (assq 'ideographic-structure s-component)) + (ideographic-structure-member component (cdr ret)))) + ((setq ret (get-char-attribute s-component 'ideographic-structure)) + (ideographic-structure-member component ret))))) + +;;;###autoload +(defun ideographic-structure-member (component structure) + "Return non-nil if COMPONENT is included in STRUCTURE." + (or (memq component structure) + (progn (setq structure (cdr structure)) - (ideographic-structure-member-compare-parts part (car structure))) + (ideographic-structure-member-compare-components + component (car structure))) (progn (setq structure (cdr structure)) - (ideographic-structure-member-compare-parts part (car structure))) + (ideographic-structure-member-compare-components + component (car structure))) (progn (setq structure (cdr structure)) (and (car structure) - (ideographic-structure-member-compare-parts - part (car structure)))))) + (ideographic-structure-member-compare-components + component (car structure)))))) + ;;;###autoload -(defun ideographic-structure-search-chars (parts) - "Search Ideographs by PARTS." - (interactive "sParts : ") - (with-current-buffer (get-buffer-create " *ids-chars*") +(defun ideographic-structure-repertoire-p (structure components) + "Return non-nil if STRUCTURE can be constructed by a subset of COMPONENTS." + (and structure + (let (ret s-component) + (catch 'tag + (while (setq structure (cdr structure)) + (setq s-component (car structure)) + (unless (characterp s-component) + (if (setq ret (find-char s-component)) + (setq s-component ret))) + (unless (cond + ((listp s-component) + (if (setq ret (assq 'ideographic-structure s-component)) + (ideographic-structure-repertoire-p + (cdr ret) components))) + ((member* s-component components + :test #'ideographic-structure-char=)) + ((setq ret + (get-char-attribute s-component + 'ideographic-structure)) + (ideographic-structure-repertoire-p ret components))) + (throw 'tag nil))) + t)))) + + +(defvar ids-find-result-buffer "*ids-chars*") + +(defun ids-find-format-line (c v) + (format "%c\t%s\t%s\n" + c + (or (let ((ucs (or (char-ucs c) + (encode-char c 'ucs)))) + (if ucs + (cond ((<= ucs #xFFFF) + (format " U+%04X" ucs)) + ((<= ucs #x10FFFF) + (format "U-%08X" ucs))))) + " ") + (or (ideographic-structure-to-ids v) + v))) + +(defun ids-insert-chars-including-components* (components + &optional level ignored-chars) + (unless level + (setq level 0)) + (let (is i as bs) + (dolist (c (sort (copy-tree (ideograph-find-products components + ignored-chars)) + (lambda (a b) + (if (setq as (char-total-strokes a)) + (if (setq bs (char-total-strokes b)) + (if (= as bs) + (ideograph-char< a b) + (< as bs)) + t) + (ideograph-char< a b))))) + (unless (memq c ignored-chars) + (setq is (char-feature c 'ideographic-structure)) + (setq i 0) + (while (< i level) + (insert "\t") + (setq i (1+ i))) + (insert (ids-find-format-line c is)) + (setq ignored-chars + (ids-insert-chars-including-components* + (char-to-string c) (1+ level) + (cons c ignored-chars)))) + ) + ) + ignored-chars) + +(defun ids-insert-chars-including-components (components + &optional level ignored-chars) + (unless level + (setq level 0)) + (setq ignored-chars + (nreverse + (ids-insert-chars-including-components* components + level ignored-chars))) + (let (is i as bs) + (dolist (c ignored-chars) + (dolist (vc (char-component-variants c)) + (unless (memq vc ignored-chars) + (when (setq is (get-char-attribute vc 'ideographic-structure)) + (setq i 0) + (while (< i level) + (insert "\t") + (setq i (1+ i))) + (insert (ids-find-format-line vc is)) + (setq ignored-chars + (ids-insert-chars-including-components* + (char-to-string vc) (1+ level) + (cons vc ignored-chars))))))) + (dolist (c (sort (copy-tree (ideograph-find-products-with-variants + components ignored-chars)) + (lambda (a b) + (if (setq as (char-total-strokes a)) + (if (setq bs (char-total-strokes b)) + (if (= as bs) + (ideograph-char< a b) + (< as bs)) + t) + (ideograph-char< a b))))) + (unless (memq c ignored-chars) + (setq is (get-char-attribute c 'ideographic-structure)) + (setq i 0) + (while (< i level) + (insert "\t") + (setq i (1+ i))) + (insert (ids-find-format-line c is)) + (setq ignored-chars + (ids-insert-chars-including-components* + (char-to-string c) (1+ level) + (cons c ignored-chars)))) + ) + ) + ignored-chars) + +;;;###autoload +(defun ids-find-chars-including-components (components) + "Search Ideographs whose structures have COMPONENTS." + (interactive "sComponents : ") + (with-current-buffer (get-buffer-create ids-find-result-buffer) + (setq buffer-read-only nil) + (erase-buffer) + (ids-insert-chars-including-components components 0 nil) + ;; (let ((ignored-chars + ;; (nreverse + ;; (ids-insert-chars-including-components components 0 nil + ;; #'ideograph-find-products))) + ;; rest) + ;; (setq rest ignored-chars) + ;; ;; (dolist (c rest) + ;; ;; (setq ignored-chars + ;; ;; (union ignored-chars + ;; ;; (ids-insert-chars-including-components + ;; ;; (list c) 0 ignored-chars + ;; ;; #'ideograph-find-products-with-variants)))) + ;; (ids-insert-chars-including-components components 0 ignored-chars + ;; #'ideograph-find-products-with-variants)) + (goto-char (point-min))) + (view-buffer ids-find-result-buffer)) + +;;;###autoload +(define-obsolete-function-alias 'ideographic-structure-search-chars + 'ids-find-chars-including-components) + +;;;###autoload +(defun ids-find-chars-covered-by-components (components) + "Search Ideographs which structures are consisted by subsets of COMPONENTS." + (interactive "sComponents: ") + (if (stringp components) + (setq components (string-to-char-list components))) + (with-current-buffer (get-buffer-create ids-find-result-buffer) (setq buffer-read-only nil) (erase-buffer) (map-char-attribute (lambda (c v) - (when (every - (lambda (p) - ;; (member* p v :test #'char-ref=) - (ideographic-structure-member p v)) - parts) - (insert (format "%c\t%s\n" - c - (or (ideographic-structure-to-ids v) - v)))) - nil) + (when (ideographic-structure-repertoire-p v components) + (insert (ids-find-format-line c v)))) 'ideographic-structure) (goto-char (point-min))) - (view-buffer " *ids-chars*")) + (view-buffer ids-find-result-buffer)) + + +(defun ideographic-structure-merge-components-alist (ca1 ca2) + (let ((dest-alist ca1) + ret) + (dolist (cell ca2) + (if (setq ret (assq (car cell) dest-alist)) + (setcdr ret (+ (cdr ret)(cdr cell))) + (setq dest-alist (cons cell dest-alist)))) + dest-alist)) + +(defun ideographic-structure-to-components-alist (structure) + (apply #'ideographic-structure-to-components-alist* structure)) + +(defun ideographic-structure-to-components-alist* (operator component1 component2 + &optional component3 + &rest opts) + (let (dest-alist ret) + (setq dest-alist + (cond ((characterp component1) + (unless (encode-char component1 'ascii) + (list (cons component1 1))) + ) + ((setq ret (assq 'ideographic-structure component1)) + (ideographic-structure-to-components-alist (cdr ret)) + ) + ((setq ret (find-char component1)) + (list (cons ret 1)) + ))) + (setq dest-alist + (ideographic-structure-merge-components-alist + dest-alist + (cond ((characterp component2) + (unless (encode-char component2 'ascii) + (list (cons component2 1))) + ) + ((setq ret (assq 'ideographic-structure component2)) + (ideographic-structure-to-components-alist (cdr ret)) + ) + ((setq ret (find-char component2)) + (list (cons ret 1)) + )))) + (if (memq operator '(?\u2FF2 ?\u2FF3)) + (ideographic-structure-merge-components-alist + dest-alist + (cond ((characterp component3) + (unless (encode-char component3 'ascii) + (list (cons component3 1))) + ) + ((setq ret (assq 'ideographic-structure component3)) + (ideographic-structure-to-components-alist (cdr ret)) + ) + ((setq ret (find-char component3)) + (list (cons ret 1)) + ))) + dest-alist))) + +(defun ids-find-merge-variables (ve1 ve2) + (cond ((eq ve1 t) + ve2) + ((eq ve2 t) + ve1) + (t + (let ((dest-alist ve1) + (rest ve2) + cell ret) + (while (and rest + (setq cell (car rest)) + (if (setq ret (assq (car cell) ve1)) + (eq (cdr ret)(cdr cell)) + (setq dest-alist (cons cell dest-alist)))) + (setq rest (cdr rest))) + (if rest + nil + dest-alist))))) + +;;;###autoload +(defun ideographic-structure-equal (structure1 structure2) + (let (dest-alist ret) + (and (setq dest-alist (ideographic-structure-character= + (car structure1)(car structure2))) + (setq ret (ideographic-structure-character= + (nth 1 structure1)(nth 1 structure2))) + (setq dest-alist (ids-find-merge-variables dest-alist ret)) + (setq ret (ideographic-structure-character= + (nth 2 structure1)(nth 2 structure2))) + (setq dest-alist (ids-find-merge-variables dest-alist ret)) + (if (memq (car structure1) '(?\u2FF2 ?\u2FF3)) + (and (setq ret (ideographic-structure-character= + (nth 3 structure1)(nth 3 structure2))) + (setq dest-alist (ids-find-merge-variables dest-alist ret))) + dest-alist)))) + +;;;###autoload +(defun ideographic-structure-character= (c1 c2) + (let (ret ret2) + (cond ((characterp c1) + (cond ((encode-char c1 'ascii) + (list (cons c1 c2)) + ) + ((characterp c2) + (if (encode-char c2 'ascii) + (list (cons c2 c1)) + (eq c1 c2)) + ) + ((setq ret2 (find-char c2)) + (eq c1 ret2) + ) + ((setq ret2 (assq 'ideographic-structure c2)) + (and (setq ret (get-char-attribute c1 'ideographic-structure)) + (ideographic-structure-equal ret (cdr ret2))) + )) + ) + ((setq ret (assq 'ideographic-structure c1)) + (cond ((characterp c2) + (if (encode-char c2 'ascii) + (list (cons c2 c1)) + (and (setq ret2 (get-char-attribute c2 'ideographic-structure)) + (ideographic-structure-equal (cdr ret) ret2))) + ) + ((setq ret2 (find-char c2)) + (and (setq ret2 (get-char-attribute ret2 'ideographic-structure)) + (ideographic-structure-equal (cdr ret) ret2)) + ) + ((setq ret2 (assq 'ideographic-structure c2)) + (ideographic-structure-equal (cdr ret)(cdr ret2)) + )) + ) + ((setq ret (find-char c1)) + (cond ((characterp c2) + (if (encode-char c2 'ascii) + (list (cons c2 c1)) + (eq ret c2)) + ) + ((setq ret2 (find-char c2)) + (eq ret ret2) + ) + ((setq ret2 (assq 'ideographic-structure c2)) + (and (setq ret (get-char-attribute ret 'ideographic-structure)) + (ideographic-structure-equal ret (cdr ret2)) + ))))))) + +;;;###autoload +(defun ideographic-structure-find-chars (structure) + (apply #'ideographic-structure-find-chars* structure)) + +(defun ideographic-structure-find-chars* (operator component1 component2 + &optional component3) + (let ((comp-alist (ideographic-structure-to-components-alist* + operator component1 component2 component3)) + c1 c2 c3 + ret pl str + var-alist) + (dolist (pc (caar + (sort (mapcar (lambda (cell) + (if (setq ret (get-char-attribute + (car cell) 'ideographic-products)) + (cons ret (length ret)) + (cons nil 0))) + comp-alist) + (lambda (a b) + (< (cdr a)(cdr b)))))) + (when (and (setq str (get-char-attribute pc 'ideographic-structure)) + (setq var-alist + (ideographic-structure-character= (car str) operator)) + (setq c1 (nth 1 str)) + (setq ret (ideographic-structure-character= c1 component1)) + (setq var-alist (ids-find-merge-variables var-alist ret)) + (setq c2 (nth 2 str)) + (setq ret (ideographic-structure-character= c2 component2)) + (setq var-alist (ids-find-merge-variables var-alist ret)) + (cond ((memq (car str) '(?\u2FF2 ?\u2FF3)) + (setq c3 (nth 3 str)) + (and (setq ret (ideographic-structure-character= + c3 component3)) + (ids-find-merge-variables var-alist ret)) + ) + (t var-alist))) + (setq pl (cons pc pl)) + )) + pl)) + +;;;###autoload +(defun ideographic-char-count-components (char component) + (let ((dest 0) + structure) + (cond ((eq char component) + 1) + ((setq structure (get-char-attribute char 'ideographic-structure)) + (dolist (cell (ideographic-structure-to-components-alist structure)) + (setq dest + (+ dest + (if (eq (car cell) char) + (cdr cell) + (* (ideographic-char-count-components (car cell) component) + (cdr cell)))))) + dest) + (t + 0)))) ;;; @ End.