(U-00020374): Use "𭑈" instead of "⿰天天".
[chise/ids.git] / ids-find.el
index 40b27b5..eeafbf9 100644 (file)
@@ -1,11 +1,11 @@
 ;;; ids-find.el --- search utility based on Ideographic-structures
 
-;; Copyright (C) 2002,2003,2005,2006,2007 MORIOKA Tomohiko
+;; Copyright (C) 2002, 2003, 2005, 2006, 2007, 2017, 2020 MORIOKA Tomohiko
 
 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
 ;; Keywords: Kanji, Ideographs, search, IDS, CHISE, UCS, Unicode
 
-;; This file is a part of CHISE IDS.
+;; This file is a part of CHISE-IDS.
 
 ;; This program is free software; you can redistribute it and/or
 ;; modify it under the terms of the GNU General Public License as
      (ids-index-store-structure c v)
      nil)
    'ideographic-structure)
+  (map-char-attribute
+   (lambda (c v)
+     (ids-index-store-structure c v)
+     nil)
+   'ideographic-structure@apparent)
   (save-char-attribute-table 'ideographic-products))
 
 
       (when (string-match "^<-.*[@/]component\\(/[^*/]+\\)*$"
                          (symbol-name feature))
        (push feature dest)))
-    (cons '<-ideographic-component-forms
-         dest)))
+    (list* '<-mistakable '->mistakable
+          '<-formed '->formed
+          '<-same '->same
+          '<-original '->original
+          '<-ancient '->ancient
+          dest)))
 
 (defun to-component-features ()
   (let (dest)
@@ -83,8 +92,7 @@
       (when (string-match "^->.*[@/]component\\(/[^*/]+\\)*$"
                          (symbol-name feature))
        (push feature dest)))
-    (cons '->ideographic-component-forms
-         dest)))
+    dest))
 
 ;;;###autoload
 (defun char-component-variants (char)
                (setq components (cdr components)))
       (setq products nil)
       (dolist (variant (char-component-variants (car components)))
+       (setq products
+             (union products
+                    (get-char-attribute variant 'ideographic-products))))
+      (setq dest (intersection dest products)))
+    dest))
+
+(defun ideograph-find-products-with-variants (components &optional ignored-chars)
+  (if (stringp components)
+      (setq components (string-to-char-list components)))
+  (let (dest products)
+    (dolist (variant (char-component-variants (car components)))
       (setq products
            (union products
-                  (get-char-attribute variant 'ideographic-products))))
+                  (set-difference
+                   (get-char-attribute variant 'ideographic-products)
+                   ignored-chars))))
+    (setq dest products)
+    (while (and dest
+               (setq components (cdr components)))
+      (setq products nil)
+      (dolist (variant (char-component-variants (car components)))
+       (setq products
+             (union products
+                    (set-difference
+                     (get-char-attribute variant 'ideographic-products)
+                     ignored-chars))))
+      (setq dest (intersection dest products)))
+    dest))
+
+(defun ideograph-find-products (components &optional ignored-chars)
+  (if (stringp components)
+      (setq components (string-to-char-list components)))
+  (let (dest products)
+    ;; (dolist (variant (char-component-variants (car components)))
+    ;;   (setq products
+    ;;         (union products
+    ;;                (get-char-attribute variant 'ideographic-products))))
+    ;; (setq dest products)
+    (setq dest (get-char-attribute (car components) 'ideographic-products))
+    (while (and dest
+               (setq components (cdr components)))
+      ;; (setq products nil)
+      ;; (dolist (variant (char-component-variants (car components)))
+      ;;   (setq products
+      ;;         (union products
+      ;;                (get-char-attribute variant 'ideographic-products))))
+      (setq products (get-char-attribute (car components) 'ideographic-products))
       (setq dest (intersection dest products)))
     dest))
-;; (defun ideographic-products-find (&rest components)
-;;   (if (stringp (car components))
-;;       (setq components (car components)))
-;;   (let ((len (length components))
-;;         (i 1)
-;;         dest products)
-;;     (dolist (variant (char-component-variants (elt components 0)))
-;;       (setq products
-;;             (union products
-;;                    (get-char-attribute variant 'ideographic-products))))
-;;     (setq dest products)
-;;     (while (and
-;;             (< i len)
-;;             (progn
-;;               (setq products nil)
-;;               (dolist (variant (char-component-variants (elt components i)))
-;;                 (dolist (product (get-char-attribute
-;;                                   variant 'ideographic-products))
-;;                   (unless (memq product products)
-;;                     (when (memq product dest)
-;;                       (setq products (cons product products))))))
-;;               (setq dest products)))
-;;       (setq i (1+ i)))
-;;     products))
 
 
 (defun ideographic-structure-char= (c1 c2)
                 (m2 (char-ucs c2)))
             (or (and m1 m2
                      (eq m1 m2))
-                (memq c1 (char-component-variants c2))
-                 ;; (some (lambda (feature)
-                 ;;         (some (lambda (b2)
-                 ;;                 (unless (characterp b2)
-                 ;;                   (setq b2 (find-char b2)))
-                 ;;                 (and b2
-                 ;;                      (ideographic-structure-char= c1 b2)))
-                 ;;               (char-feature c2 feature)
-                 ;;               ;; (get-char-attribute
-                 ;;               ;;  c2 '<-ideographic-component-forms)
-                 ;;               ))
-                 ;;       (of-component-features))
-                 ;; (progn
-                 ;;   (setq m1 (car (get-char-attribute c1 '<-radical))
-                 ;;         m2 (car (get-char-attribute c2 '<-radical)))
-                 ;;   (unless (characterp m1)
-                 ;;     (setq m1 (find-char m1)))
-                 ;;   (unless (characterp m2)
-                 ;;     (setq m2 (find-char m2)))
-                 ;;   (when (or m1 m2)
-                 ;;     (ideographic-structure-char= m1 m2))
-                 ;;   )
-                )))))
+                (memq c1 (char-component-variants c2)))))))
 
 (defun ideographic-structure-member-compare-components (component s-component)
   (let (ret)
          (or (ideographic-structure-to-ids v)
              v)))
 
+(defun ids-insert-chars-including-components* (components
+                                              &optional level ignored-chars)
+  (unless level
+    (setq level 0))
+  (let (is i as bs)
+    (dolist (c (sort (copy-tree (ideograph-find-products components
+                                                        ignored-chars))
+                    (lambda (a b)
+                      (if (setq as (char-total-strokes a))
+                          (if (setq bs (char-total-strokes b))
+                              (if (= as bs)
+                                  (ideograph-char< a b)
+                                (< as bs))
+                            t)
+                        (ideograph-char< a b)))))
+      (unless (memq c ignored-chars)
+       (setq is (char-feature c 'ideographic-structure))
+       (setq i 0)
+       (while (< i level)
+         (insert "\t")
+         (setq i (1+ i)))
+       (insert (ids-find-format-line c is))
+       (setq ignored-chars
+             (ids-insert-chars-including-components*
+              (char-to-string c) (1+ level)
+              (cons c ignored-chars))))
+      )
+    )
+  ignored-chars)
+
 (defun ids-insert-chars-including-components (components
                                              &optional level ignored-chars)
   (unless level
     (setq level 0))
+  (setq ignored-chars
+       (nreverse
+        (ids-insert-chars-including-components* components
+                                                level ignored-chars)))
   (let (is i as bs)
-    (dolist (c (sort (copy-tree (ideographic-products-find components))
+    (dolist (c ignored-chars)
+      (dolist (vc (char-component-variants c))
+       (unless (memq vc ignored-chars)
+         (when (setq is (get-char-attribute vc 'ideographic-structure))
+           (setq i 0)
+           (while (< i level)
+             (insert "\t")
+             (setq i (1+ i)))
+           (insert (ids-find-format-line vc is))
+           (setq ignored-chars
+                 (ids-insert-chars-including-components*
+                  (char-to-string vc) (1+ level)
+                  (cons vc ignored-chars)))))))
+    (dolist (c (sort (copy-tree (ideograph-find-products-with-variants
+                                components ignored-chars))
                     (lambda (a b)
                       (if (setq as (char-total-strokes a))
                           (if (setq bs (char-total-strokes b))
                             t)
                         (ideograph-char< a b)))))
       (unless (memq c ignored-chars)
-       (setq is (char-feature c 'ideographic-structure))
+       (setq is (get-char-attribute c 'ideographic-structure))
        (setq i 0)
        (while (< i level)
          (insert "\t")
          (setq i (1+ i)))
        (insert (ids-find-format-line c is))
        (setq ignored-chars
-             (ids-insert-chars-including-components
+             (ids-insert-chars-including-components*
               (char-to-string c) (1+ level)
               (cons c ignored-chars))))
-      ))
+      )
+    )
   ignored-chars)
-;; (defun ids-insert-chars-including-components (components level)
-;;   (let (is dis i)
-;;     (dolist (c (ideographic-products-find components))
-;;       (setq is (char-feature c 'ideographic-structure))
-;;       (setq i 0)
-;;       (while (< i level)
-;;         (insert "\t")
-;;         (setq i (1+ i)))
-;;       (insert (ids-find-format-line c is))
-;;       ;;(forward-line -1)
-;;       (ids-insert-chars-including-components
-;;        (char-to-string c) (1+ level))
-;;       )))
 
 ;;;###autoload
 (defun ids-find-chars-including-components (components)
   (with-current-buffer (get-buffer-create ids-find-result-buffer)
     (setq buffer-read-only nil)
     (erase-buffer)
-    (ids-insert-chars-including-components components 0)
-    ;; (let (is dis)
-    ;;   (dolist (c (ideographic-products-find components))
-    ;;     (setq is (char-feature c 'ideographic-structure))
-    ;;     ;; to avoid problems caused by wrong indexes
-    ;;     ;; (when (every (lambda (cc)
-    ;;     ;;                (ideographic-structure-member cc is))
-    ;;     ;;              components)
-    ;;     (dolist (dc (ideographic-products-find (char-to-string c)))
-    ;;       (setq dis (char-feature dc 'ideographic-structure))
-    ;;     ;;     ;; to avoid problems caused by wrong indexes
-    ;;     ;;     (when (every (lambda (dcc)
-    ;;     ;;                    (ideographic-structure-member dcc is))
-    ;;     ;;                  components)
-    ;;       (insert "\t")
-    ;;       (insert (ids-find-format-line dc dis))
-    ;;       (forward-line -1)
-    ;;     ;;       )
-    ;;       )
-    ;;     (insert (ids-find-format-line c is))
-    ;;     (forward-line -1)
-    ;;     ;;   )
-    ;;     )
-    ;;   )
+    (ids-insert-chars-including-components components 0 nil)
+    ;; (let ((ignored-chars
+    ;;        (nreverse
+    ;;         (ids-insert-chars-including-components components 0 nil
+    ;;                                                #'ideograph-find-products)))
+    ;;       rest)
+    ;;   (setq rest ignored-chars)
+    ;;   ;; (dolist (c rest)
+    ;;   ;;   (setq ignored-chars
+    ;;   ;;         (union ignored-chars
+    ;;   ;;                (ids-insert-chars-including-components
+    ;;   ;;                 (list c) 0 ignored-chars
+    ;;   ;;                 #'ideograph-find-products-with-variants))))
+    ;;   (ids-insert-chars-including-components components 0 ignored-chars
+    ;;                                          #'ideograph-find-products-with-variants))
     (goto-char (point-min)))
   (view-buffer ids-find-result-buffer))
-;; (defun ids-find-chars-including-components (components)
-;;   "Search Ideographs whose structures have COMPONENTS."
-;;   (interactive "sComponents : ")
-;;   (with-current-buffer (get-buffer-create ids-find-result-buffer)
-;;     (setq buffer-read-only nil)
-;;     (erase-buffer)
-;;     (map-char-attribute
-;;      (lambda (c v)
-;;        (when (every (lambda (p)
-;;                       (ideographic-structure-member p v))
-;;                     components)
-;;          (insert (ids-find-format-line c v)))
-;;        nil)
-;;      'ideographic-structure)
-;;     (goto-char (point-min)))
-;;   (view-buffer ids-find-result-buffer))
 
 ;;;###autoload
 (define-obsolete-function-alias 'ideographic-structure-search-chars
   (with-current-buffer (get-buffer-create ids-find-result-buffer)
     (setq buffer-read-only nil)
     (erase-buffer)
-    (let (ucs jis)
-      (map-char-attribute
-       (lambda (c v)
-        (when (ideographic-structure-repertoire-p v components)
-          (insert (ids-find-format-line c v))))
-       'ideographic-structure))
+    (map-char-attribute
+     (lambda (c v)
+       (when (ideographic-structure-repertoire-p v components)
+        (insert (ids-find-format-line c v))))
+     'ideographic-structure)
     (goto-char (point-min)))
   (view-buffer ids-find-result-buffer))
 
 
+(defun ideographic-structure-merge-components-alist (ca1 ca2)
+  (let ((dest-alist ca1)
+       ret)
+    (dolist (cell ca2)
+      (if (setq ret (assq (car cell) dest-alist))
+         (setcdr ret (+ (cdr ret)(cdr cell)))
+       (setq dest-alist (cons cell dest-alist))))
+    dest-alist))
+
+(defun ideographic-structure-to-components-alist (structure)
+  (apply #'ideographic-structure-to-components-alist* structure))
+
+(defun ideographic-structure-to-components-alist* (operator component1 component2
+                                                           &optional component3
+                                                           &rest opts)
+  (let (dest-alist ret)
+    (setq dest-alist
+         (cond ((characterp component1)
+                (unless (encode-char component1 'ascii)
+                  (list (cons component1 1)))
+                )
+               ((setq ret (assq 'ideographic-structure component1))
+                (ideographic-structure-to-components-alist (cdr ret))
+                )
+               ((setq ret (find-char component1))
+                (list (cons ret 1))
+                )))
+    (setq dest-alist
+         (ideographic-structure-merge-components-alist
+          dest-alist
+          (cond ((characterp component2)
+                 (unless (encode-char component2 'ascii)
+                   (list (cons component2 1)))
+                 )
+                ((setq ret (assq 'ideographic-structure component2))
+                 (ideographic-structure-to-components-alist (cdr ret))
+                 )
+                ((setq ret (find-char component2))
+                 (list (cons ret 1))
+                 ))))
+    (if (memq operator '(?\u2FF2 ?\u2FF3))
+       (ideographic-structure-merge-components-alist
+        dest-alist
+        (cond ((characterp component3)
+               (unless (encode-char component3 'ascii)
+                 (list (cons component3 1)))
+               )
+              ((setq ret (assq 'ideographic-structure component3))
+               (ideographic-structure-to-components-alist (cdr ret))
+               )
+              ((setq ret (find-char component3))
+               (list (cons ret 1))
+               )))
+      dest-alist)))
+
+(defun ids-find-merge-variables (ve1 ve2)
+  (cond ((eq ve1 t)
+        ve2)
+       ((eq ve2 t)
+        ve1)
+       (t
+        (let ((dest-alist ve1)
+              (rest ve2)
+              cell ret)
+          (while (and rest
+                      (setq cell (car rest))
+                      (if (setq ret (assq (car cell) ve1))
+                          (eq (cdr ret)(cdr cell))
+                        (setq dest-alist (cons cell dest-alist))))
+            (setq rest (cdr rest)))
+          (if rest
+              nil
+            dest-alist)))))
+
+;;;###autoload
+(defun ideographic-structure-equal (structure1 structure2)
+  (let (dest-alist ret)
+    (and (setq dest-alist (ideographic-structure-character=
+                          (car structure1)(car structure2)))
+        (setq ret (ideographic-structure-character=
+                   (nth 1 structure1)(nth 1 structure2)))
+        (setq dest-alist (ids-find-merge-variables dest-alist ret))
+        (setq ret (ideographic-structure-character=
+                   (nth 2 structure1)(nth 2 structure2)))
+        (setq dest-alist (ids-find-merge-variables dest-alist ret))
+        (if (memq (car structure1) '(?\u2FF2 ?\u2FF3))
+            (and (setq ret (ideographic-structure-character=
+                            (nth 3 structure1)(nth 3 structure2)))
+                 (setq dest-alist (ids-find-merge-variables dest-alist ret)))
+          dest-alist))))
+
+;;;###autoload
+(defun ideographic-structure-character= (c1 c2)
+  (let (ret ret2)
+    (cond ((characterp c1)
+          (cond ((encode-char c1 'ascii)
+                 (list (cons c1 c2))
+                 )
+                ((characterp c2)
+                 (if (encode-char c2 'ascii)
+                     (list (cons c2 c1))
+                   (eq c1 c2))
+                 )
+                ((setq ret2 (find-char c2))
+                 (eq c1 ret2)
+                 )
+                ((setq ret2 (assq 'ideographic-structure c2))
+                 (and (setq ret (get-char-attribute c1 'ideographic-structure))
+                      (ideographic-structure-equal ret (cdr ret2)))
+                 ))
+          )
+         ((setq ret (assq 'ideographic-structure c1))
+          (cond ((characterp c2)
+                 (if (encode-char c2 'ascii)
+                     (list (cons c2 c1))
+                   (and (setq ret2 (get-char-attribute c2 'ideographic-structure))
+                        (ideographic-structure-equal (cdr ret) ret2)))
+                 )
+                ((setq ret2 (find-char c2))
+                 (and (setq ret2 (get-char-attribute ret2 'ideographic-structure))
+                      (ideographic-structure-equal (cdr ret) ret2))
+                 )
+                ((setq ret2 (assq 'ideographic-structure c2))
+                 (ideographic-structure-equal (cdr ret)(cdr ret2))
+                 ))
+          )
+         ((setq ret (find-char c1))
+          (cond ((characterp c2)
+                 (if (encode-char c2 'ascii)
+                     (list (cons c2 c1))
+                   (eq ret c2))
+                 )
+                ((setq ret2 (find-char c2))
+                 (eq ret ret2)
+                 )
+                ((setq ret2 (assq 'ideographic-structure c2))
+                 (and (setq ret (get-char-attribute ret 'ideographic-structure))
+                      (ideographic-structure-equal ret (cdr ret2))
+                      )))))))
+
+;;;###autoload
+(defun ideographic-structure-find-chars (structure)
+  (apply #'ideographic-structure-find-chars* structure))
+
+(defun ideographic-structure-find-chars* (operator component1 component2
+                                                  &optional component3)
+  (let ((comp-alist (ideographic-structure-to-components-alist*
+                    operator component1 component2 component3))
+       c1 c2 c3
+       ret pl str
+       var-alist)
+    (dolist (pc (caar
+                (sort (mapcar (lambda (cell)
+                                (if (setq ret (get-char-attribute
+                                               (car cell) 'ideographic-products))
+                                    (cons ret (length ret))
+                                  (cons nil 0)))
+                              comp-alist)
+                      (lambda (a b)
+                        (< (cdr a)(cdr b))))))
+      (when (and (setq str (get-char-attribute pc 'ideographic-structure))
+                (setq var-alist
+                      (ideographic-structure-character= (car str) operator))
+                (setq c1 (nth 1 str))
+                (setq ret (ideographic-structure-character= c1 component1))
+                (setq var-alist (ids-find-merge-variables var-alist ret))
+                (setq c2 (nth 2 str))
+                (setq ret (ideographic-structure-character= c2 component2))
+                (setq var-alist (ids-find-merge-variables var-alist ret))
+                (cond ((memq (car str) '(?\u2FF2 ?\u2FF3))
+                       (setq c3 (nth 3 str))
+                       (and (setq ret (ideographic-structure-character=
+                                       c3 component3))
+                            (ids-find-merge-variables var-alist ret))
+                       )
+                      (t var-alist)))
+       (setq pl (cons pc pl))
+       ))
+    pl))
+
+;;;###autoload
+(defun ideographic-char-count-components (char component)
+  (let ((dest 0)
+       structure)
+    (cond ((eq char component)
+          1)
+         ((setq structure (get-char-attribute char 'ideographic-structure))
+          (dolist (cell (ideographic-structure-to-components-alist structure))
+            (setq dest
+                  (+ dest
+                     (if (eq (car cell) char)
+                         (cdr cell)
+                       (* (ideographic-char-count-components (car cell) component)
+                          (cdr cell))))))
+          dest)
+         (t
+          0))))
+
+
 ;;; @ End.
 ;;;