update.
[chise/ids.git] / ids-find.el
index c58d022..b0afbdd 100644 (file)
@@ -1,6 +1,7 @@
 ;;; ids-find.el --- search utility based on Ideographic-structures ;; -*- coding: utf-8-mcs-er -*-
 
-;; Copyright (C) 2002, 2003, 2005, 2006, 2007, 2017, 2020 MORIOKA Tomohiko
+;; Copyright (C) 2002, 2003, 2005, 2006, 2007, 2017, 2020, 2021, 2022, 2023
+;;   MORIOKA Tomohiko
 
 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
 ;; Keywords: Kanji, Ideographs, search, IDS, CHISE, UCS, Unicode
       (put-char-attribute component 'ideographic-products
                          (cons product ret))
       (when (setq ret (char-feature component 'ideographic-structure))
-       (ids-index-store-structure product ret)))
+       (ids-index-store-structure product ret))
+      (when (setq ret (char-feature component 'ideographic-structure@apparent))
+       (ids-index-store-structure product ret))
+      (when (setq ret (char-feature component 'ideographic-structure@apparent/leftmost))
+       (ids-index-store-structure product ret))
+      (when (setq ret (char-feature component 'ideographic-structure@apparent/rightmost))
+       (ids-index-store-structure product ret))
+      )
     ))
 
 (defun ids-index-store-structure (product structure)
             (ids-index-store-char product cell))
            ((setq ret (assq 'ideographic-structure cell))
             (ids-index-store-structure product (cdr ret)))
+           ((setq ret (assq 'ideographic-structure@apparent cell))
+            (ids-index-store-structure product (cdr ret)))
+           ((setq ret (assq 'ideographic-structure@apparent/leftmost cell))
+            (ids-index-store-structure product (cdr ret)))
+           ((setq ret (assq 'ideographic-structure@apparent/rightmost cell))
+            (ids-index-store-structure product (cdr ret)))
            ((setq ret (find-char cell))
             (ids-index-store-char product ret))
            ))))
      (ids-index-store-structure c v)
      nil)
    'ideographic-structure@apparent)
+  (map-char-attribute
+   (lambda (c v)
+     (ids-index-store-structure c v)
+     nil)
+   'ideographic-structure@apparent/leftmost)
+  (map-char-attribute
+   (lambda (c v)
+     (ids-index-store-structure c v)
+     nil)
+   'ideographic-structure@apparent/rightmost)
+  (let (products ucs)
+    (map-char-attribute
+     (lambda (c v)
+       (setq products (get-char-attribute c 'ideographic-products))
+       (dolist (comp (delq c (char-ucs-chars c)))
+        (dolist (p_c (get-char-attribute comp 'ideographic-products))
+          (unless (encode-char p_c '=ucs)
+            (if (setq ucs (char-ucs p_c))
+                (setq p_c (decode-char '=ucs ucs))))
+          (setq products (adjoin p_c products))))
+       (put-char-attribute c 'ideographic-products products)
+       nil)
+     '=>iwds-1)
+    (map-char-attribute
+     (lambda (c v)
+       (setq products (get-char-attribute c 'ideographic-products))
+       (dolist (comp (delq c (char-ucs-chars c)))
+        (dolist (p_c (get-char-attribute comp 'ideographic-products))
+          (unless (encode-char p_c '=ucs)
+            (if (setq ucs (char-ucs p_c))
+                (setq p_c (decode-char '=ucs ucs))))
+          (setq products (adjoin p_c products))))
+       (put-char-attribute c 'ideographic-products products)
+       nil)
+     '=>ucs@iwds-1)
+    (map-char-attribute
+     (lambda (c v)
+       (setq products (get-char-attribute c 'ideographic-products))
+       (dolist (comp (delq c (char-ucs-chars c)))
+        (put-char-attribute
+         comp 'ideographic-products
+         (union products
+                (get-char-attribute comp 'ideographic-products))))
+       )
+     '=>iwds-1)
+    (map-char-attribute
+     (lambda (c v)
+       (setq products (get-char-attribute c 'ideographic-products))
+       (dolist (comp (delq c (char-ucs-chars c)))
+        (put-char-attribute
+         comp 'ideographic-products
+         (union products
+                (get-char-attribute comp 'ideographic-products))))
+       )
+     '=>ucs@iwds-1)
+    )
   (unless in-memory
     (save-char-attribute-table 'ideographic-products)))
 
                     (ideographic-structure-equal str structure))
                (and (setq str
                           (get-char-attribute pc 'ideographic-structure@apparent))
+                    (ideographic-structure-equal str structure))
+               (and (setq str
+                          (get-char-attribute pc 'ideographic-structure@apparent/leftmost))
+                    (ideographic-structure-equal str structure))
+               (and (setq str
+                          (get-char-attribute pc 'ideographic-structure@apparent/rightmost))
                     (ideographic-structure-equal str structure)))
        (setq pl (cons pc pl))
        ))
 (defun ideographic-character-get-structure (character)
   "Return ideographic-structure of CHARACTER.
 CHARACTER can be a character or char-spec."
-  (let (ret)
-    (cond ((characterp character)
-          (get-char-attribute character 'ideographic-structure)
-          )
-         ((setq ret (assq 'ideographic-structure character))
-          (cdr ret)
-          )
-         ((setq ret (find-char character))
-          (get-char-attribute ret 'ideographic-structure)
-          ))))
+  (mapcar (lambda (cell)
+           (or (and (listp cell)
+                    (find-char cell))
+               cell))
+         (let (ret)
+           (cond ((characterp character)
+                  (get-char-attribute character 'ideographic-structure)
+                  )
+                 ((setq ret (assq 'ideographic-structure character))
+                  (cdr ret)
+                  )
+                 ((setq ret (find-char character))
+                  (get-char-attribute ret 'ideographic-structure)
+                  )))))
 
 ;;;###autoload
 (defun ideographic-char-match-component (char component)
@@ -759,6 +839,9 @@ COMPONENT can be a character or char-spec."
        ret dest sub)
     (while rest
       (setq cell (pop rest))
+      (if (and (consp cell)
+              (setq ret (find-char cell)))
+         (setq cell ret))
       (cond
        ((and (consp cell)
             (cond ((setq ret (assq 'ideographic-structure cell))
@@ -768,9 +851,15 @@ COMPONENT can be a character or char-spec."
                    (setq sub cell)
                    )))
        (setq cell
-             (if (setq ret (ideographic-structure-find-chars sub))
-                 (car ret)
-               (list (cons 'ideographic-structure sub))))
+             (cond ((setq ret (ideographic-structure-find-chars sub))
+                    (car ret)
+                    )
+                   ((setq ret (ideographic-structure-compact sub))
+                    (list (cons 'ideographic-structure ret))
+                    )
+                   (t
+                    (list (cons 'ideographic-structure sub))))
+             )
        ))
       (setq dest (cons cell dest)))
     (nreverse dest)))
@@ -778,7 +867,8 @@ COMPONENT can be a character or char-spec."
 (defun ideographic-structure-compare-functional-and-apparent (structure
                                                              &optional char
                                                              conversion-only)
-  (let (enc enc-str enc2-str new-str new-str-c f-res a-res ret code)
+  (let (enc enc-str enc2-str enc3-str new-str new-str-c
+           f-res a-res ret code)
     (cond
      ((eq (car structure) ?⿸)
       (setq enc (nth 1 structure))
@@ -839,12 +929,12 @@ COMPONENT can be a character or char-spec."
                (list
                 (cond
                  ((characterp (nth 2 enc-str))
-                  (if (or (eq (encode-char (nth 2 enc-str) '=>ucs@component)
-                              #x20087)
-                          (eq (encode-char (nth 2 enc-str) '=>ucs@component)
-                              #x5382)
-                          (eq (encode-char (nth 2 enc-str) '=>ucs@component)
+                  (if (or (memq (encode-char (nth 2 enc-str) '=>ucs@component)
+                                '(#x20087 #x5382 #x4E06))
+                          (eq (encode-char (nth 2 enc-str) '=>ucs@iwds-1)
                               #x4E06)
+                          (eq (encode-char (nth 2 enc-str) '=ucs-itaiji-001)
+                              #x2E282)
                           (eq (encode-char (nth 2 enc-str) '=big5-cdp)
                               #x89CE)
                           (eq (encode-char (nth 2 enc-str) '=>big5-cdp)
@@ -942,7 +1032,28 @@ COMPONENT can be a character or char-spec."
                  a-res
                  (list ?⿰ new-str-c (nth 2 enc-str))
                  210))
-         )))
+         )
+        ((eq (car enc-str) ?⿱)
+         (unless conversion-only
+           (setq f-res (ids-find-chars-including-ids enc-str)))
+         (setq new-str (list ?⿰
+                             (nth 2 structure)
+                             (nth 2 enc-str)))
+         (setq new-str-c
+               (if (setq ret (ideographic-structure-find-chars new-str))
+                   (car ret)
+                 (list (cons 'ideographic-structure new-str))))
+         (if conversion-only
+             (list ?⿱ (nth 1 enc-str) new-str-c)
+           (setq a-res (ids-find-chars-including-ids new-str))
+           (list enc
+                 f-res
+                 new-str-c
+                 a-res
+                 (list ?⿱ (nth 1 enc-str) new-str-c)
+                 220))
+         )
+        ))
       )
      ((eq (get-char-attribute (car structure) '=ucs-itaiji-001) #x2FF6)
       (setq enc (nth 1 structure))
@@ -993,6 +1104,26 @@ COMPONENT can be a character or char-spec."
                  a-res
                  (list ?⿱ new-str-c (nth 2 enc-str))
                  320))
+         )
+        ((eq (car enc-str) ?⿰)
+         (unless conversion-only
+           (setq f-res (ids-find-chars-including-ids enc-str)))
+         (setq new-str (list ?⿱
+                             (nth 2 structure)
+                             (nth 1 enc-str)))
+         (setq new-str-c
+               (if (setq ret (ideographic-structure-find-chars new-str))
+                   (car ret)
+                 (list (cons 'ideographic-structure new-str))))
+         (if conversion-only
+             (list ?⿰ new-str-c (nth 2 enc-str))
+           (setq a-res (ids-find-chars-including-ids new-str))
+           (list enc
+                 f-res
+                 new-str-c
+                 a-res
+                 (list ?⿰ new-str-c (nth 2 enc-str))
+                 330))
          ))
        )
       )
@@ -1354,16 +1485,16 @@ COMPONENT can be a character or char-spec."
                         (cdr (assq 'ideographic-structure enc))
                         )))
        (cond
-        ((eq (car enc-str) ?⿱)
-         (setq enc2-str (ideographic-character-get-structure (nth 2 enc-str)))
-         (when (and enc2-str
-                    (eq (car enc2-str) ?⿰))
+        ((eq (car enc-str) ?⿱)         
+         (cond
+          ((and (characterp (nth 2 enc-str))
+                (memq (char-ucs (nth 2 enc-str))
+                      '(#x9580 #x9B25)))
            (unless conversion-only
              (setq f-res (ids-find-chars-including-ids enc-str)))
-           (setq new-str (list ?⿲
-                               (nth 1 enc2-str)
-                               (nth 2 structure)
-                               (nth 2 enc2-str)))
+           (setq new-str (list ?⿵
+                               (nth 2 enc-str)
+                               (nth 2 structure)))
            (setq new-str-c
                  (if (setq ret (ideographic-structure-find-chars new-str))
                      (car ret)
@@ -1376,8 +1507,73 @@ COMPONENT can be a character or char-spec."
                    new-str-c
                    a-res
                    (list ?⿱ (nth 1 enc-str) new-str-c)
-                   611))
+                   601))
            )
+          ((and (setq enc2-str (ideographic-character-get-structure (nth 2 enc-str)))
+                (cond
+                 ((eq (car enc2-str) ?⿰)
+                  (setq code 611)
+                  )
+                 ((eq (car enc2-str) ?⿲)
+                  (setq code 614)
+                  )
+                 ((and (eq (car enc2-str) ?⿱)
+                       (setq enc3-str
+                             (ideographic-character-get-structure (nth 2 enc2-str)))
+                       (eq (car enc3-str) ?⿰))
+                  (setq code 613)
+                  )))
+           (unless conversion-only
+             (setq f-res (ids-find-chars-including-ids enc-str)))
+           (setq new-str
+                 (cond ((eq code 611)
+                        (list ?⿲
+                              (nth 1 enc2-str)
+                              (nth 2 structure)
+                              (nth 2 enc2-str))
+                        )
+                       ((eq code 613)
+                        (list ?⿲
+                              (nth 1 enc3-str)
+                              (nth 2 structure)
+                              (nth 2 enc3-str))
+                        )
+                       ((eq code 614)
+                        (list ?⿲
+                              (nth 1 enc2-str)
+                              (list (list 'ideographic-structure
+                                          ?⿱
+                                          (nth 2 enc2-str)
+                                          (nth 2 structure)))
+                              (nth 3 enc2-str))
+                        )))
+           (setq new-str-c
+                 (if (setq ret (ideographic-structure-find-chars new-str))
+                     (car ret)
+                   (list (cons 'ideographic-structure
+                               (ideographic-structure-compact new-str)))))
+           (if conversion-only
+               (cond ((or (eq code 611)
+                          (eq code 614))
+                      (list ?⿱ (nth 1 enc-str) new-str-c)
+                      )
+                     ((eq code 613)
+                      (list ?⿳ (nth 1 enc-str)(nth 1 enc2-str) new-str-c)
+                      ))
+             (setq a-res (ids-find-chars-including-ids new-str))
+             (list enc
+                   f-res
+                   new-str-c
+                   a-res
+                   (cond ((or (eq code 611)
+                              (eq code 614))
+                          (list ?⿱ (nth 1 enc-str) new-str-c)
+                          )
+                         ((eq code 613)
+                          (list ?⿳ (nth 1 enc-str)(nth 1 enc2-str) new-str-c)
+                          ))
+                   code))
+           ))
          )
         ((eq (car enc-str) ?⿳)
          (setq enc2-str (ideographic-character-get-structure (nth 3 enc-str)))
@@ -1445,9 +1641,29 @@ COMPONENT can be a character or char-spec."
                    new-str-c
                    a-res
                    (list ?⿲ (nth 1 enc2-str) new-str-c (nth 2 enc2-str))
-                   630))
-           )
-         )))
+                   630)))
+         )
+        ((eq (car enc-str) ?⿵)
+         (unless conversion-only
+           (setq f-res (ids-find-chars-including-ids enc-str)))
+         (setq new-str (list ?⿱
+                             (nth 2 enc-str)
+                             (nth 2 structure)))
+         (setq new-str-c
+               (if (setq ret (ideographic-structure-find-chars new-str))
+                   (car ret)
+                 (list (cons 'ideographic-structure new-str))))
+         (if conversion-only
+             (list ?⿵ (nth 1 enc-str) new-str-c)
+           (setq a-res (ids-find-chars-including-ids new-str))
+           (list enc
+                 f-res
+                 new-str-c
+                 a-res
+                 (list ?⿵ (nth 1 enc-str) new-str-c)
+                 640))
+         )
+        ))
       )
      ((eq (car structure) ?⿷)
       (setq enc (nth 1 structure))
@@ -1478,17 +1694,76 @@ COMPONENT can be a character or char-spec."
                  a-res
                  (list ?⿺ (nth 1 enc-str) new-str-c)
                  710))
-         )))
+         )
+        ((eq (car enc-str) ?⿸)
+         (unless conversion-only
+           (setq f-res (ids-find-chars-including-ids enc-str)))
+         (cond
+          ((and (characterp (nth 2 enc-str))
+                (or (memq (char-ucs (nth 2 enc-str))
+                          '(#x4EBA #x5165 #x513F #x51E0))
+                    (memq (or (encode-char (nth 2 enc-str) '=>ucs@iwds-1)
+                              (encode-char (nth 2 enc-str) '=>ucs@component))
+                          '(#x4EBA #x513F))))
+           (setq new-str (list ?⿺
+                               (nth 2 enc-str)
+                               (nth 2 structure)))
+           (setq new-str-c
+                 (if (setq ret (ideographic-structure-find-chars new-str))
+                     (car ret)
+                   (list (cons 'ideographic-structure new-str))))
+           (if conversion-only
+               (list ?⿸ (nth 1 enc-str) new-str-c)
+             (setq a-res (ids-find-chars-including-ids new-str))
+             (list enc
+                   f-res
+                   new-str-c
+                   a-res
+                   (list ?⿸ (nth 1 enc-str) new-str-c)
+                   721))
+           )
+          (t
+           (setq new-str (list ?⿱
+                               (nth 2 structure)
+                               (nth 2 enc-str)))
+           (setq new-str-c
+                 (if (setq ret (ideographic-structure-find-chars new-str))
+                     (car ret)
+                   (list (cons 'ideographic-structure new-str))))
+           (if conversion-only
+               (list ?⿸ (nth 1 enc-str) new-str-c)
+             (setq a-res (ids-find-chars-including-ids new-str))
+             (list enc
+                   f-res
+                   new-str-c
+                   a-res
+                   (list ?⿸ (nth 1 enc-str) new-str-c)
+                   722))
+           ))
+         )
+        ))
       )
      ((eq (car structure) ?⿺)
       (setq enc (nth 1 structure))
       (when (setq enc-str
                  (cond ((characterp enc)
-                        (get-char-attribute enc 'ideographic-structure)
+                        (or (get-char-attribute enc 'ideographic-structure)
+                            (get-char-attribute enc 'ideographic-structure@apparent)
+                            (get-char-attribute enc 'ideographic-structure@apparent/leftmost)
+                            (get-char-attribute enc 'ideographic-structure@apparent/rightmost))
                         )
                        ((consp enc)
-                        (cdr (assq 'ideographic-structure enc))
+                        (or (cdr (assq 'ideographic-structure enc))
+                            (cdr (assq 'ideographic-structure@apparent enc))
+                            (cdr (assq 'ideographic-structure@apparent/leftmost enc))
+                            (cdr (assq 'ideographic-structure@apparent/rightmost enc)))
                         )))
+        ;; (setq enc-str
+        ;;       (mapcar (lambda (cell)
+        ;;                 (or (and (listp cell)
+        ;;                          (find-char cell))
+        ;;                     cell))
+        ;;               enc-str))
        (cond
         ((eq (car enc-str) ?⿱)
          (cond
@@ -1519,10 +1794,17 @@ COMPONENT can be a character or char-spec."
                    code))
            )
           ((and (characterp (nth 2 enc-str))
-                (memq (char-ucs (nth 2 enc-str))
-                      '(#x706C
-                        #x65E5 #x66F0 #x5FC3
-                        #x2123C #x58EC #x738B #x7389)))
+                (or (memq (char-ucs (nth 2 enc-str))
+                          '(#x4E00
+                            #x706C
+                            #x65E5 #x66F0 #x5FC3
+                            #x2123C #x58EC #x738B #x7389))
+                    (memq (encode-char (nth 2 enc-str) '=>ucs@component)
+                          '(#x2123C #x58EC))
+                    (eq (encode-char (nth 2 enc-str) '=>ucs@iwds-1)
+                        #x7389)
+                    (eq (encode-char (nth 2 enc-str) '=>big5-cdp)
+                        #x8D71)))
            (unless conversion-only
              (setq f-res (ids-find-chars-including-ids enc-str)))
            (setq new-str (list ?⿰