X-Git-Url: http://git.chise.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=ids-find.el;h=b0afbdd810cee2fc9666070780a4623590f97d2c;hb=af02acf26c0ebf972aa1baabdec134446ff08558;hp=c410984134bf2e2af0d6fb0aa5a1cb9f0787d5a3;hpb=6ecbe5a19084105d6a06c4d4a85bc017dd59f3fa;p=chise%2Fids.git diff --git a/ids-find.el b/ids-find.el index c410984..b0afbdd 100644 --- a/ids-find.el +++ b/ids-find.el @@ -1,6 +1,7 @@ ;;; ids-find.el --- search utility based on Ideographic-structures ;; -*- coding: utf-8-mcs-er -*- -;; Copyright (C) 2002, 2003, 2005, 2006, 2007, 2017, 2020 MORIOKA Tomohiko +;; Copyright (C) 2002, 2003, 2005, 2006, 2007, 2017, 2020, 2021, 2022, 2023 +;; MORIOKA Tomohiko ;; Author: MORIOKA Tomohiko ;; Keywords: Kanji, Ideographs, search, IDS, CHISE, UCS, Unicode @@ -30,7 +31,14 @@ (put-char-attribute component 'ideographic-products (cons product ret)) (when (setq ret (char-feature component 'ideographic-structure)) - (ids-index-store-structure product ret))) + (ids-index-store-structure product ret)) + (when (setq ret (char-feature component 'ideographic-structure@apparent)) + (ids-index-store-structure product ret)) + (when (setq ret (char-feature component 'ideographic-structure@apparent/leftmost)) + (ids-index-store-structure product ret)) + (when (setq ret (char-feature component 'ideographic-structure@apparent/rightmost)) + (ids-index-store-structure product ret)) + ) )) (defun ids-index-store-structure (product structure) @@ -42,6 +50,12 @@ (ids-index-store-char product cell)) ((setq ret (assq 'ideographic-structure cell)) (ids-index-store-structure product (cdr ret))) + ((setq ret (assq 'ideographic-structure@apparent cell)) + (ids-index-store-structure product (cdr ret))) + ((setq ret (assq 'ideographic-structure@apparent/leftmost cell)) + (ids-index-store-structure product (cdr ret))) + ((setq ret (assq 'ideographic-structure@apparent/rightmost cell)) + (ids-index-store-structure product (cdr ret))) ((setq ret (find-char cell)) (ids-index-store-char product ret)) )))) @@ -59,6 +73,62 @@ (ids-index-store-structure c v) nil) 'ideographic-structure@apparent) + (map-char-attribute + (lambda (c v) + (ids-index-store-structure c v) + nil) + 'ideographic-structure@apparent/leftmost) + (map-char-attribute + (lambda (c v) + (ids-index-store-structure c v) + nil) + 'ideographic-structure@apparent/rightmost) + (let (products ucs) + (map-char-attribute + (lambda (c v) + (setq products (get-char-attribute c 'ideographic-products)) + (dolist (comp (delq c (char-ucs-chars c))) + (dolist (p_c (get-char-attribute comp 'ideographic-products)) + (unless (encode-char p_c '=ucs) + (if (setq ucs (char-ucs p_c)) + (setq p_c (decode-char '=ucs ucs)))) + (setq products (adjoin p_c products)))) + (put-char-attribute c 'ideographic-products products) + nil) + '=>iwds-1) + (map-char-attribute + (lambda (c v) + (setq products (get-char-attribute c 'ideographic-products)) + (dolist (comp (delq c (char-ucs-chars c))) + (dolist (p_c (get-char-attribute comp 'ideographic-products)) + (unless (encode-char p_c '=ucs) + (if (setq ucs (char-ucs p_c)) + (setq p_c (decode-char '=ucs ucs)))) + (setq products (adjoin p_c products)))) + (put-char-attribute c 'ideographic-products products) + nil) + '=>ucs@iwds-1) + (map-char-attribute + (lambda (c v) + (setq products (get-char-attribute c 'ideographic-products)) + (dolist (comp (delq c (char-ucs-chars c))) + (put-char-attribute + comp 'ideographic-products + (union products + (get-char-attribute comp 'ideographic-products)))) + ) + '=>iwds-1) + (map-char-attribute + (lambda (c v) + (setq products (get-char-attribute c 'ideographic-products)) + (dolist (comp (delq c (char-ucs-chars c))) + (put-char-attribute + comp 'ideographic-products + (union products + (get-char-attribute comp 'ideographic-products)))) + ) + '=>ucs@iwds-1) + ) (unless in-memory (save-char-attribute-table 'ideographic-products))) @@ -571,6 +641,12 @@ (ideographic-structure-equal str structure)) (and (setq str (get-char-attribute pc 'ideographic-structure@apparent)) + (ideographic-structure-equal str structure)) + (and (setq str + (get-char-attribute pc 'ideographic-structure@apparent/leftmost)) + (ideographic-structure-equal str structure)) + (and (setq str + (get-char-attribute pc 'ideographic-structure@apparent/rightmost)) (ideographic-structure-equal str structure))) (setq pl (cons pc pl)) )) @@ -599,16 +675,20 @@ (defun ideographic-character-get-structure (character) "Return ideographic-structure of CHARACTER. CHARACTER can be a character or char-spec." - (let (ret) - (cond ((characterp character) - (get-char-attribute character 'ideographic-structure) - ) - ((setq ret (assq 'ideographic-structure character)) - (cdr ret) - ) - ((setq ret (find-char character)) - (get-char-attribute ret 'ideographic-structure) - )))) + (mapcar (lambda (cell) + (or (and (listp cell) + (find-char cell)) + cell)) + (let (ret) + (cond ((characterp character) + (get-char-attribute character 'ideographic-structure) + ) + ((setq ret (assq 'ideographic-structure character)) + (cdr ret) + ) + ((setq ret (find-char character)) + (get-char-attribute ret 'ideographic-structure) + ))))) ;;;###autoload (defun ideographic-char-match-component (char component) @@ -759,6 +839,9 @@ COMPONENT can be a character or char-spec." ret dest sub) (while rest (setq cell (pop rest)) + (if (and (consp cell) + (setq ret (find-char cell))) + (setq cell ret)) (cond ((and (consp cell) (cond ((setq ret (assq 'ideographic-structure cell)) @@ -768,9 +851,15 @@ COMPONENT can be a character or char-spec." (setq sub cell) ))) (setq cell - (if (setq ret (ideographic-structure-find-chars sub)) - (car ret) - (list (cons 'ideographic-structure sub)))) + (cond ((setq ret (ideographic-structure-find-chars sub)) + (car ret) + ) + ((setq ret (ideographic-structure-compact sub)) + (list (cons 'ideographic-structure ret)) + ) + (t + (list (cons 'ideographic-structure sub)))) + ) )) (setq dest (cons cell dest))) (nreverse dest))) @@ -778,7 +867,8 @@ COMPONENT can be a character or char-spec." (defun ideographic-structure-compare-functional-and-apparent (structure &optional char conversion-only) - (let (enc enc-str enc2-str new-str new-str-c f-res a-res ret code) + (let (enc enc-str enc2-str enc3-str new-str new-str-c + f-res a-res ret code) (cond ((eq (car structure) ?⿸) (setq enc (nth 1 structure)) @@ -942,7 +1032,28 @@ COMPONENT can be a character or char-spec." a-res (list ?⿰ new-str-c (nth 2 enc-str)) 210)) - ))) + ) + ((eq (car enc-str) ?⿱) + (unless conversion-only + (setq f-res (ids-find-chars-including-ids enc-str))) + (setq new-str (list ?⿰ + (nth 2 structure) + (nth 2 enc-str))) + (setq new-str-c + (if (setq ret (ideographic-structure-find-chars new-str)) + (car ret) + (list (cons 'ideographic-structure new-str)))) + (if conversion-only + (list ?⿱ (nth 1 enc-str) new-str-c) + (setq a-res (ids-find-chars-including-ids new-str)) + (list enc + f-res + new-str-c + a-res + (list ?⿱ (nth 1 enc-str) new-str-c) + 220)) + ) + )) ) ((eq (get-char-attribute (car structure) '=ucs-itaiji-001) #x2FF6) (setq enc (nth 1 structure)) @@ -1399,26 +1510,69 @@ COMPONENT can be a character or char-spec." 601)) ) ((and (setq enc2-str (ideographic-character-get-structure (nth 2 enc-str))) - (eq (car enc2-str) ?⿰)) + (cond + ((eq (car enc2-str) ?⿰) + (setq code 611) + ) + ((eq (car enc2-str) ?⿲) + (setq code 614) + ) + ((and (eq (car enc2-str) ?⿱) + (setq enc3-str + (ideographic-character-get-structure (nth 2 enc2-str))) + (eq (car enc3-str) ?⿰)) + (setq code 613) + ))) (unless conversion-only (setq f-res (ids-find-chars-including-ids enc-str))) - (setq new-str (list ?⿲ - (nth 1 enc2-str) - (nth 2 structure) - (nth 2 enc2-str))) + (setq new-str + (cond ((eq code 611) + (list ?⿲ + (nth 1 enc2-str) + (nth 2 structure) + (nth 2 enc2-str)) + ) + ((eq code 613) + (list ?⿲ + (nth 1 enc3-str) + (nth 2 structure) + (nth 2 enc3-str)) + ) + ((eq code 614) + (list ?⿲ + (nth 1 enc2-str) + (list (list 'ideographic-structure + ?⿱ + (nth 2 enc2-str) + (nth 2 structure))) + (nth 3 enc2-str)) + ))) (setq new-str-c (if (setq ret (ideographic-structure-find-chars new-str)) (car ret) - (list (cons 'ideographic-structure new-str)))) + (list (cons 'ideographic-structure + (ideographic-structure-compact new-str))))) (if conversion-only - (list ?⿱ (nth 1 enc-str) new-str-c) + (cond ((or (eq code 611) + (eq code 614)) + (list ?⿱ (nth 1 enc-str) new-str-c) + ) + ((eq code 613) + (list ?⿳ (nth 1 enc-str)(nth 1 enc2-str) new-str-c) + )) (setq a-res (ids-find-chars-including-ids new-str)) (list enc f-res new-str-c a-res - (list ?⿱ (nth 1 enc-str) new-str-c) - 611)) + (cond ((or (eq code 611) + (eq code 614)) + (list ?⿱ (nth 1 enc-str) new-str-c) + ) + ((eq code 613) + (list ?⿳ (nth 1 enc-str)(nth 1 enc2-str) new-str-c) + )) + code)) )) ) ((eq (car enc-str) ?⿳) @@ -1487,9 +1641,29 @@ COMPONENT can be a character or char-spec." new-str-c a-res (list ?⿲ (nth 1 enc2-str) new-str-c (nth 2 enc2-str)) - 630)) - ) - ))) + 630))) + ) + ((eq (car enc-str) ?⿵) + (unless conversion-only + (setq f-res (ids-find-chars-including-ids enc-str))) + (setq new-str (list ?⿱ + (nth 2 enc-str) + (nth 2 structure))) + (setq new-str-c + (if (setq ret (ideographic-structure-find-chars new-str)) + (car ret) + (list (cons 'ideographic-structure new-str)))) + (if conversion-only + (list ?⿵ (nth 1 enc-str) new-str-c) + (setq a-res (ids-find-chars-including-ids new-str)) + (list enc + f-res + new-str-c + a-res + (list ?⿵ (nth 1 enc-str) new-str-c) + 640)) + ) + )) ) ((eq (car structure) ?⿷) (setq enc (nth 1 structure)) @@ -1520,17 +1694,76 @@ COMPONENT can be a character or char-spec." a-res (list ?⿺ (nth 1 enc-str) new-str-c) 710)) - ))) + ) + ((eq (car enc-str) ?⿸) + (unless conversion-only + (setq f-res (ids-find-chars-including-ids enc-str))) + (cond + ((and (characterp (nth 2 enc-str)) + (or (memq (char-ucs (nth 2 enc-str)) + '(#x4EBA #x5165 #x513F #x51E0)) + (memq (or (encode-char (nth 2 enc-str) '=>ucs@iwds-1) + (encode-char (nth 2 enc-str) '=>ucs@component)) + '(#x4EBA #x513F)))) + (setq new-str (list ?⿺ + (nth 2 enc-str) + (nth 2 structure))) + (setq new-str-c + (if (setq ret (ideographic-structure-find-chars new-str)) + (car ret) + (list (cons 'ideographic-structure new-str)))) + (if conversion-only + (list ?⿸ (nth 1 enc-str) new-str-c) + (setq a-res (ids-find-chars-including-ids new-str)) + (list enc + f-res + new-str-c + a-res + (list ?⿸ (nth 1 enc-str) new-str-c) + 721)) + ) + (t + (setq new-str (list ?⿱ + (nth 2 structure) + (nth 2 enc-str))) + (setq new-str-c + (if (setq ret (ideographic-structure-find-chars new-str)) + (car ret) + (list (cons 'ideographic-structure new-str)))) + (if conversion-only + (list ?⿸ (nth 1 enc-str) new-str-c) + (setq a-res (ids-find-chars-including-ids new-str)) + (list enc + f-res + new-str-c + a-res + (list ?⿸ (nth 1 enc-str) new-str-c) + 722)) + )) + ) + )) ) ((eq (car structure) ?⿺) (setq enc (nth 1 structure)) (when (setq enc-str (cond ((characterp enc) - (get-char-attribute enc 'ideographic-structure) + (or (get-char-attribute enc 'ideographic-structure) + (get-char-attribute enc 'ideographic-structure@apparent) + (get-char-attribute enc 'ideographic-structure@apparent/leftmost) + (get-char-attribute enc 'ideographic-structure@apparent/rightmost)) ) ((consp enc) - (cdr (assq 'ideographic-structure enc)) + (or (cdr (assq 'ideographic-structure enc)) + (cdr (assq 'ideographic-structure@apparent enc)) + (cdr (assq 'ideographic-structure@apparent/leftmost enc)) + (cdr (assq 'ideographic-structure@apparent/rightmost enc))) ))) + ;; (setq enc-str + ;; (mapcar (lambda (cell) + ;; (or (and (listp cell) + ;; (find-char cell)) + ;; cell)) + ;; enc-str)) (cond ((eq (car enc-str) ?⿱) (cond @@ -1561,10 +1794,17 @@ COMPONENT can be a character or char-spec." code)) ) ((and (characterp (nth 2 enc-str)) - (memq (char-ucs (nth 2 enc-str)) - '(#x706C - #x65E5 #x66F0 #x5FC3 - #x2123C #x58EC #x738B #x7389))) + (or (memq (char-ucs (nth 2 enc-str)) + '(#x4E00 + #x706C + #x65E5 #x66F0 #x5FC3 + #x2123C #x58EC #x738B #x7389)) + (memq (encode-char (nth 2 enc-str) '=>ucs@component) + '(#x2123C #x58EC)) + (eq (encode-char (nth 2 enc-str) '=>ucs@iwds-1) + #x7389) + (eq (encode-char (nth 2 enc-str) '=>big5-cdp) + #x8D71))) (unless conversion-only (setq f-res (ids-find-chars-including-ids enc-str))) (setq new-str (list ?⿰