X-Git-Url: http://git.chise.org/gitweb/?p=chise%2Fids.git;a=blobdiff_plain;f=ids-find.el;h=a4acb7187eb0665de4529852ac4e8b1b797e750f;hp=db20f4c53226441b1066214938cc7dc1f9bea4ff;hb=HEAD;hpb=ece74e934fa32f5d2b25d7d7ab03080c1ec7f0b7 diff --git a/ids-find.el b/ids-find.el index db20f4c..b0afbdd 100644 --- a/ids-find.el +++ b/ids-find.el @@ -1,6 +1,7 @@ ;;; ids-find.el --- search utility based on Ideographic-structures ;; -*- coding: utf-8-mcs-er -*- -;; Copyright (C) 2002, 2003, 2005, 2006, 2007, 2017, 2020 MORIOKA Tomohiko +;; Copyright (C) 2002, 2003, 2005, 2006, 2007, 2017, 2020, 2021, 2022, 2023 +;; MORIOKA Tomohiko ;; Author: MORIOKA Tomohiko ;; Keywords: Kanji, Ideographs, search, IDS, CHISE, UCS, Unicode @@ -30,7 +31,14 @@ (put-char-attribute component 'ideographic-products (cons product ret)) (when (setq ret (char-feature component 'ideographic-structure)) - (ids-index-store-structure product ret))) + (ids-index-store-structure product ret)) + (when (setq ret (char-feature component 'ideographic-structure@apparent)) + (ids-index-store-structure product ret)) + (when (setq ret (char-feature component 'ideographic-structure@apparent/leftmost)) + (ids-index-store-structure product ret)) + (when (setq ret (char-feature component 'ideographic-structure@apparent/rightmost)) + (ids-index-store-structure product ret)) + ) )) (defun ids-index-store-structure (product structure) @@ -42,6 +50,12 @@ (ids-index-store-char product cell)) ((setq ret (assq 'ideographic-structure cell)) (ids-index-store-structure product (cdr ret))) + ((setq ret (assq 'ideographic-structure@apparent cell)) + (ids-index-store-structure product (cdr ret))) + ((setq ret (assq 'ideographic-structure@apparent/leftmost cell)) + (ids-index-store-structure product (cdr ret))) + ((setq ret (assq 'ideographic-structure@apparent/rightmost cell)) + (ids-index-store-structure product (cdr ret))) ((setq ret (find-char cell)) (ids-index-store-char product ret)) )))) @@ -59,6 +73,62 @@ (ids-index-store-structure c v) nil) 'ideographic-structure@apparent) + (map-char-attribute + (lambda (c v) + (ids-index-store-structure c v) + nil) + 'ideographic-structure@apparent/leftmost) + (map-char-attribute + (lambda (c v) + (ids-index-store-structure c v) + nil) + 'ideographic-structure@apparent/rightmost) + (let (products ucs) + (map-char-attribute + (lambda (c v) + (setq products (get-char-attribute c 'ideographic-products)) + (dolist (comp (delq c (char-ucs-chars c))) + (dolist (p_c (get-char-attribute comp 'ideographic-products)) + (unless (encode-char p_c '=ucs) + (if (setq ucs (char-ucs p_c)) + (setq p_c (decode-char '=ucs ucs)))) + (setq products (adjoin p_c products)))) + (put-char-attribute c 'ideographic-products products) + nil) + '=>iwds-1) + (map-char-attribute + (lambda (c v) + (setq products (get-char-attribute c 'ideographic-products)) + (dolist (comp (delq c (char-ucs-chars c))) + (dolist (p_c (get-char-attribute comp 'ideographic-products)) + (unless (encode-char p_c '=ucs) + (if (setq ucs (char-ucs p_c)) + (setq p_c (decode-char '=ucs ucs)))) + (setq products (adjoin p_c products)))) + (put-char-attribute c 'ideographic-products products) + nil) + '=>ucs@iwds-1) + (map-char-attribute + (lambda (c v) + (setq products (get-char-attribute c 'ideographic-products)) + (dolist (comp (delq c (char-ucs-chars c))) + (put-char-attribute + comp 'ideographic-products + (union products + (get-char-attribute comp 'ideographic-products)))) + ) + '=>iwds-1) + (map-char-attribute + (lambda (c v) + (setq products (get-char-attribute c 'ideographic-products)) + (dolist (comp (delq c (char-ucs-chars c))) + (put-char-attribute + comp 'ideographic-products + (union products + (get-char-attribute comp 'ideographic-products)))) + ) + '=>ucs@iwds-1) + ) (unless in-memory (save-char-attribute-table 'ideographic-products))) @@ -294,7 +364,7 @@ (unless level (setq level 0)) (let (is i as bs) - (dolist (c (sort (copy-tree (ideograph-find-products components + (dolist (c (sort (copy-list (ideograph-find-products components ignored-chars)) (lambda (a b) (if (setq as (char-total-strokes a)) @@ -341,7 +411,7 @@ (ids-insert-chars-including-components* (char-to-string vc) (1+ level) (cons vc ignored-chars))))))) - (dolist (c (sort (copy-tree (ideograph-find-products-with-variants + (dolist (c (sort (copy-list (ideograph-find-products-with-variants components ignored-chars)) (lambda (a b) (if (setq as (char-total-strokes a)) @@ -555,15 +625,8 @@ ;;;###autoload (defun ideographic-structure-find-chars (structure) - (apply #'ideographic-structure-find-chars* structure)) - -(defun ideographic-structure-find-chars* (operator component1 component2 - &optional component3) - (let ((comp-alist (ideographic-structure-to-components-alist* - operator component1 component2 component3)) - c1 c2 c3 - ret pl str - var-alist) + (let ((comp-alist (ideographic-structure-to-components-alist structure)) + ret pl str) (dolist (pc (caar (sort (mapcar (lambda (cell) (if (setq ret (get-char-attribute @@ -573,22 +636,18 @@ comp-alist) (lambda (a b) (< (cdr a)(cdr b)))))) - (when (and (setq str (get-char-attribute pc 'ideographic-structure)) - (setq var-alist - (ideographic-structure-character= (car str) operator)) - (setq c1 (nth 1 str)) - (setq ret (ideographic-structure-character= c1 component1)) - (setq var-alist (ids-find-merge-variables var-alist ret)) - (setq c2 (nth 2 str)) - (setq ret (ideographic-structure-character= c2 component2)) - (setq var-alist (ids-find-merge-variables var-alist ret)) - (cond ((memq (car str) '(?\u2FF2 ?\u2FF3)) - (setq c3 (nth 3 str)) - (and (setq ret (ideographic-structure-character= - c3 component3)) - (ids-find-merge-variables var-alist ret)) - ) - (t var-alist))) + (when (or (and (setq str + (get-char-attribute pc 'ideographic-structure)) + (ideographic-structure-equal str structure)) + (and (setq str + (get-char-attribute pc 'ideographic-structure@apparent)) + (ideographic-structure-equal str structure)) + (and (setq str + (get-char-attribute pc 'ideographic-structure@apparent/leftmost)) + (ideographic-structure-equal str structure)) + (and (setq str + (get-char-attribute pc 'ideographic-structure@apparent/rightmost)) + (ideographic-structure-equal str structure))) (setq pl (cons pc pl)) )) pl)) @@ -616,16 +675,20 @@ (defun ideographic-character-get-structure (character) "Return ideographic-structure of CHARACTER. CHARACTER can be a character or char-spec." - (let (ret) - (cond ((characterp character) - (get-char-attribute character 'ideographic-structure) - ) - ((setq ret (assq 'ideographic-structure character)) - (cdr ret) - ) - ((setq ret (find-char character)) - (get-char-attribute ret 'ideographic-structure) - )))) + (mapcar (lambda (cell) + (or (and (listp cell) + (find-char cell)) + cell)) + (let (ret) + (cond ((characterp character) + (get-char-attribute character 'ideographic-structure) + ) + ((setq ret (assq 'ideographic-structure character)) + (cdr ret) + ) + ((setq ret (find-char character)) + (get-char-attribute ret 'ideographic-structure) + ))))) ;;;###autoload (defun ideographic-char-match-component (char component) @@ -678,439 +741,96 @@ COMPONENT can be a character or char-spec." )) )) -(defun ideographic-chars-to-is-a-tree (chars) - (let (comp char products others dest rest - la lb) - (setq chars (sort chars #'ideographic-structure-char<)) - (while chars - (setq comp (pop chars) - rest chars - products nil - others nil) - (while rest - (setq char (pop rest)) - (cond - ((ideographic-char-match-component char comp) - (push char products) - ) - (t - (push char others) - ))) - (push (cons comp - ;; (nreverse products) - (if products - (sort (ideographic-chars-to-is-a-tree products) - (lambda (a b) - (setq la (length (cdr a)) - lb (length (cdr b))) - (or (> la lb) - (and (= la lb) - (ideograph-char< (car a) (car b)) - ;; (progn - ;; (setq tsa (char-total-strokes (car a)) - ;; tsb (char-total-strokes (car b))) - ;; (if tsa - ;; (if tsb - ;; (or (< tsa tsb) - ;; (and (= tsa tsb) - ;; (ideograph-char< - ;; (car a) (car b)))) - ;; t) - ;; (if tsb - ;; nil - ;; (ideograph-char< (car a) (car b))))) - )))) - nil) - ) - dest) - (setq chars others)) - dest)) +(defun ideo-comp-tree-adjoin (tree char) + (let ((rest tree) + included ; other + dest cell finished) + (while (and (not finished) + rest) + (setq cell (pop rest)) + (cond ((ideographic-structure-character= char (car cell)) + (setq finished t + dest tree + rest nil) + ) + ((ideographic-char-match-component char (car cell)) + (setq dest + (cons (cons (car cell) + (ideo-comp-tree-adjoin (cdr cell) char)) + dest)) + (setq finished t) + ) + ((ideographic-char-match-component (car cell) char) + (setq included (cons cell included)) + ) + ;; (included + ;; (setq other (cons cell other)) + ;; ) + (t + (setq dest (cons cell dest)) + ))) + (cond (finished + (nconc dest rest) + ) + (included + (cons (cons char included) + (nconc dest rest)) + ) + (t + (cons (list char) tree) + )))) -(defun ids-find-chars-including-ids* (operator component1 component2 - &optional component3) - (let ((comp-alist (ideographic-structure-to-components-alist* - operator component1 component2 component3)) - (comp-spec - (list (list* 'ideographic-structure - operator component1 component2 - (if component3 - (list component3))))) - ret str rest) - (dolist (pc (caar - (sort (mapcar (lambda (cell) - (if (setq ret (get-char-attribute - (car cell) 'ideographic-products)) - (cons ret (length ret)) - (cons nil 0))) - comp-alist) - (lambda (a b) - (< (cdr a)(cdr b)))))) - (when (and (every (lambda (cell) - (>= (ideographic-char-count-components pc (car cell)) - (cdr cell))) - comp-alist) - (or (ideographic-char-match-component pc comp-spec) - (and (setq str (get-char-attribute pc 'ideographic-structure)) - (ideographic-char-match-component - (list - (cons - 'ideographic-structure - (functional-ideographic-structure-to-apparent-structure - str))) - comp-spec)))) - (push pc rest))) - (ideographic-chars-to-is-a-tree rest))) +(defun ideographic-chars-to-is-a-tree (chars) + (let (tree) + (dolist (char (sort (copy-list chars) #'ideographic-structure-char<)) + (setq tree (ideo-comp-tree-adjoin tree char))) + tree)) (defun ids-find-chars-including-ids (structure) - (if (characterp structure) - (setq structure (get-char-attribute structure 'ideographic-structure))) - (apply #'ids-find-chars-including-ids* structure)) + (let (comp-alist comp-spec ret str rest) + (cond + ((characterp structure) + (setq rest (copy-list (get-char-attribute structure 'ideographic-products))) + ) + ((setq ret (ideographic-structure-find-chars structure)) + (dolist (pc ret) + (setq rest + (union + rest + (copy-list (get-char-attribute pc 'ideographic-products))))) + ) + (t + (setq comp-alist (ideographic-structure-to-components-alist structure) + comp-spec (list (cons 'ideographic-structure structure))) + (dolist (pc (caar + (sort (mapcar (lambda (cell) + (if (setq ret (get-char-attribute + (car cell) 'ideographic-products)) + (cons ret (length ret)) + (cons nil 0))) + comp-alist) + (lambda (a b) + (< (cdr a)(cdr b)))))) + (when (and (every (lambda (cell) + (>= (ideographic-char-count-components pc (car cell)) + (cdr cell))) + comp-alist) + (or (ideographic-char-match-component pc comp-spec) + (and (setq str (get-char-attribute pc 'ideographic-structure)) + (ideographic-char-match-component + (list + (cons + 'ideographic-structure + (functional-ideographic-structure-to-apparent-structure + str))) + comp-spec)))) + (push pc rest))) + )) + (ideographic-chars-to-is-a-tree rest))) (defun functional-ideographic-structure-to-apparent-structure (structure) (ideographic-structure-compare-functional-and-apparent - structure nil 'conversion-only) - ;; (ideographic-structure-compact - ;; (let (enc enc-str enc2-str new-str) - ;; (cond - ;; ((eq (car structure) ?⿸) - ;; (setq enc (nth 1 structure)) - ;; (when (setq enc-str - ;; (cond ((characterp enc) - ;; (get-char-attribute enc 'ideographic-structure) - ;; ) - ;; ((consp enc) - ;; (cdr (assq 'ideographic-structure enc)) - ;; ))) - ;; (cond - ;; ((eq (car enc-str) ?⿰) - ;; (list ?⿰ (nth 1 enc-str) - ;; (list (list 'ideographic-structure - ;; ?⿱ - ;; (nth 2 enc-str) - ;; (nth 2 structure)))) - ;; ) - ;; ((and (eq (car enc-str) ?⿲) - ;; (memq (char-ucs (nth 1 enc-str)) '(#x4EBB #x2E85)) - ;; (eq (nth 2 enc-str) ?丨)) - ;; (list ?⿰ - ;; (decode-char '=big5-cdp #x8B7A) - ;; (list (list 'ideographic-structure - ;; ?⿱ - ;; (nth 3 enc-str) - ;; (nth 2 structure)))) - ;; ) - ;; ((eq (car enc-str) ?⿱) - ;; (list ?⿱ (nth 1 enc-str) - ;; (list - ;; (cons 'ideographic-structure - ;; (or (functional-ideographic-structure-to-apparent-structure - ;; (setq new-str - ;; (list - ;; (cond - ;; ((characterp (nth 2 enc-str)) - ;; (if (or (eq (encode-char - ;; (nth 2 enc-str) - ;; '=>ucs@component) - ;; #x20087) - ;; (eq (encode-char - ;; (nth 2 enc-str) - ;; '=>ucs@component) - ;; #x5382) - ;; (eq (encode-char - ;; (nth 2 enc-str) - ;; '=>ucs@component) - ;; #x4E06) - ;; (eq (encode-char - ;; (nth 2 enc-str) - ;; '=big5-cdp) - ;; #x89CE) - ;; (eq (encode-char - ;; (nth 2 enc-str) - ;; '=>big5-cdp) - ;; #x88E2) - ;; (eq (encode-char - ;; (nth 2 enc-str) - ;; '=big5-cdp) - ;; #x88AD) - ;; (eq (or (encode-char - ;; (nth 2 enc-str) - ;; '=>big5-cdp) - ;; (encode-char - ;; (nth 2 enc-str) - ;; '=big5-cdp-itaiji-001)) - ;; #x8766) - ;; (eq (car - ;; (get-char-attribute - ;; (nth 2 enc-str) - ;; 'ideographic-structure)) - ;; ?⿸)) - ;; ?⿸ - ;; ?⿰)) - ;; ((eq (car - ;; (cdr - ;; (assq 'ideographic-structure - ;; (nth 2 enc-str)))) - ;; ?⿸) - ;; ?⿸) - ;; (t - ;; ?⿰)) - ;; (nth 2 enc-str) - ;; (nth 2 structure) - ;; ))) - ;; new-str)))) - ;; ) - ;; ((eq (car enc-str) ?⿸) - ;; (list ?⿸ (nth 1 enc-str) - ;; (list - ;; (cons 'ideographic-structure - ;; (setq new-str - ;; (list - ;; (cond - ;; ((characterp (nth 2 enc-str)) - ;; (if (memq (char-ucs (nth 2 enc-str)) - ;; '(#x5F73)) - ;; ?⿰ - ;; ?⿱) - ;; ) - ;; (t - ;; ?⿱)) - ;; (nth 2 enc-str) - ;; (nth 2 structure)))))) - ;; ))) - ;; ) - ;; ((eq (car structure) ?⿹) - ;; (setq enc (nth 1 structure)) - ;; (when (setq enc-str - ;; (cond ((characterp enc) - ;; (get-char-attribute enc 'ideographic-structure) - ;; ) - ;; ((consp enc) - ;; (cdr (assq 'ideographic-structure enc)) - ;; ))) - ;; (cond - ;; ((eq (car enc-str) ?⿰) - ;; (list ?⿰ - ;; (list (list 'ideographic-structure - ;; ?⿱ - ;; (nth 1 enc-str) - ;; (nth 2 structure))) - ;; (nth 2 enc-str)) - ;; ))) - ;; ) - ;; ((eq (get-char-attribute (car structure) '=ucs-itaiji-001) #x2FF6) - ;; (setq enc (nth 1 structure)) - ;; (when (setq enc-str - ;; (cond ((characterp enc) - ;; (get-char-attribute enc 'ideographic-structure) - ;; ) - ;; ((consp enc) - ;; (cdr (assq 'ideographic-structure enc)) - ;; ))) - ;; (cond - ;; ((eq (car enc-str) ?⿺) - ;; (list ?⿺ - ;; (list (list 'ideographic-structure - ;; ?⿱ - ;; (nth 2 structure) - ;; (nth 1 enc-str))) - ;; (nth 2 enc-str)) - ;; ) - ;; ((eq (car enc-str) ?⿱) - ;; (list ?⿱ - ;; (list (list 'ideographic-structure - ;; ?⿰ - ;; (nth 2 structure) - ;; (nth 1 enc-str))) - ;; (nth 2 enc-str)) - ;; )) - ;; ) - ;; ) - ;; ((eq (car structure) ?⿴) - ;; (setq enc (nth 1 structure)) - ;; (when (setq enc-str - ;; (cond ((characterp enc) - ;; (get-char-attribute enc 'ideographic-structure) - ;; ) - ;; ((consp enc) - ;; (cdr (assq 'ideographic-structure enc)) - ;; ))) - ;; (cond - ;; ((eq (car enc-str) ?⿱) - ;; (cond - ;; ((and (characterp (nth 2 enc-str)) - ;; (or (memq (char-ucs (nth 2 enc-str)) '(#x56D7 #x5F51 #x897F)) - ;; (eq (char-feature (nth 2 enc-str) '=>big5-cdp) - ;; #x87A5))) - ;; (list ?⿱ - ;; (nth 1 enc-str) - ;; (list (list 'ideographic-structure - ;; ?⿴ - ;; (nth 2 enc-str) - ;; (nth 2 structure))) - ;; ) - ;; ) - ;; ((and (characterp (nth 2 enc-str)) - ;; (eq (char-ucs (nth 2 enc-str)) #x51F5)) - ;; (list ?⿱ - ;; (nth 1 enc-str) - ;; (list (list 'ideographic-structure - ;; ?⿶ - ;; (nth 2 enc-str) - ;; (nth 2 structure))) - ;; ) - ;; ) - ;; ((and (characterp (nth 1 enc-str)) - ;; (eq (char-feature (nth 1 enc-str) '=>ucs@component) - ;; #x300E6)) - ;; (list ?⿱ - ;; (list (list 'ideographic-structure - ;; ?⿵ - ;; (nth 1 enc-str) - ;; (nth 2 structure))) - ;; (nth 2 enc-str)) - ;; ) - ;; (t - ;; (list ?⿳ - ;; (nth 1 enc-str) - ;; (nth 2 structure) - ;; (nth 2 enc-str)) - ;; )) - ;; )) - ;; ) - ;; ) - ;; ((eq (car structure) ?⿶) - ;; (setq enc (nth 1 structure)) - ;; (when (setq enc-str - ;; (cond ((characterp enc) - ;; (get-char-attribute enc 'ideographic-structure) - ;; ) - ;; ((consp enc) - ;; (cdr (assq 'ideographic-structure enc)) - ;; ))) - ;; (cond - ;; ((eq (car enc-str) ?⿱) - ;; (setq enc2-str (ideographic-character-get-structure (nth 1 enc-str))) - ;; (when (and enc2-str - ;; (eq (car enc2-str) ?⿰)) - ;; (list ?⿱ - ;; (list (list 'ideographic-structure - ;; ?⿲ - ;; (nth 1 enc2-str) - ;; (nth 2 structure) - ;; (nth 2 enc2-str))) - ;; (nth 2 enc-str))) - ;; ) - ;; ((eq (car enc-str) ?⿳) - ;; (setq enc2-str (ideographic-character-get-structure (nth 1 enc-str))) - ;; (when (and enc2-str - ;; (eq (car enc2-str) ?⿰)) - ;; (list ?⿳ - ;; (list (list 'ideographic-structure - ;; ?⿲ - ;; (nth 1 enc2-str) - ;; (nth 2 structure) - ;; (nth 2 enc2-str))) - ;; (nth 2 enc-str) - ;; (nth 3 enc-str))) - ;; ) - ;; ((eq (car enc-str) ?⿲) - ;; (list ?⿲ - ;; (nth 1 enc-str) - ;; (list (list 'ideographic-structure - ;; ?⿱ - ;; (nth 2 structure) - ;; (nth 2 enc-str))) - ;; (nth 3 enc-str)) - ;; ) - ;; ((eq (car enc-str) ?⿴) - ;; (setq enc2-str (ideographic-character-get-structure (nth 1 enc-str))) - ;; (when (and enc2-str - ;; (eq (car enc2-str) ?⿰)) - ;; (list ?⿲ - ;; (nth 1 enc2-str) - ;; (list (list 'ideographic-structure - ;; ?⿱ - ;; (nth 2 structure) - ;; (nth 2 enc-str))) - ;; (nth 2 enc2-str))) - ;; ))) - ;; ) - ;; ((eq (car structure) ?⿵) - ;; (setq enc (nth 1 structure)) - ;; (when (setq enc-str - ;; (cond ((characterp enc) - ;; (get-char-attribute enc 'ideographic-structure) - ;; ) - ;; ((consp enc) - ;; (cdr (assq 'ideographic-structure enc)) - ;; ))) - ;; (cond - ;; ((eq (car enc-str) ?⿱) - ;; (setq enc2-str (ideographic-character-get-structure (nth 2 enc-str))) - ;; (when (and enc2-str - ;; (eq (car enc2-str) ?⿰)) - ;; (list ?⿱ - ;; (nth 1 enc-str) - ;; (list (list 'ideographic-structure - ;; ?⿲ - ;; (nth 1 enc2-str) - ;; (nth 2 structure) - ;; (nth 2 enc2-str))))) - ;; ) - ;; ((eq (car enc-str) ?⿳) - ;; (setq enc2-str (ideographic-character-get-structure (nth 3 enc-str))) - ;; (when (and enc2-str - ;; (eq (car enc2-str) ?⿰)) - ;; (list ?⿳ - ;; (nth 1 enc-str) - ;; (nth 2 enc-str) - ;; (list (list 'ideographic-structure - ;; ?⿲ - ;; (nth 1 enc2-str) - ;; (nth 2 structure) - ;; (nth 2 enc2-str))))) - ;; ) - ;; ((eq (car enc-str) ?⿲) - ;; (list ?⿲ - ;; (nth 1 enc-str) - ;; (list (list 'ideographic-structure - ;; ?⿱ - ;; (nth 2 enc-str) - ;; (nth 2 structure))) - ;; (nth 3 enc-str)) - ;; ) - ;; ((eq (car enc-str) ?⿴) - ;; (setq enc2-str (ideographic-character-get-structure (nth 1 enc-str))) - ;; (when (and enc2-str - ;; (eq (car enc2-str) ?⿰)) - ;; (list ?⿲ - ;; (nth 1 enc2-str) - ;; (list (list 'ideographic-structure - ;; ?⿱ - ;; (nth 2 enc-str) - ;; (nth 2 structure))) - ;; (nth 2 enc2-str))) - ;; ))) - ;; ) - ;; ((eq (car structure) ?⿻) - ;; (setq enc (nth 1 structure)) - ;; (when (setq enc-str - ;; (cond ((characterp enc) - ;; (get-char-attribute enc 'ideographic-structure) - ;; ) - ;; ((consp enc) - ;; (cdr (assq 'ideographic-structure enc)) - ;; ))) - ;; (cond - ;; ((eq (car enc-str) ?⿱) - ;; (list ?⿳ - ;; (nth 1 enc-str) - ;; (nth 2 structure) - ;; (nth 2 enc-str)) - ;; ))) - ;; )) - ;; )) - ) + structure nil 'conversion-only)) ;;;###autoload (defun ideographic-structure-compact (structure) @@ -1119,6 +839,9 @@ COMPONENT can be a character or char-spec." ret dest sub) (while rest (setq cell (pop rest)) + (if (and (consp cell) + (setq ret (find-char cell))) + (setq cell ret)) (cond ((and (consp cell) (cond ((setq ret (assq 'ideographic-structure cell)) @@ -1128,9 +851,15 @@ COMPONENT can be a character or char-spec." (setq sub cell) ))) (setq cell - (if (setq ret (ideographic-structure-find-chars sub)) - (car ret) - (list (cons 'ideographic-structure sub)))) + (cond ((setq ret (ideographic-structure-find-chars sub)) + (car ret) + ) + ((setq ret (ideographic-structure-compact sub)) + (list (cons 'ideographic-structure ret)) + ) + (t + (list (cons 'ideographic-structure sub)))) + ) )) (setq dest (cons cell dest))) (nreverse dest))) @@ -1138,7 +867,8 @@ COMPONENT can be a character or char-spec." (defun ideographic-structure-compare-functional-and-apparent (structure &optional char conversion-only) - (let (enc enc-str enc2-str new-str new-str-c f-res a-res code ret) + (let (enc enc-str enc2-str enc3-str new-str new-str-c + f-res a-res ret code) (cond ((eq (car structure) ?⿸) (setq enc (nth 1 structure)) @@ -1199,12 +929,12 @@ COMPONENT can be a character or char-spec." (list (cond ((characterp (nth 2 enc-str)) - (if (or (eq (encode-char (nth 2 enc-str) '=>ucs@component) - #x20087) - (eq (encode-char (nth 2 enc-str) '=>ucs@component) - #x5382) - (eq (encode-char (nth 2 enc-str) '=>ucs@component) + (if (or (memq (encode-char (nth 2 enc-str) '=>ucs@component) + '(#x20087 #x5382 #x4E06)) + (eq (encode-char (nth 2 enc-str) '=>ucs@iwds-1) #x4E06) + (eq (encode-char (nth 2 enc-str) '=ucs-itaiji-001) + #x2E282) (eq (encode-char (nth 2 enc-str) '=big5-cdp) #x89CE) (eq (encode-char (nth 2 enc-str) '=>big5-cdp) @@ -1302,7 +1032,28 @@ COMPONENT can be a character or char-spec." a-res (list ?⿰ new-str-c (nth 2 enc-str)) 210)) - ))) + ) + ((eq (car enc-str) ?⿱) + (unless conversion-only + (setq f-res (ids-find-chars-including-ids enc-str))) + (setq new-str (list ?⿰ + (nth 2 structure) + (nth 2 enc-str))) + (setq new-str-c + (if (setq ret (ideographic-structure-find-chars new-str)) + (car ret) + (list (cons 'ideographic-structure new-str)))) + (if conversion-only + (list ?⿱ (nth 1 enc-str) new-str-c) + (setq a-res (ids-find-chars-including-ids new-str)) + (list enc + f-res + new-str-c + a-res + (list ?⿱ (nth 1 enc-str) new-str-c) + 220)) + ) + )) ) ((eq (get-char-attribute (car structure) '=ucs-itaiji-001) #x2FF6) (setq enc (nth 1 structure)) @@ -1353,6 +1104,26 @@ COMPONENT can be a character or char-spec." a-res (list ?⿱ new-str-c (nth 2 enc-str)) 320)) + ) + ((eq (car enc-str) ?⿰) + (unless conversion-only + (setq f-res (ids-find-chars-including-ids enc-str))) + (setq new-str (list ?⿱ + (nth 2 structure) + (nth 1 enc-str))) + (setq new-str-c + (if (setq ret (ideographic-structure-find-chars new-str)) + (car ret) + (list (cons 'ideographic-structure new-str)))) + (if conversion-only + (list ?⿰ new-str-c (nth 2 enc-str)) + (setq a-res (ids-find-chars-including-ids new-str)) + (list enc + f-res + new-str-c + a-res + (list ?⿰ new-str-c (nth 2 enc-str)) + 330)) )) ) ) @@ -1452,6 +1223,150 @@ COMPONENT can be a character or char-spec." (list ?⿱ (nth 1 enc-str) new-str-c) 414)) )) + ) + ((eq (car enc-str) ?⿳) + (cond + ((and (characterp (nth 2 enc-str)) + (eq (char-ucs (nth 2 enc-str)) #x56D7)) + (unless conversion-only + (setq f-res (ids-find-chars-including-ids enc-str))) + (setq new-str (list ?⿴ (nth 2 enc-str) (nth 2 structure))) + (setq new-str-c + (if (setq ret (ideographic-structure-find-chars new-str)) + (car ret) + (list (cons 'ideographic-structure new-str)))) + (setq new-str (list ?⿱ (nth 1 enc-str) new-str-c)) + (setq new-str-c + (if (setq ret (ideographic-structure-find-chars new-str)) + (car ret) + (list (cons 'ideographic-structure new-str)))) + (if conversion-only + (list ?⿱ new-str-c (nth 3 enc-str)) + (setq a-res (ids-find-chars-including-ids new-str)) + (list enc + f-res + new-str-c + a-res + (list ?⿱ new-str-c (nth 3 enc-str)) + 415)) + ) + ((and (characterp (nth 2 enc-str)) + (eq (char-ucs (nth 2 enc-str)) #x5196)) + (unless conversion-only + (setq f-res (ids-find-chars-including-ids enc-str))) + (setq new-str (list ?⿱ (nth 1 enc-str) (nth 2 enc-str))) + (setq new-str-c + (if (setq ret (ideographic-structure-find-chars new-str)) + (car ret) + (list (cons 'ideographic-structure new-str)))) + (setq new-str (list ?⿱ new-str-c (nth 2 structure))) + (setq new-str-c + (if (setq ret (ideographic-structure-find-chars new-str)) + (car ret) + (list (cons 'ideographic-structure new-str)))) + (if conversion-only + (list ?⿱ new-str-c (nth 3 enc-str)) + (setq a-res (ids-find-chars-including-ids new-str)) + (list enc + f-res + new-str-c + a-res + (list ?⿱ new-str-c (nth 3 enc-str)) + 416)) + ) + ((and (characterp (nth 2 enc-str)) + (or (eq (encode-char (nth 2 enc-str) '=>big5-cdp) + #x89A6) + (eq (encode-char (nth 2 enc-str) '=>gt-k) + 146) + (eq (char-ucs (nth 2 enc-str)) #x2008A))) + (unless conversion-only + (setq f-res (ids-find-chars-including-ids enc-str))) + (setq new-str (list ?⿱ (nth 2 structure) (nth 2 enc-str))) + (setq new-str-c + (if (setq ret (ideographic-structure-find-chars new-str)) + (car ret) + (list (cons 'ideographic-structure new-str)))) + (setq new-str (list ?⿸ new-str-c (nth 3 enc-str))) + (setq new-str-c + (if (setq ret (ideographic-structure-find-chars new-str)) + (car ret) + (list (cons 'ideographic-structure new-str)))) + (if conversion-only + (list ?⿱ (nth 1 enc-str) new-str-c) + (setq a-res (ids-find-chars-including-ids new-str)) + (list enc + f-res + new-str-c + a-res + (list ?⿱ (nth 1 enc-str) new-str-c) + 417)) + ) + (t + (unless conversion-only + (setq f-res (ids-find-chars-including-ids enc-str))) + (setq new-str (list ?⿻ (nth 2 enc-str) (nth 2 structure))) + (setq new-str-c + (if (setq ret (ideographic-structure-find-chars new-str)) + (car ret) + (list (cons 'ideographic-structure new-str)))) + (setq new-str (list ?⿱ (nth 1 enc-str) new-str-c)) + (setq new-str-c + (if (setq ret (ideographic-structure-find-chars new-str)) + (car ret) + (list (cons 'ideographic-structure new-str)))) + (if conversion-only + (list ?⿱ new-str-c (nth 3 enc-str)) + (setq a-res (ids-find-chars-including-ids new-str)) + (list enc + f-res + new-str-c + a-res + (list ?⿱ new-str-c (nth 3 enc-str)) + 419)) + )) + ) + ((eq (car enc-str) ?⿰) + (cond + ((equal (nth 1 enc-str)(nth 2 enc-str)) + (unless conversion-only + (setq f-res (ids-find-chars-including-ids enc-str))) + (setq new-str (list ?⿲ + (nth 1 enc-str) + (nth 2 structure) + (nth 2 enc-str))) + (setq new-str-c + (list (cons 'ideographic-structure new-str))) + (if conversion-only + new-str + (setq a-res (ids-find-chars-including-ids new-str)) + (list enc + f-res + new-str-c + a-res + new-str + 421)) + ) + (t + (unless conversion-only + (setq f-res (ids-find-chars-including-ids enc-str))) + (setq new-str (list ?⿰ + (nth 2 structure) + (nth 2 enc-str))) + (setq new-str-c + (if (setq ret (ideographic-structure-find-chars new-str)) + (car ret) + (list (cons 'ideographic-structure new-str)))) + (if conversion-only + (list ?⿰ (nth 1 enc-str) new-str-c) + (setq a-res (ids-find-chars-including-ids new-str)) + (list enc + f-res + new-str-c + a-res + (list ?⿰ (nth 1 enc-str) new-str-c) + 422)) + )) )) ) ) @@ -1570,16 +1485,16 @@ COMPONENT can be a character or char-spec." (cdr (assq 'ideographic-structure enc)) ))) (cond - ((eq (car enc-str) ?⿱) - (setq enc2-str (ideographic-character-get-structure (nth 2 enc-str))) - (when (and enc2-str - (eq (car enc2-str) ?⿰)) + ((eq (car enc-str) ?⿱) + (cond + ((and (characterp (nth 2 enc-str)) + (memq (char-ucs (nth 2 enc-str)) + '(#x9580 #x9B25))) (unless conversion-only (setq f-res (ids-find-chars-including-ids enc-str))) - (setq new-str (list ?⿲ - (nth 1 enc2-str) - (nth 2 structure) - (nth 2 enc2-str))) + (setq new-str (list ?⿵ + (nth 2 enc-str) + (nth 2 structure))) (setq new-str-c (if (setq ret (ideographic-structure-find-chars new-str)) (car ret) @@ -1592,8 +1507,73 @@ COMPONENT can be a character or char-spec." new-str-c a-res (list ?⿱ (nth 1 enc-str) new-str-c) - 611)) + 601)) ) + ((and (setq enc2-str (ideographic-character-get-structure (nth 2 enc-str))) + (cond + ((eq (car enc2-str) ?⿰) + (setq code 611) + ) + ((eq (car enc2-str) ?⿲) + (setq code 614) + ) + ((and (eq (car enc2-str) ?⿱) + (setq enc3-str + (ideographic-character-get-structure (nth 2 enc2-str))) + (eq (car enc3-str) ?⿰)) + (setq code 613) + ))) + (unless conversion-only + (setq f-res (ids-find-chars-including-ids enc-str))) + (setq new-str + (cond ((eq code 611) + (list ?⿲ + (nth 1 enc2-str) + (nth 2 structure) + (nth 2 enc2-str)) + ) + ((eq code 613) + (list ?⿲ + (nth 1 enc3-str) + (nth 2 structure) + (nth 2 enc3-str)) + ) + ((eq code 614) + (list ?⿲ + (nth 1 enc2-str) + (list (list 'ideographic-structure + ?⿱ + (nth 2 enc2-str) + (nth 2 structure))) + (nth 3 enc2-str)) + ))) + (setq new-str-c + (if (setq ret (ideographic-structure-find-chars new-str)) + (car ret) + (list (cons 'ideographic-structure + (ideographic-structure-compact new-str))))) + (if conversion-only + (cond ((or (eq code 611) + (eq code 614)) + (list ?⿱ (nth 1 enc-str) new-str-c) + ) + ((eq code 613) + (list ?⿳ (nth 1 enc-str)(nth 1 enc2-str) new-str-c) + )) + (setq a-res (ids-find-chars-including-ids new-str)) + (list enc + f-res + new-str-c + a-res + (cond ((or (eq code 611) + (eq code 614)) + (list ?⿱ (nth 1 enc-str) new-str-c) + ) + ((eq code 613) + (list ?⿳ (nth 1 enc-str)(nth 1 enc2-str) new-str-c) + )) + code)) + )) ) ((eq (car enc-str) ?⿳) (setq enc2-str (ideographic-character-get-structure (nth 3 enc-str))) @@ -1661,9 +1641,190 @@ COMPONENT can be a character or char-spec." new-str-c a-res (list ?⿲ (nth 1 enc2-str) new-str-c (nth 2 enc2-str)) - 630)) + 630))) + ) + ((eq (car enc-str) ?⿵) + (unless conversion-only + (setq f-res (ids-find-chars-including-ids enc-str))) + (setq new-str (list ?⿱ + (nth 2 enc-str) + (nth 2 structure))) + (setq new-str-c + (if (setq ret (ideographic-structure-find-chars new-str)) + (car ret) + (list (cons 'ideographic-structure new-str)))) + (if conversion-only + (list ?⿵ (nth 1 enc-str) new-str-c) + (setq a-res (ids-find-chars-including-ids new-str)) + (list enc + f-res + new-str-c + a-res + (list ?⿵ (nth 1 enc-str) new-str-c) + 640)) + ) + )) + ) + ((eq (car structure) ?⿷) + (setq enc (nth 1 structure)) + (when (setq enc-str + (cond ((characterp enc) + (get-char-attribute enc 'ideographic-structure) + ) + ((consp enc) + (cdr (assq 'ideographic-structure enc)) + ))) + (cond + ((eq (car enc-str) ?⿺) + (unless conversion-only + (setq f-res (ids-find-chars-including-ids enc-str))) + (setq new-str (list ?⿱ + (nth 2 enc-str) + (nth 2 structure))) + (setq new-str-c + (if (setq ret (ideographic-structure-find-chars new-str)) + (car ret) + (list (cons 'ideographic-structure new-str)))) + (if conversion-only + (list ?⿺ (nth 1 enc-str) new-str-c) + (setq a-res (ids-find-chars-including-ids new-str)) + (list enc + f-res + new-str-c + a-res + (list ?⿺ (nth 1 enc-str) new-str-c) + 710)) + ) + ((eq (car enc-str) ?⿸) + (unless conversion-only + (setq f-res (ids-find-chars-including-ids enc-str))) + (cond + ((and (characterp (nth 2 enc-str)) + (or (memq (char-ucs (nth 2 enc-str)) + '(#x4EBA #x5165 #x513F #x51E0)) + (memq (or (encode-char (nth 2 enc-str) '=>ucs@iwds-1) + (encode-char (nth 2 enc-str) '=>ucs@component)) + '(#x4EBA #x513F)))) + (setq new-str (list ?⿺ + (nth 2 enc-str) + (nth 2 structure))) + (setq new-str-c + (if (setq ret (ideographic-structure-find-chars new-str)) + (car ret) + (list (cons 'ideographic-structure new-str)))) + (if conversion-only + (list ?⿸ (nth 1 enc-str) new-str-c) + (setq a-res (ids-find-chars-including-ids new-str)) + (list enc + f-res + new-str-c + a-res + (list ?⿸ (nth 1 enc-str) new-str-c) + 721)) ) - ))) + (t + (setq new-str (list ?⿱ + (nth 2 structure) + (nth 2 enc-str))) + (setq new-str-c + (if (setq ret (ideographic-structure-find-chars new-str)) + (car ret) + (list (cons 'ideographic-structure new-str)))) + (if conversion-only + (list ?⿸ (nth 1 enc-str) new-str-c) + (setq a-res (ids-find-chars-including-ids new-str)) + (list enc + f-res + new-str-c + a-res + (list ?⿸ (nth 1 enc-str) new-str-c) + 722)) + )) + ) + )) + ) + ((eq (car structure) ?⿺) + (setq enc (nth 1 structure)) + (when (setq enc-str + (cond ((characterp enc) + (or (get-char-attribute enc 'ideographic-structure) + (get-char-attribute enc 'ideographic-structure@apparent) + (get-char-attribute enc 'ideographic-structure@apparent/leftmost) + (get-char-attribute enc 'ideographic-structure@apparent/rightmost)) + ) + ((consp enc) + (or (cdr (assq 'ideographic-structure enc)) + (cdr (assq 'ideographic-structure@apparent enc)) + (cdr (assq 'ideographic-structure@apparent/leftmost enc)) + (cdr (assq 'ideographic-structure@apparent/rightmost enc))) + ))) + ;; (setq enc-str + ;; (mapcar (lambda (cell) + ;; (or (and (listp cell) + ;; (find-char cell)) + ;; cell)) + ;; enc-str)) + (cond + ((eq (car enc-str) ?⿱) + (cond + ((and (characterp (nth 1 enc-str)) + (or (and (eq (char-ucs (nth 1 enc-str)) #x200CA) + (setq code 811)) + (and (eq (char-feature (nth 1 enc-str) '=>iwds-1) 233) + (characterp (nth 2 structure)) + (eq (char-ucs (nth 2 structure)) #x4E36) + (setq code 812)))) + (unless conversion-only + (setq f-res (ids-find-chars-including-ids enc-str))) + (setq new-str (list ?⿺ + (nth 1 enc-str) + (nth 2 structure))) + (setq new-str-c + (if (setq ret (ideographic-structure-find-chars new-str)) + (car ret) + (list (cons 'ideographic-structure new-str)))) + (if conversion-only + (list ?⿱ new-str-c (nth 2 enc-str)) + (setq a-res (ids-find-chars-including-ids new-str)) + (list enc + f-res + new-str-c + a-res + (list ?⿱ new-str-c (nth 2 enc-str)) + code)) + ) + ((and (characterp (nth 2 enc-str)) + (or (memq (char-ucs (nth 2 enc-str)) + '(#x4E00 + #x706C + #x65E5 #x66F0 #x5FC3 + #x2123C #x58EC #x738B #x7389)) + (memq (encode-char (nth 2 enc-str) '=>ucs@component) + '(#x2123C #x58EC)) + (eq (encode-char (nth 2 enc-str) '=>ucs@iwds-1) + #x7389) + (eq (encode-char (nth 2 enc-str) '=>big5-cdp) + #x8D71))) + (unless conversion-only + (setq f-res (ids-find-chars-including-ids enc-str))) + (setq new-str (list ?⿰ + (nth 1 enc-str) + (nth 2 structure))) + (setq new-str-c + (if (setq ret (ideographic-structure-find-chars new-str)) + (car ret) + (list (cons 'ideographic-structure new-str)))) + (if conversion-only + (list ?⿱ new-str-c (nth 2 enc-str)) + (setq a-res (ids-find-chars-including-ids new-str)) + (list enc + f-res + new-str-c + a-res + (list ?⿱ new-str-c (nth 2 enc-str)) + 813)) + ) + )))) ) ((eq (car structure) ?⿻) (setq enc (nth 1 structure))