;;; ids-find.el --- search utility based on Ideographic-structures ;; -*- coding: utf-8-mcs-er -*-
-;; Copyright (C) 2002, 2003, 2005, 2006, 2007, 2017, 2020 MORIOKA Tomohiko
+;; Copyright (C) 2002, 2003, 2005, 2006, 2007, 2017, 2020, 2021
+;; MORIOKA Tomohiko
;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
;; Keywords: Kanji, Ideographs, search, IDS, CHISE, UCS, Unicode
(ids-index-store-structure c v)
nil)
'ideographic-structure@apparent)
+ (map-char-attribute
+ (lambda (c v)
+ (ids-index-store-structure c v)
+ nil)
+ 'ideographic-structure@apparent/leftmost)
(unless in-memory
(save-char-attribute-table 'ideographic-products)))
(ideographic-structure-equal str structure))
(and (setq str
(get-char-attribute pc 'ideographic-structure@apparent))
+ (ideographic-structure-equal str structure))
+ (and (setq str
+ (get-char-attribute pc 'ideographic-structure@apparent/leftmost))
(ideographic-structure-equal str structure)))
(setq pl (cons pc pl))
))
(setq sub cell)
)))
(setq cell
- (if (setq ret (ideographic-structure-find-chars sub))
- (car ret)
- (list (cons 'ideographic-structure sub))))
+ (cond ((setq ret (ideographic-structure-find-chars sub))
+ (car ret)
+ )
+ ((setq ret (ideographic-structure-compact sub))
+ (list (cons 'ideographic-structure ret))
+ )
+ (t
+ (list (cons 'ideographic-structure sub))))
+ )
))
(setq dest (cons cell dest)))
(nreverse dest)))
(defun ideographic-structure-compare-functional-and-apparent (structure
&optional char
conversion-only)
- (let (enc enc-str enc2-str enc3-str new-str new-str-c f-res a-res ret code)
+ (let (enc enc-str enc2-str enc3-str new-str new-str-c
+ f-res a-res ret code)
(cond
((eq (car structure) ?⿸)
(setq enc (nth 1 structure))
a-res
(list ?⿰ new-str-c (nth 2 enc-str))
210))
- )))
+ )
+ ((eq (car enc-str) ?⿱)
+ (unless conversion-only
+ (setq f-res (ids-find-chars-including-ids enc-str)))
+ (setq new-str (list ?⿰
+ (nth 2 structure)
+ (nth 2 enc-str)))
+ (setq new-str-c
+ (if (setq ret (ideographic-structure-find-chars new-str))
+ (car ret)
+ (list (cons 'ideographic-structure new-str))))
+ (if conversion-only
+ (list ?⿱ (nth 1 enc-str) new-str-c)
+ (setq a-res (ids-find-chars-including-ids new-str))
+ (list enc
+ f-res
+ new-str-c
+ a-res
+ (list ?⿱ (nth 1 enc-str) new-str-c)
+ 220))
+ )
+ ))
)
((eq (get-char-attribute (car structure) '=ucs-itaiji-001) #x2FF6)
(setq enc (nth 1 structure))
((eq (car enc2-str) ?⿰)
(setq code 611)
)
+ ((eq (car enc2-str) ?⿲)
+ (setq code 614)
+ )
((and (eq (car enc2-str) ?⿱)
(setq enc3-str
(ideographic-character-get-structure (nth 2 enc2-str)))
(nth 1 enc3-str)
(nth 2 structure)
(nth 2 enc3-str))
+ )
+ ((eq code 614)
+ (list ?⿲
+ (nth 1 enc2-str)
+ (list (list 'ideographic-structure
+ ?⿱
+ (nth 2 enc2-str)
+ (nth 2 structure)))
+ (nth 3 enc2-str))
)))
(setq new-str-c
(if (setq ret (ideographic-structure-find-chars new-str))
(car ret)
- (list (cons 'ideographic-structure new-str))))
+ (list (cons 'ideographic-structure
+ (ideographic-structure-compact new-str)))))
(if conversion-only
- (cond ((eq code 611)
+ (cond ((or (eq code 611)
+ (eq code 614))
(list ?⿱ (nth 1 enc-str) new-str-c)
)
((eq code 613)
f-res
new-str-c
a-res
- (cond ((eq code 611)
+ (cond ((or (eq code 611)
+ (eq code 614))
(list ?⿱ (nth 1 enc-str) new-str-c)
)
((eq code 613)
new-str-c
a-res
(list ?⿲ (nth 1 enc2-str) new-str-c (nth 2 enc2-str))
- 630))
- )
- )))
+ 630)))
+ )
+ ((eq (car enc-str) ?⿵)
+ (unless conversion-only
+ (setq f-res (ids-find-chars-including-ids enc-str)))
+ (setq new-str (list ?⿱
+ (nth 2 enc-str)
+ (nth 2 structure)))
+ (setq new-str-c
+ (if (setq ret (ideographic-structure-find-chars new-str))
+ (car ret)
+ (list (cons 'ideographic-structure new-str))))
+ (if conversion-only
+ (list ?⿵ (nth 1 enc-str) new-str-c)
+ (setq a-res (ids-find-chars-including-ids new-str))
+ (list enc
+ f-res
+ new-str-c
+ a-res
+ (list ?⿵ (nth 1 enc-str) new-str-c)
+ 640))
+ )
+ ))
)
((eq (car structure) ?⿷)
(setq enc (nth 1 structure))
a-res
(list ?⿺ (nth 1 enc-str) new-str-c)
710))
- )))
+ )
+ ((eq (car enc-str) ?⿸)
+ (unless conversion-only
+ (setq f-res (ids-find-chars-including-ids enc-str)))
+ (cond
+ ((and (characterp (nth 2 enc-str))
+ (or (memq (char-ucs (nth 2 enc-str))
+ '(#x4EBA #x5165 #x513F #x51E0))
+ (memq (or (encode-char (nth 2 enc-str) '=>ucs@iwds-1)
+ (encode-char (nth 2 enc-str) '=>ucs@component))
+ '(#x4EBA #x513F))))
+ (setq new-str (list ?⿺
+ (nth 2 enc-str)
+ (nth 2 structure)))
+ (setq new-str-c
+ (if (setq ret (ideographic-structure-find-chars new-str))
+ (car ret)
+ (list (cons 'ideographic-structure new-str))))
+ (if conversion-only
+ (list ?⿸ (nth 1 enc-str) new-str-c)
+ (setq a-res (ids-find-chars-including-ids new-str))
+ (list enc
+ f-res
+ new-str-c
+ a-res
+ (list ?⿸ (nth 1 enc-str) new-str-c)
+ 721))
+ )
+ (t
+ (setq new-str (list ?⿱
+ (nth 2 structure)
+ (nth 2 enc-str)))
+ (setq new-str-c
+ (if (setq ret (ideographic-structure-find-chars new-str))
+ (car ret)
+ (list (cons 'ideographic-structure new-str))))
+ (if conversion-only
+ (list ?⿸ (nth 1 enc-str) new-str-c)
+ (setq a-res (ids-find-chars-including-ids new-str))
+ (list enc
+ f-res
+ new-str-c
+ a-res
+ (list ?⿸ (nth 1 enc-str) new-str-c)
+ 722))
+ ))
+ )
+ ))
)
((eq (car structure) ?⿺)
(setq enc (nth 1 structure))
(when (setq enc-str
(cond ((characterp enc)
(or (get-char-attribute enc 'ideographic-structure)
- (get-char-attribute enc 'ideographic-structure@apparent))
+ (get-char-attribute enc 'ideographic-structure@apparent)
+ (get-char-attribute enc 'ideographic-structure@apparent/leftmost))
)
((consp enc)
(or (cdr (assq 'ideographic-structure enc))
- (cdr (assq 'ideographic-structure@apparent enc)))
+ (cdr (assq 'ideographic-structure@apparent enc))
+ (cdr (assq 'ideographic-structure@apparent/leftmost enc)))
)))
;; (setq enc-str
;; (mapcar (lambda (cell)
code))
)
((and (characterp (nth 2 enc-str))
- (memq (char-ucs (nth 2 enc-str))
- '(#x4E00
- #x706C
- #x65E5 #x66F0 #x5FC3
- #x2123C #x58EC #x738B #x7389)))
+ (or (memq (char-ucs (nth 2 enc-str))
+ '(#x4E00
+ #x706C
+ #x65E5 #x66F0 #x5FC3
+ #x2123C #x58EC #x738B #x7389))
+ (memq (encode-char (nth 2 enc-str) '=>ucs@component)
+ '(#x2123C #x58EC))
+ (eq (encode-char (nth 2 enc-str) '=>ucs@iwds-1)
+ #x7389)
+ (eq (encode-char (nth 2 enc-str) '=>big5-cdp)
+ #x8D71)))
(unless conversion-only
(setq f-res (ids-find-chars-including-ids enc-str)))
(setq new-str (list ?⿰