;;; ids-find.el --- search utility based on Ideographic-structures
-;; Copyright (C) 2002,2003 MORIOKA Tomohiko
+;; Copyright (C) 2002, 2003, 2005, 2006, 2007, 2017, 2020 MORIOKA Tomohiko
;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
;; Keywords: Kanji, Ideographs, search, IDS, CHISE, UCS, Unicode
-;; This file is a part of Tomoyo-Tools.
+;; This file is a part of CHISE-IDS.
;; This program is free software; you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;;; Code:
+(defun ids-index-store-char (product component)
+ (let ((ret (get-char-attribute component 'ideographic-products)))
+ (unless (memq product ret)
+ (put-char-attribute component 'ideographic-products
+ (cons product ret))
+ (when (setq ret (char-feature component 'ideographic-structure))
+ (ids-index-store-structure product ret)))
+ ))
+
+(defun ids-index-store-structure (product structure)
+ (let (ret)
+ (dolist (cell (cdr structure))
+ (if (char-ref-p cell)
+ (setq cell (plist-get cell :char)))
+ (cond ((characterp cell)
+ (ids-index-store-char product cell))
+ ((setq ret (assq 'ideographic-structure cell))
+ (ids-index-store-structure product (cdr ret)))
+ ((setq ret (find-char cell))
+ (ids-index-store-char product ret))
+ ))))
+
+;;;###autoload
+(defun ids-update-index ()
+ (interactive)
+ (map-char-attribute
+ (lambda (c v)
+ (ids-index-store-structure c v)
+ nil)
+ 'ideographic-structure)
+ (map-char-attribute
+ (lambda (c v)
+ (ids-index-store-structure c v)
+ nil)
+ 'ideographic-structure@apparent)
+ (save-char-attribute-table 'ideographic-products))
+
+
+(mount-char-attribute-table 'ideographic-products)
+
+;;;###autoload
+(defun ids-find-all-products (char)
+ (let (dest)
+ (dolist (cell (char-feature char 'ideographic-products))
+ (unless (memq cell dest)
+ (setq dest (cons cell dest)))
+ (setq dest (union dest (ids-find-all-products cell))))
+ dest))
+
+(defun of-component-features ()
+ (let (dest)
+ (dolist (feature (char-attribute-list))
+ (when (string-match "^<-.*[@/]component\\(/[^*/]+\\)*$"
+ (symbol-name feature))
+ (push feature dest)))
+ (list* '<-mistakable '->mistakable
+ '<-formed '->formed
+ '<-same '->same
+ '<-original '->original
+ '<-ancient '->ancient
+ dest)))
+
+(defun to-component-features ()
+ (let (dest)
+ (dolist (feature (char-attribute-list))
+ (when (string-match "^->.*[@/]component\\(/[^*/]+\\)*$"
+ (symbol-name feature))
+ (push feature dest)))
+ dest))
+
+;;;###autoload
+(defun char-component-variants (char)
+ (let ((dest (list char))
+ ret uchr)
+ (dolist (feature (to-component-features))
+ (if (setq ret (get-char-attribute char feature))
+ (dolist (c ret)
+ (setq dest (union dest (char-component-variants c))))))
+ (cond
+ ;; ((setq ret (some (lambda (feature)
+ ;; (get-char-attribute char feature))
+ ;; (to-component-features)))
+ ;; (dolist (c ret)
+ ;; (setq dest (union dest (char-component-variants c))))
+ ;; )
+ ((setq ret (get-char-attribute char '->ucs-unified))
+ (setq dest (cons char ret))
+ (dolist (c dest)
+ (setq dest (union dest
+ (some (lambda (feature)
+ (get-char-attribute c feature))
+ (of-component-features))
+ )))
+ )
+ ((and (setq ret (get-char-attribute char '=>ucs))
+ (setq uchr (decode-char '=ucs ret)))
+ (setq dest (cons uchr (char-variants uchr)))
+ (dolist (c dest)
+ (setq dest (union dest
+ (some (lambda (feature)
+ (get-char-attribute c feature))
+ (of-component-features))
+ )))
+ )
+ (t
+ (map-char-family
+ (lambda (c)
+ (unless (memq c dest)
+ (setq dest (cons c dest)))
+ (setq dest
+ (union dest
+ (some (lambda (feature)
+ (char-feature c feature))
+ (of-component-features))
+ ))
+ nil)
+ char)
+ ))
+ dest))
+
+;;;###autoload
+(defun ideographic-products-find (&rest components)
+ (if (stringp (car components))
+ (setq components (string-to-char-list (car components))))
+ (let (dest products)
+ (dolist (variant (char-component-variants (car components)))
+ (setq products
+ (union products
+ (get-char-attribute variant 'ideographic-products))))
+ (setq dest products)
+ (while (and dest
+ (setq components (cdr components)))
+ (setq products nil)
+ (dolist (variant (char-component-variants (car components)))
+ (setq products
+ (union products
+ (get-char-attribute variant 'ideographic-products))))
+ (setq dest (intersection dest products)))
+ dest))
+
+(defun ideograph-find-products-with-variants (components &optional ignored-chars)
+ (if (stringp components)
+ (setq components (string-to-char-list components)))
+ (let (dest products)
+ (dolist (variant (char-component-variants (car components)))
+ (setq products
+ (union products
+ (set-difference
+ (get-char-attribute variant 'ideographic-products)
+ ignored-chars))))
+ (setq dest products)
+ (while (and dest
+ (setq components (cdr components)))
+ (setq products nil)
+ (dolist (variant (char-component-variants (car components)))
+ (setq products
+ (union products
+ (set-difference
+ (get-char-attribute variant 'ideographic-products)
+ ignored-chars))))
+ (setq dest (intersection dest products)))
+ dest))
+
+(defun ideograph-find-products (components &optional ignored-chars)
+ (if (stringp components)
+ (setq components (string-to-char-list components)))
+ (let (dest products)
+ ;; (dolist (variant (char-component-variants (car components)))
+ ;; (setq products
+ ;; (union products
+ ;; (get-char-attribute variant 'ideographic-products))))
+ ;; (setq dest products)
+ (setq dest (get-char-attribute (car components) 'ideographic-products))
+ (while (and dest
+ (setq components (cdr components)))
+ ;; (setq products nil)
+ ;; (dolist (variant (char-component-variants (car components)))
+ ;; (setq products
+ ;; (union products
+ ;; (get-char-attribute variant 'ideographic-products))))
+ (setq products (get-char-attribute (car components) 'ideographic-products))
+ (setq dest (intersection dest products)))
+ dest))
+
+
(defun ideographic-structure-char= (c1 c2)
(or (eq c1 c2)
(and c1 c2
(m2 (char-ucs c2)))
(or (and m1 m2
(eq m1 m2))
- (some (lambda (b2)
- (unless (characterp b2)
- (setq b2 (find-char b2)))
- (and b2
- (ideographic-structure-char= c1 b2)))
- (get-char-attribute
- c2 '<-ideographic-component-forms))
- (progn
- (setq m1 (car (get-char-attribute c1 '<-radical))
- m2 (car (get-char-attribute c2 '<-radical)))
- (unless (characterp m1)
- (setq m1 (find-char m1)))
- (unless (characterp m2)
- (setq m2 (find-char m2)))
- (when (or m1 m2)
- (ideographic-structure-char= m1 m2))))))))
+ (memq c1 (char-component-variants c2)))))))
(defun ideographic-structure-member-compare-components (component s-component)
(let (ret)
;;;###autoload
(defun ideographic-structure-member (component structure)
"Return non-nil if COMPONENT is included in STRUCTURE."
- (or (progn
+ (or (memq component structure)
+ (progn
(setq structure (cdr structure))
(ideographic-structure-member-compare-components
component (car structure)))
(or (ideographic-structure-to-ids v)
v)))
+(defun ids-insert-chars-including-components* (components
+ &optional level ignored-chars)
+ (unless level
+ (setq level 0))
+ (let (is i as bs)
+ (dolist (c (sort (copy-tree (ideograph-find-products components
+ ignored-chars))
+ (lambda (a b)
+ (if (setq as (char-total-strokes a))
+ (if (setq bs (char-total-strokes b))
+ (if (= as bs)
+ (ideograph-char< a b)
+ (< as bs))
+ t)
+ (ideograph-char< a b)))))
+ (unless (memq c ignored-chars)
+ (setq is (char-feature c 'ideographic-structure))
+ (setq i 0)
+ (while (< i level)
+ (insert "\t")
+ (setq i (1+ i)))
+ (insert (ids-find-format-line c is))
+ (setq ignored-chars
+ (ids-insert-chars-including-components*
+ (char-to-string c) (1+ level)
+ (cons c ignored-chars))))
+ )
+ )
+ ignored-chars)
+
+(defun ids-insert-chars-including-components (components
+ &optional level ignored-chars)
+ (unless level
+ (setq level 0))
+ (setq ignored-chars
+ (nreverse
+ (ids-insert-chars-including-components* components
+ level ignored-chars)))
+ (let (is i as bs)
+ (dolist (c ignored-chars)
+ (dolist (vc (char-component-variants c))
+ (unless (memq vc ignored-chars)
+ (when (setq is (get-char-attribute vc 'ideographic-structure))
+ (setq i 0)
+ (while (< i level)
+ (insert "\t")
+ (setq i (1+ i)))
+ (insert (ids-find-format-line vc is))
+ (setq ignored-chars
+ (ids-insert-chars-including-components*
+ (char-to-string vc) (1+ level)
+ (cons vc ignored-chars)))))))
+ (dolist (c (sort (copy-tree (ideograph-find-products-with-variants
+ components ignored-chars))
+ (lambda (a b)
+ (if (setq as (char-total-strokes a))
+ (if (setq bs (char-total-strokes b))
+ (if (= as bs)
+ (ideograph-char< a b)
+ (< as bs))
+ t)
+ (ideograph-char< a b)))))
+ (unless (memq c ignored-chars)
+ (setq is (get-char-attribute c 'ideographic-structure))
+ (setq i 0)
+ (while (< i level)
+ (insert "\t")
+ (setq i (1+ i)))
+ (insert (ids-find-format-line c is))
+ (setq ignored-chars
+ (ids-insert-chars-including-components*
+ (char-to-string c) (1+ level)
+ (cons c ignored-chars))))
+ )
+ )
+ ignored-chars)
+
;;;###autoload
(defun ids-find-chars-including-components (components)
"Search Ideographs whose structures have COMPONENTS."
(with-current-buffer (get-buffer-create ids-find-result-buffer)
(setq buffer-read-only nil)
(erase-buffer)
- (map-char-attribute
- (lambda (c v)
- (when (every (lambda (p)
- (ideographic-structure-member p v))
- components)
- (insert (ids-find-format-line c v)))
- nil)
- 'ideographic-structure)
+ (ids-insert-chars-including-components components 0 nil)
+ ;; (let ((ignored-chars
+ ;; (nreverse
+ ;; (ids-insert-chars-including-components components 0 nil
+ ;; #'ideograph-find-products)))
+ ;; rest)
+ ;; (setq rest ignored-chars)
+ ;; ;; (dolist (c rest)
+ ;; ;; (setq ignored-chars
+ ;; ;; (union ignored-chars
+ ;; ;; (ids-insert-chars-including-components
+ ;; ;; (list c) 0 ignored-chars
+ ;; ;; #'ideograph-find-products-with-variants))))
+ ;; (ids-insert-chars-including-components components 0 ignored-chars
+ ;; #'ideograph-find-products-with-variants))
(goto-char (point-min)))
(view-buffer ids-find-result-buffer))
(with-current-buffer (get-buffer-create ids-find-result-buffer)
(setq buffer-read-only nil)
(erase-buffer)
- (let (ucs jis)
- (map-char-attribute
- (lambda (c v)
- (when (ideographic-structure-repertoire-p v components)
- (insert (ids-find-format-line c v))))
- 'ideographic-structure))
+ (map-char-attribute
+ (lambda (c v)
+ (when (ideographic-structure-repertoire-p v components)
+ (insert (ids-find-format-line c v))))
+ 'ideographic-structure)
(goto-char (point-min)))
(view-buffer ids-find-result-buffer))
+(defun ideographic-structure-merge-components-alist (ca1 ca2)
+ (let ((dest-alist ca1)
+ ret)
+ (dolist (cell ca2)
+ (if (setq ret (assq (car cell) dest-alist))
+ (setcdr ret (+ (cdr ret)(cdr cell)))
+ (setq dest-alist (cons cell dest-alist))))
+ dest-alist))
+
+(defun ideographic-structure-to-components-alist (structure)
+ (apply #'ideographic-structure-to-components-alist* structure))
+
+(defun ideographic-structure-to-components-alist* (operator component1 component2
+ &optional component3
+ &rest opts)
+ (let (dest-alist ret)
+ (setq dest-alist
+ (cond ((characterp component1)
+ (unless (encode-char component1 'ascii)
+ (list (cons component1 1)))
+ )
+ ((setq ret (assq 'ideographic-structure component1))
+ (ideographic-structure-to-components-alist (cdr ret))
+ )
+ ((setq ret (find-char component1))
+ (list (cons ret 1))
+ )))
+ (setq dest-alist
+ (ideographic-structure-merge-components-alist
+ dest-alist
+ (cond ((characterp component2)
+ (unless (encode-char component2 'ascii)
+ (list (cons component2 1)))
+ )
+ ((setq ret (assq 'ideographic-structure component2))
+ (ideographic-structure-to-components-alist (cdr ret))
+ )
+ ((setq ret (find-char component2))
+ (list (cons ret 1))
+ ))))
+ (if (memq operator '(?\u2FF2 ?\u2FF3))
+ (ideographic-structure-merge-components-alist
+ dest-alist
+ (cond ((characterp component3)
+ (unless (encode-char component3 'ascii)
+ (list (cons component3 1)))
+ )
+ ((setq ret (assq 'ideographic-structure component3))
+ (ideographic-structure-to-components-alist (cdr ret))
+ )
+ ((setq ret (find-char component3))
+ (list (cons ret 1))
+ )))
+ dest-alist)))
+
+(defun ids-find-merge-variables (ve1 ve2)
+ (cond ((eq ve1 t)
+ ve2)
+ ((eq ve2 t)
+ ve1)
+ (t
+ (let ((dest-alist ve1)
+ (rest ve2)
+ cell ret)
+ (while (and rest
+ (setq cell (car rest))
+ (if (setq ret (assq (car cell) ve1))
+ (eq (cdr ret)(cdr cell))
+ (setq dest-alist (cons cell dest-alist))))
+ (setq rest (cdr rest)))
+ (if rest
+ nil
+ dest-alist)))))
+
+;;;###autoload
+(defun ideographic-structure-equal (structure1 structure2)
+ (let (dest-alist ret)
+ (and (setq dest-alist (ideographic-structure-character=
+ (car structure1)(car structure2)))
+ (setq ret (ideographic-structure-character=
+ (nth 1 structure1)(nth 1 structure2)))
+ (setq dest-alist (ids-find-merge-variables dest-alist ret))
+ (setq ret (ideographic-structure-character=
+ (nth 2 structure1)(nth 2 structure2)))
+ (setq dest-alist (ids-find-merge-variables dest-alist ret))
+ (if (memq (car structure1) '(?\u2FF2 ?\u2FF3))
+ (and (setq ret (ideographic-structure-character=
+ (nth 3 structure1)(nth 3 structure2)))
+ (setq dest-alist (ids-find-merge-variables dest-alist ret)))
+ dest-alist))))
+
+;;;###autoload
+(defun ideographic-structure-character= (c1 c2)
+ (let (ret ret2)
+ (cond ((characterp c1)
+ (cond ((encode-char c1 'ascii)
+ (list (cons c1 c2))
+ )
+ ((characterp c2)
+ (if (encode-char c2 'ascii)
+ (list (cons c2 c1))
+ (eq c1 c2))
+ )
+ ((setq ret2 (find-char c2))
+ (eq c1 ret2)
+ )
+ ((setq ret2 (assq 'ideographic-structure c2))
+ (and (setq ret (get-char-attribute c1 'ideographic-structure))
+ (ideographic-structure-equal ret (cdr ret2)))
+ ))
+ )
+ ((setq ret (assq 'ideographic-structure c1))
+ (cond ((characterp c2)
+ (if (encode-char c2 'ascii)
+ (list (cons c2 c1))
+ (and (setq ret2 (get-char-attribute c2 'ideographic-structure))
+ (ideographic-structure-equal (cdr ret) ret2)))
+ )
+ ((setq ret2 (find-char c2))
+ (and (setq ret2 (get-char-attribute ret2 'ideographic-structure))
+ (ideographic-structure-equal (cdr ret) ret2))
+ )
+ ((setq ret2 (assq 'ideographic-structure c2))
+ (ideographic-structure-equal (cdr ret)(cdr ret2))
+ ))
+ )
+ ((setq ret (find-char c1))
+ (cond ((characterp c2)
+ (if (encode-char c2 'ascii)
+ (list (cons c2 c1))
+ (eq ret c2))
+ )
+ ((setq ret2 (find-char c2))
+ (eq ret ret2)
+ )
+ ((setq ret2 (assq 'ideographic-structure c2))
+ (and (setq ret (get-char-attribute ret 'ideographic-structure))
+ (ideographic-structure-equal ret (cdr ret2))
+ )))))))
+
+;;;###autoload
+(defun ideographic-structure-find-chars (structure)
+ (apply #'ideographic-structure-find-chars* structure))
+
+(defun ideographic-structure-find-chars* (operator component1 component2
+ &optional component3)
+ (let ((comp-alist (ideographic-structure-to-components-alist*
+ operator component1 component2 component3))
+ c1 c2 c3
+ ret pl str
+ var-alist)
+ (dolist (pc (caar
+ (sort (mapcar (lambda (cell)
+ (if (setq ret (get-char-attribute
+ (car cell) 'ideographic-products))
+ (cons ret (length ret))
+ (cons nil 0)))
+ comp-alist)
+ (lambda (a b)
+ (< (cdr a)(cdr b))))))
+ (when (and (setq str (get-char-attribute pc 'ideographic-structure))
+ (setq var-alist
+ (ideographic-structure-character= (car str) operator))
+ (setq c1 (nth 1 str))
+ (setq ret (ideographic-structure-character= c1 component1))
+ (setq var-alist (ids-find-merge-variables var-alist ret))
+ (setq c2 (nth 2 str))
+ (setq ret (ideographic-structure-character= c2 component2))
+ (setq var-alist (ids-find-merge-variables var-alist ret))
+ (cond ((memq (car str) '(?\u2FF2 ?\u2FF3))
+ (setq c3 (nth 3 str))
+ (and (setq ret (ideographic-structure-character=
+ c3 component3))
+ (ids-find-merge-variables var-alist ret))
+ )
+ (t var-alist)))
+ (setq pl (cons pc pl))
+ ))
+ pl))
+
+;;;###autoload
+(defun ideographic-char-count-components (char component)
+ (let ((dest 0)
+ structure)
+ (cond ((eq char component)
+ 1)
+ ((setq structure (get-char-attribute char 'ideographic-structure))
+ (dolist (cell (ideographic-structure-to-components-alist structure))
+ (setq dest
+ (+ dest
+ (if (eq (car cell) char)
+ (cdr cell)
+ (* (ideographic-char-count-components (car cell) component)
+ (cdr cell))))))
+ dest)
+ (t
+ 0))))
+
+
;;; @ End.
;;;