1 ;;; ids-find.el --- search utility based on Ideographic-structures
3 ;; Copyright (C) 2002, 2003, 2005, 2006, 2007, 2017, 2020 MORIOKA Tomohiko
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: Kanji, Ideographs, search, IDS, CHISE, UCS, Unicode
8 ;; This file is a part of CHISE-IDS.
10 ;; This program is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
15 ;; This program is distributed in the hope that it will be useful, but
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with this program; see the file COPYING. If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
27 (defun ids-index-store-char (product component)
28 (let ((ret (get-char-attribute component 'ideographic-products)))
29 (unless (memq product ret)
30 (put-char-attribute component 'ideographic-products
32 (when (setq ret (char-feature component 'ideographic-structure))
33 (ids-index-store-structure product ret)))
36 (defun ids-index-store-structure (product structure)
38 (dolist (cell (cdr structure))
40 (setq cell (plist-get cell :char)))
41 (cond ((characterp cell)
42 (ids-index-store-char product cell))
43 ((setq ret (assq 'ideographic-structure cell))
44 (ids-index-store-structure product (cdr ret)))
45 ((setq ret (find-char cell))
46 (ids-index-store-char product ret))
50 (defun ids-update-index ()
54 (ids-index-store-structure c v)
56 'ideographic-structure)
59 (ids-index-store-structure c v)
61 'ideographic-structure@apparent)
62 (save-char-attribute-table 'ideographic-products))
65 (mount-char-attribute-table 'ideographic-products)
68 (defun ids-find-all-products (char)
70 (dolist (cell (char-feature char 'ideographic-products))
71 (unless (memq cell dest)
72 (setq dest (cons cell dest)))
73 (setq dest (union dest (ids-find-all-products cell))))
76 (defun of-component-features ()
78 (dolist (feature (char-attribute-list))
79 (when (string-match "^<-.*[@/]component\\(/[^*/]+\\)*$"
80 (symbol-name feature))
82 (list* '<-mistakable '->mistakable
85 '<-original '->original
89 (defun to-component-features ()
91 (dolist (feature (char-attribute-list))
92 (when (string-match "^->.*[@/]component\\(/[^*/]+\\)*$"
93 (symbol-name feature))
98 (defun char-component-variants (char)
99 (let ((dest (list char))
101 (dolist (feature (to-component-features))
102 (if (setq ret (get-char-attribute char feature))
104 (setq dest (union dest (char-component-variants c))))))
106 ;; ((setq ret (some (lambda (feature)
107 ;; (get-char-attribute char feature))
108 ;; (to-component-features)))
110 ;; (setq dest (union dest (char-component-variants c))))
112 ((setq ret (get-char-attribute char '->ucs-unified))
113 (setq dest (cons char ret))
115 (setq dest (union dest
116 (some (lambda (feature)
117 (get-char-attribute c feature))
118 (of-component-features))
121 ((and (setq ret (get-char-attribute char '=>ucs))
122 (setq uchr (decode-char '=ucs ret)))
123 (setq dest (cons uchr (char-variants uchr)))
125 (setq dest (union dest
126 (some (lambda (feature)
127 (get-char-attribute c feature))
128 (of-component-features))
134 (unless (memq c dest)
135 (setq dest (cons c dest)))
138 (some (lambda (feature)
139 (char-feature c feature))
140 (of-component-features))
148 (defun ideographic-products-find (&rest components)
149 (if (stringp (car components))
150 (setq components (string-to-char-list (car components))))
152 (dolist (variant (char-component-variants (car components)))
155 (get-char-attribute variant 'ideographic-products))))
158 (setq components (cdr components)))
160 (dolist (variant (char-component-variants (car components)))
163 (get-char-attribute variant 'ideographic-products))))
164 (setq dest (intersection dest products)))
167 (defun ideograph-find-products-with-variants (components &optional ignored-chars)
168 (if (stringp components)
169 (setq components (string-to-char-list components)))
171 (dolist (variant (char-component-variants (car components)))
175 (get-char-attribute variant 'ideographic-products)
179 (setq components (cdr components)))
181 (dolist (variant (char-component-variants (car components)))
185 (get-char-attribute variant 'ideographic-products)
187 (setq dest (intersection dest products)))
190 (defun ideograph-find-products (components &optional ignored-chars)
191 (if (stringp components)
192 (setq components (string-to-char-list components)))
194 ;; (dolist (variant (char-component-variants (car components)))
197 ;; (get-char-attribute variant 'ideographic-products))))
198 ;; (setq dest products)
199 (setq dest (get-char-attribute (car components) 'ideographic-products))
201 (setq components (cdr components)))
202 ;; (setq products nil)
203 ;; (dolist (variant (char-component-variants (car components)))
206 ;; (get-char-attribute variant 'ideographic-products))))
207 (setq products (get-char-attribute (car components) 'ideographic-products))
208 (setq dest (intersection dest products)))
212 (defun ideographic-structure-char= (c1 c2)
215 (let ((m1 (char-ucs c1))
219 (memq c1 (char-component-variants c2)))))))
221 (defun ideographic-structure-member-compare-components (component s-component)
223 (cond ((char-ref= component s-component #'ideographic-structure-char=))
225 (if (setq ret (assq 'ideographic-structure s-component))
226 (ideographic-structure-member component (cdr ret))))
227 ((setq ret (get-char-attribute s-component 'ideographic-structure))
228 (ideographic-structure-member component ret)))))
231 (defun ideographic-structure-member (component structure)
232 "Return non-nil if COMPONENT is included in STRUCTURE."
233 (or (memq component structure)
235 (setq structure (cdr structure))
236 (ideographic-structure-member-compare-components
237 component (car structure)))
239 (setq structure (cdr structure))
240 (ideographic-structure-member-compare-components
241 component (car structure)))
243 (setq structure (cdr structure))
245 (ideographic-structure-member-compare-components
246 component (car structure))))))
250 (defun ideographic-structure-repertoire-p (structure components)
251 "Return non-nil if STRUCTURE can be constructed by a subset of COMPONENTS."
253 (let (ret s-component)
255 (while (setq structure (cdr structure))
256 (setq s-component (car structure))
257 (unless (characterp s-component)
258 (if (setq ret (find-char s-component))
259 (setq s-component ret)))
262 (if (setq ret (assq 'ideographic-structure s-component))
263 (ideographic-structure-repertoire-p
264 (cdr ret) components)))
265 ((member* s-component components
266 :test #'ideographic-structure-char=))
268 (get-char-attribute s-component
269 'ideographic-structure))
270 (ideographic-structure-repertoire-p ret components)))
275 (defvar ids-find-result-buffer "*ids-chars*")
277 (defun ids-find-format-line (c v)
278 (format "%c\t%s\t%s\n"
280 (or (let ((ucs (or (char-ucs c)
281 (encode-char c 'ucs))))
283 (cond ((<= ucs #xFFFF)
284 (format " U+%04X" ucs))
286 (format "U-%08X" ucs)))))
288 (or (ideographic-structure-to-ids v)
291 (defun ids-insert-chars-including-components* (components
292 &optional level ignored-chars)
296 (dolist (c (sort (copy-tree (ideograph-find-products components
299 (if (setq as (char-total-strokes a))
300 (if (setq bs (char-total-strokes b))
302 (ideograph-char< a b)
305 (ideograph-char< a b)))))
306 (unless (memq c ignored-chars)
307 (setq is (char-feature c 'ideographic-structure))
312 (insert (ids-find-format-line c is))
314 (ids-insert-chars-including-components*
315 (char-to-string c) (1+ level)
316 (cons c ignored-chars))))
321 (defun ids-insert-chars-including-components (components
322 &optional level ignored-chars)
327 (ids-insert-chars-including-components* components
328 level ignored-chars)))
330 (dolist (c ignored-chars)
331 (dolist (vc (char-component-variants c))
332 (unless (memq vc ignored-chars)
333 (when (setq is (get-char-attribute vc 'ideographic-structure))
338 (insert (ids-find-format-line vc is))
340 (ids-insert-chars-including-components*
341 (char-to-string vc) (1+ level)
342 (cons vc ignored-chars)))))))
343 (dolist (c (sort (copy-tree (ideograph-find-products-with-variants
344 components ignored-chars))
346 (if (setq as (char-total-strokes a))
347 (if (setq bs (char-total-strokes b))
349 (ideograph-char< a b)
352 (ideograph-char< a b)))))
353 (unless (memq c ignored-chars)
354 (setq is (get-char-attribute c 'ideographic-structure))
359 (insert (ids-find-format-line c is))
361 (ids-insert-chars-including-components*
362 (char-to-string c) (1+ level)
363 (cons c ignored-chars))))
369 (defun ids-find-chars-including-components (components)
370 "Search Ideographs whose structures have COMPONENTS."
371 (interactive "sComponents : ")
372 (with-current-buffer (get-buffer-create ids-find-result-buffer)
373 (setq buffer-read-only nil)
375 (ids-insert-chars-including-components components 0 nil)
376 ;; (let ((ignored-chars
378 ;; (ids-insert-chars-including-components components 0 nil
379 ;; #'ideograph-find-products)))
381 ;; (setq rest ignored-chars)
382 ;; ;; (dolist (c rest)
383 ;; ;; (setq ignored-chars
384 ;; ;; (union ignored-chars
385 ;; ;; (ids-insert-chars-including-components
386 ;; ;; (list c) 0 ignored-chars
387 ;; ;; #'ideograph-find-products-with-variants))))
388 ;; (ids-insert-chars-including-components components 0 ignored-chars
389 ;; #'ideograph-find-products-with-variants))
390 (goto-char (point-min)))
391 (view-buffer ids-find-result-buffer))
394 (define-obsolete-function-alias 'ideographic-structure-search-chars
395 'ids-find-chars-including-components)
398 (defun ids-find-chars-covered-by-components (components)
399 "Search Ideographs which structures are consisted by subsets of COMPONENTS."
400 (interactive "sComponents: ")
401 (if (stringp components)
402 (setq components (string-to-char-list components)))
403 (with-current-buffer (get-buffer-create ids-find-result-buffer)
404 (setq buffer-read-only nil)
408 (when (ideographic-structure-repertoire-p v components)
409 (insert (ids-find-format-line c v))))
410 'ideographic-structure)
411 (goto-char (point-min)))
412 (view-buffer ids-find-result-buffer))
415 (defun ideographic-structure-merge-components-alist (ca1 ca2)
416 (let ((dest-alist ca1)
419 (if (setq ret (assq (car cell) dest-alist))
420 (setcdr ret (+ (cdr ret)(cdr cell)))
421 (setq dest-alist (cons cell dest-alist))))
424 (defun ideographic-structure-to-components-alist (structure)
425 (apply #'ideographic-structure-to-components-alist* structure))
427 (defun ideographic-structure-to-components-alist* (operator component1 component2
430 (let (dest-alist ret)
432 (cond ((characterp component1)
433 (unless (encode-char component1 'ascii)
434 (list (cons component1 1)))
436 ((setq ret (assq 'ideographic-structure component1))
437 (ideographic-structure-to-components-alist (cdr ret))
439 ((setq ret (find-char component1))
443 (ideographic-structure-merge-components-alist
445 (cond ((characterp component2)
446 (unless (encode-char component2 'ascii)
447 (list (cons component2 1)))
449 ((setq ret (assq 'ideographic-structure component2))
450 (ideographic-structure-to-components-alist (cdr ret))
452 ((setq ret (find-char component2))
455 (if (memq operator '(?\u2FF2 ?\u2FF3))
456 (ideographic-structure-merge-components-alist
458 (cond ((characterp component3)
459 (unless (encode-char component3 'ascii)
460 (list (cons component3 1)))
462 ((setq ret (assq 'ideographic-structure component3))
463 (ideographic-structure-to-components-alist (cdr ret))
465 ((setq ret (find-char component3))
470 (defun ids-find-merge-variables (ve1 ve2)
476 (let ((dest-alist ve1)
480 (setq cell (car rest))
481 (if (setq ret (assq (car cell) ve1))
482 (eq (cdr ret)(cdr cell))
483 (setq dest-alist (cons cell dest-alist))))
484 (setq rest (cdr rest)))
490 (defun ideographic-structure-equal (structure1 structure2)
491 (let (dest-alist ret)
492 (and (setq dest-alist (ideographic-structure-character=
493 (car structure1)(car structure2)))
494 (setq ret (ideographic-structure-character=
495 (nth 1 structure1)(nth 1 structure2)))
496 (setq dest-alist (ids-find-merge-variables dest-alist ret))
497 (setq ret (ideographic-structure-character=
498 (nth 2 structure1)(nth 2 structure2)))
499 (setq dest-alist (ids-find-merge-variables dest-alist ret))
500 (if (memq (car structure1) '(?\u2FF2 ?\u2FF3))
501 (and (setq ret (ideographic-structure-character=
502 (nth 3 structure1)(nth 3 structure2)))
503 (setq dest-alist (ids-find-merge-variables dest-alist ret)))
507 (defun ideographic-structure-character= (c1 c2)
509 (cond ((characterp c1)
510 (cond ((encode-char c1 'ascii)
514 (if (encode-char c2 'ascii)
518 ((setq ret2 (find-char c2))
521 ((setq ret2 (assq 'ideographic-structure c2))
522 (and (setq ret (get-char-attribute c1 'ideographic-structure))
523 (ideographic-structure-equal ret (cdr ret2)))
526 ((setq ret (assq 'ideographic-structure c1))
527 (cond ((characterp c2)
528 (if (encode-char c2 'ascii)
530 (and (setq ret2 (get-char-attribute c2 'ideographic-structure))
531 (ideographic-structure-equal (cdr ret) ret2)))
533 ((setq ret2 (find-char c2))
534 (and (setq ret2 (get-char-attribute ret2 'ideographic-structure))
535 (ideographic-structure-equal (cdr ret) ret2))
537 ((setq ret2 (assq 'ideographic-structure c2))
538 (ideographic-structure-equal (cdr ret)(cdr ret2))
541 ((setq ret (find-char c1))
542 (cond ((characterp c2)
543 (if (encode-char c2 'ascii)
547 ((setq ret2 (find-char c2))
550 ((setq ret2 (assq 'ideographic-structure c2))
551 (and (setq ret (get-char-attribute ret 'ideographic-structure))
552 (ideographic-structure-equal ret (cdr ret2))
556 (defun ideographic-structure-find-chars (structure)
557 (apply #'ideographic-structure-find-chars* structure))
559 (defun ideographic-structure-find-chars* (operator component1 component2
560 &optional component3)
561 (let ((comp-alist (ideographic-structure-to-components-alist*
562 operator component1 component2 component3))
567 (sort (mapcar (lambda (cell)
568 (if (setq ret (get-char-attribute
569 (car cell) 'ideographic-products))
570 (cons ret (length ret))
574 (< (cdr a)(cdr b))))))
575 (when (and (setq str (get-char-attribute pc 'ideographic-structure))
577 (ideographic-structure-character= (car str) operator))
578 (setq c1 (nth 1 str))
579 (setq ret (ideographic-structure-character= c1 component1))
580 (setq var-alist (ids-find-merge-variables var-alist ret))
581 (setq c2 (nth 2 str))
582 (setq ret (ideographic-structure-character= c2 component2))
583 (setq var-alist (ids-find-merge-variables var-alist ret))
584 (cond ((memq (car str) '(?\u2FF2 ?\u2FF3))
585 (setq c3 (nth 3 str))
586 (and (setq ret (ideographic-structure-character=
588 (ids-find-merge-variables var-alist ret))
591 (setq pl (cons pc pl))
596 (defun ideographic-char-count-components (char component)
599 (cond ((eq char component)
601 ((setq structure (get-char-attribute char 'ideographic-structure))
602 (dolist (cell (ideographic-structure-to-components-alist structure))
605 (if (eq (car cell) char)
607 (* (ideographic-char-count-components (car cell) component)
619 ;;; ids-find.el ends here