(ids-update-index): New optional argument `in-memory'; if it is
[chise/ids.git] / ids-find.el
1 ;;; ids-find.el --- search utility based on Ideographic-structures ;; -*- coding: utf-8-mcs-er -*-
2
3 ;; Copyright (C) 2002, 2003, 2005, 2006, 2007, 2017, 2020 MORIOKA Tomohiko
4
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: Kanji, Ideographs, search, IDS, CHISE, UCS, Unicode
7
8 ;; This file is a part of CHISE-IDS.
9
10 ;; This program is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
14
15 ;; This program is distributed in the hope that it will be useful, but
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 ;; General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with this program; see the file COPYING.  If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
24
25 ;;; Code:
26
(defun ids-index-store-char (product component)
  "Record PRODUCT in the `ideographic-products' attribute of COMPONENT.
Do nothing when PRODUCT is already listed there.  Otherwise prepend it
and, if COMPONENT has an `ideographic-structure' feature, index that
structure for PRODUCT as well via `ids-index-store-structure'."
  (let ((known (get-char-attribute component 'ideographic-products))
        structure)
    (if (memq product known)
        nil
      (put-char-attribute component 'ideographic-products
                          (cons product known))
      ;; Propagate PRODUCT down to the parts COMPONENT itself is made of.
      (setq structure (char-feature component 'ideographic-structure))
      (if structure
          (ids-index-store-structure product structure)))))
35
(defun ids-index-store-structure (product structure)
  "Index PRODUCT under every component listed in STRUCTURE.
The first element of STRUCTURE is skipped.  Each remaining element is
unwrapped with `plist-get' :char when `char-ref-p' accepts it, and is
then handled as a character, as a spec carrying a nested
`ideographic-structure', or as a spec resolved by `find-char'."
  (dolist (element (cdr structure))
    (let ((item (if (char-ref-p element)
                    (plist-get element :char)
                  element))
          found)
      (cond ((characterp item)
             (ids-index-store-char product item))
            ;; Nested structure: descend into it.
            ((setq found (assq 'ideographic-structure item))
             (ids-index-store-structure product (cdr found)))
            ((setq found (find-char item))
             (ids-index-store-char product found))))))
48
49 ;;;###autoload
(defun ids-update-index (&optional in-memory)
  "Rebuild the `ideographic-products' index.
Walk the `ideographic-structure' and `ideographic-structure@apparent'
attributes and pass every character and its value to
`ids-index-store-structure'.  Unless IN-MEMORY is non-nil, finish by
calling `save-char-attribute-table' on `ideographic-products'."
  (interactive)
  (dolist (feature '(ideographic-structure
                     ideographic-structure@apparent))
    (map-char-attribute
     (lambda (product structure)
       (ids-index-store-structure product structure)
       ;; Always answer nil, as the mapping function did originally.
       nil)
     feature))
  (unless in-memory
    (save-char-attribute-table 'ideographic-products)))
64
65
;; Executed when this file is loaded: mount the attribute table for
;; `ideographic-products', the feature the search functions below read.
(mount-char-attribute-table 'ideographic-products)
67
68 ;;;###autoload
(defun ids-find-all-products (char)
  "Return the products of CHAR together with the products of those products.
The direct products are read from the `ideographic-products' feature of
CHAR; each one is expanded recursively and merged in with `union'."
  (let ((all nil))
    (dolist (product (char-feature char 'ideographic-products) all)
      (or (memq product all)
          (push product all))
      (setq all (union all (ids-find-all-products product))))))
76
(defun of-component-features ()
  "Return a list of feature names used to look up component variants.
The list starts with a fixed set of mistakable/formed/same/original/
ancient relation features, followed by every name in
`char-attribute-list' that matches the `<-...component' pattern."
  (let ((matched nil))
    (dolist (name (char-attribute-list))
      (if (string-match "^<-.*[@/]component\\(/[^*/]+\\)*$"
                        (symbol-name name))
          (setq matched (cons name matched))))
    (append '(<-mistakable ->mistakable
              <-formed ->formed
              <-same ->same
              <-original ->original
              <-ancient ->ancient)
            matched)))
89
(defun to-component-features ()
  "Return the names in `char-attribute-list' matching `->...component'.
Matching names are accumulated with `cons', so they appear in the
reverse of the order in which `char-attribute-list' yields them."
  (let ((matched nil))
    (dolist (name (char-attribute-list) matched)
      (if (string-match "^->.*[@/]component\\(/[^*/]+\\)*$"
                        (symbol-name name))
          (setq matched (cons name matched))))))
97
98 ;;;###autoload
(defun char-component-variants (char)
  "Return a list of characters treated as component variants of CHAR.

The result always contains CHAR.  It is assembled from:
- the variants, computed recursively, of every character stored under
  a feature named by `to-component-features';
- a base group, which is CHAR plus its `->ucs-unified' members, or the
  character decoded from `=>ucs' plus its `char-variants', or else
  every character visited by `map-char-family';
- for each character of the base group, the value of the first
  non-nil feature among `of-component-features'.

The order of the returned list is unspecified."
  (let ((dest (list char))
        ;; The feature list is the same for every character examined
        ;; below, so build it once.
        (of-feature-list (of-component-features))
        ret uchr family)
    (dolist (feature (to-component-features))
      (if (setq ret (get-char-attribute char feature))
          (dolist (c ret)
            (setq dest (union dest (char-component-variants c))))))
    (cond
     ((setq ret (get-char-attribute char '->ucs-unified))
      (setq family (cons char ret))
      ;; Merge into DEST instead of overwriting it, so the variants
      ;; gathered by the loop above are not lost.
      (setq dest (union dest family))
      (dolist (c family)
        (setq dest (union dest
                          (some (lambda (feature)
                                  (get-char-attribute c feature))
                                of-feature-list)))))
     ((and (setq ret (get-char-attribute char '=>ucs))
           (setq uchr (decode-char '=ucs ret)))
      (setq family (cons uchr (char-variants uchr)))
      ;; Same as above: keep CHAR and the variants already collected.
      (setq dest (union dest family))
      (dolist (c family)
        (setq dest (union dest
                          (some (lambda (feature)
                                  (get-char-attribute c feature))
                                of-feature-list)))))
     (t
      (map-char-family
       (lambda (c)
         (unless (memq c dest)
           (setq dest (cons c dest)))
         (setq dest
               (union dest
                      (some (lambda (feature)
                              (char-feature c feature))
                            of-feature-list)))
         ;; Return nil, as the original mapping function did.
         nil)
       char)))
    dest))
147
148 ;;;###autoload
(defun ideographic-products-find (&rest components)
  "Return the characters indexed as products of all COMPONENTS.
COMPONENTS are characters; when the first argument is a string, its
characters are used instead.  For each component the
`ideographic-products' of all its `char-component-variants' are
united, and the per-component sets are then intersected."
  (let* ((remaining (if (stringp (car components))
                        (string-to-char-list (car components))
                      components))
         ;; Products of one component, taken over all of its variants.
         (gather
          (lambda (component)
            (let (pool)
              (dolist (variant (char-component-variants component) pool)
                (setq pool
                      (union pool
                             (get-char-attribute variant
                                                 'ideographic-products)))))))
         (common (funcall gather (car remaining))))
    ;; Stop early once the running intersection is empty.
    (while (and common (cdr remaining))
      (setq remaining (cdr remaining))
      (setq common (intersection common
                                 (funcall gather (car remaining)))))
    common))
167
(defun ideograph-find-products-with-variants (components &optional ignored-chars)
  "Return products shared by all COMPONENTS, counting component variants.
COMPONENTS is a list of characters or a string.  For each component the
`ideographic-products' of all its `char-component-variants' are united
after removing IGNORED-CHARS; the per-component sets are intersected."
  (let* ((remaining (if (stringp components)
                        (string-to-char-list components)
                      components))
         ;; Products of one component over all its variants, minus
         ;; the characters the caller asked to ignore.
         (gather
          (lambda (component)
            (let (pool)
              (dolist (variant (char-component-variants component) pool)
                (setq pool
                      (union pool
                             (set-difference
                              (get-char-attribute variant
                                                  'ideographic-products)
                              ignored-chars)))))))
         (common (funcall gather (car remaining))))
    (while (and common (cdr remaining))
      (setq remaining (cdr remaining))
      (setq common (intersection common
                                 (funcall gather (car remaining)))))
    common))
190
(defun ideograph-find-products (components &optional ignored-chars)
  "Return the characters listed as products of every one of COMPONENTS.
COMPONENTS is a list of characters or a string.  Only the
`ideographic-products' attribute of each component itself is consulted;
variants are not expanded.  IGNORED-CHARS is accepted but not used."
  (let* ((chars (if (stringp components)
                    (string-to-char-list components)
                  components))
         (common (get-char-attribute (car chars) 'ideographic-products)))
    ;; Narrow the candidate set one component at a time.
    (while (and common (cdr chars))
      (setq chars (cdr chars))
      (setq common
            (intersection common
                          (get-char-attribute (car chars)
                                              'ideographic-products))))
    common))
211
212
(defun ideographic-structure-char= (c1 c2)
  "Return non-nil if C1 and C2 count as the same structure component.
They match when they are `eq', when both have the same `char-ucs'
value, or when C1 is among the `char-component-variants' of C2.
If exactly one of them is nil the result is nil."
  (cond ((eq c1 c2) t)
        ((not (and c1 c2)) nil)
        (t
         (let ((ucs1 (char-ucs c1))
               (ucs2 (char-ucs c2)))
           (if (and ucs1 ucs2 (eq ucs1 ucs2))
               t
             (memq c1 (char-component-variants c2)))))))
221
(defun ideographic-structure-member-compare-components (component s-component)
  "Return non-nil if COMPONENT matches S-COMPONENT or occurs inside it.
A direct match is tested with `char-ref=' using
`ideographic-structure-char='.  Failing that, the structure of
S-COMPONENT (its `ideographic-structure' entry when it is a list,
otherwise its `ideographic-structure' attribute) is searched with
`ideographic-structure-member'."
  (or (char-ref= component s-component #'ideographic-structure-char=)
      (let (found)
        (cond ((listp s-component)
               (setq found (assq 'ideographic-structure s-component))
               (and found
                    (ideographic-structure-member component (cdr found))))
              (t
               (setq found (get-char-attribute s-component
                                               'ideographic-structure))
               (and found
                    (ideographic-structure-member component found)))))))
230
231 ;;;###autoload
(defun ideographic-structure-member (component structure)
  "Return non-nil if COMPONENT is included in STRUCTURE.
COMPONENT is first looked up with `memq'; then elements 1 and 2 of
STRUCTURE, and element 3 when it is non-nil, are examined with
`ideographic-structure-member-compare-components'."
  (or (memq component structure)
      (ideographic-structure-member-compare-components
       component (nth 1 structure))
      (ideographic-structure-member-compare-components
       component (nth 2 structure))
      ;; The third operand exists only for three-operand structures.
      (let ((third (nth 3 structure)))
        (and third
             (ideographic-structure-member-compare-components
              component third)))))
248
249
250 ;;;###autoload
(defun ideographic-structure-repertoire-p (structure components)
  "Return t if STRUCTURE can be constructed by a subset of COMPONENTS.
Every element after the first element of STRUCTURE must either match a
member of COMPONENTS under `ideographic-structure-char=' or itself
have a structure that satisfies this predicate.  Return nil when
STRUCTURE is nil or any element fails."
  (and structure
       (let ((pending (cdr structure))
             (ok t)
             item found)
         (while (and ok pending)
           (setq item (car pending))
           ;; Resolve a char-spec to a character when possible.
           (unless (characterp item)
             (setq found (find-char item))
             (if found
                 (setq item found)))
           (setq ok
                 (cond
                  ((listp item)
                   (setq found (assq 'ideographic-structure item))
                   (if found
                       (ideographic-structure-repertoire-p
                        (cdr found) components)))
                  ((member* item components
                            :test #'ideographic-structure-char=))
                  ((setq found
                         (get-char-attribute item 'ideographic-structure))
                   (ideographic-structure-repertoire-p found components))))
           (setq pending (cdr pending)))
         (if ok t nil))))
274
275
(defvar ids-find-result-buffer "*ids-chars*"
  "Name of the buffer in which the ids-find search commands show results.")
277
(defun ids-find-format-line (c v)
  "Return one tab-separated result line for character C with structure V.
The line holds C, a code-point column and an IDS column, and ends with
a newline.  The code point comes from `char-ucs' or, failing that,
`encode-char' with `ucs'; it is blank padding when neither yields a
value in range.  The IDS column is `ideographic-structure-to-ids' of V,
or V itself when that returns nil."
  (let* ((ucs (or (char-ucs c)
                  (encode-char c 'ucs)))
         (code (cond ((null ucs) nil)
                     ((<= ucs #xFFFF)
                      (format "    U+%04X" ucs))
                     ((<= ucs #x10FFFF)
                      (format "U-%08X" ucs))))
         (ids (or (ideographic-structure-to-ids v)
                  v)))
    (format "%c\t%s\t%s\n"
            c
            (or code "          ")
            ids)))
291
(defun ids-insert-chars-including-components* (components
                                               &optional level ignored-chars)
  "Insert the products of COMPONENTS at point as an indented tree.
Products come from `ideograph-find-products'.  Each product not in
IGNORED-CHARS is written with `ids-find-format-line', preceded by LEVEL
tab characters (LEVEL defaults to 0), and is then expanded recursively
one level deeper.  Return IGNORED-CHARS extended with every character
that was inserted.

Products are ordered by `char-total-strokes'; characters without a
stroke count come after those that have one, and ties are broken with
`ideograph-char<'."
  (unless level
    (setq level 0))
  (let (is)
    (dolist (c (sort (copy-tree (ideograph-find-products components
                                                         ignored-chars))
                     (lambda (a b)
                       (let ((sa (char-total-strokes a))
                             (sb (char-total-strokes b)))
                         (cond ((and sa sb)
                                (if (= sa sb)
                                    (ideograph-char< a b)
                                  (< sa sb)))
                               (sa t)
                               ;; A lacks a stroke count and B has one:
                               ;; B must sort first, otherwise both A<B
                               ;; and B<A could hold.
                               (sb nil)
                               (t (ideograph-char< a b)))))))
      (unless (memq c ignored-chars)
        (setq is (char-feature c 'ideographic-structure))
        (insert (make-string level ?\t))
        (insert (ids-find-format-line c is))
        (setq ignored-chars
              (ids-insert-chars-including-components*
               (char-to-string c) (1+ level)
               (cons c ignored-chars))))))
  ignored-chars)
321
(defun ids-insert-chars-including-components (components
                                              &optional level ignored-chars)
  "Insert at point the characters whose structures include COMPONENTS.
Three passes are made, each writing lines with `ids-find-format-line'
indented by LEVEL tabs (LEVEL defaults to 0):
1. `ids-insert-chars-including-components*' on COMPONENTS;
2. for every character inserted so far, each of its
   `char-component-variants' that has an `ideographic-structure'
   attribute and was not inserted yet, followed by its own products;
3. the remaining results of `ideograph-find-products-with-variants',
   sorted by stroke count, each followed by its own products.
Return the list of characters that were inserted or ignored.

In pass 3, characters without a `char-total-strokes' value sort after
those that have one; ties are broken with `ideograph-char<'."
  (unless level
    (setq level 0))
  (setq ignored-chars
        (nreverse
         (ids-insert-chars-including-components* components
                                                 level ignored-chars)))
  (let (is)
    ;; `dolist' walks the list as it was when the loop started;
    ;; IGNORED-CHARS itself keeps growing inside the loop.
    (dolist (c ignored-chars)
      (dolist (vc (char-component-variants c))
        (unless (memq vc ignored-chars)
          (when (setq is (get-char-attribute vc 'ideographic-structure))
            (insert (make-string level ?\t))
            (insert (ids-find-format-line vc is))
            (setq ignored-chars
                  (ids-insert-chars-including-components*
                   (char-to-string vc) (1+ level)
                   (cons vc ignored-chars)))))))
    (dolist (c (sort (copy-tree (ideograph-find-products-with-variants
                                 components ignored-chars))
                     (lambda (a b)
                       (let ((sa (char-total-strokes a))
                             (sb (char-total-strokes b)))
                         (cond ((and sa sb)
                                (if (= sa sb)
                                    (ideograph-char< a b)
                                  (< sa sb)))
                               (sa t)
                               ;; A lacks a stroke count and B has one:
                               ;; B must sort first, otherwise both A<B
                               ;; and B<A could hold.
                               (sb nil)
                               (t (ideograph-char< a b)))))))
      (unless (memq c ignored-chars)
        (setq is (get-char-attribute c 'ideographic-structure))
        (insert (make-string level ?\t))
        (insert (ids-find-format-line c is))
        (setq ignored-chars
              (ids-insert-chars-including-components*
               (char-to-string c) (1+ level)
               (cons c ignored-chars))))))
  ignored-chars)
368
369 ;;;###autoload
(defun ids-find-chars-including-components (components)
  "Show the ideographs whose structures contain COMPONENTS.
The buffer named by `ids-find-result-buffer' is emptied, filled by
`ids-insert-chars-including-components', and displayed with
`view-buffer'."
  (interactive "sComponents : ")
  (let ((result-buffer (get-buffer-create ids-find-result-buffer)))
    (with-current-buffer result-buffer
      ;; Clear the read-only flag before rewriting the buffer.
      (setq buffer-read-only nil)
      (erase-buffer)
      (ids-insert-chars-including-components components 0 nil)
      (goto-char (point-min))))
  (view-buffer ids-find-result-buffer))
393
;; Keep the name `ideographic-structure-search-chars' available as an
;; obsolete alias of `ids-find-chars-including-components'.
;;;###autoload
(define-obsolete-function-alias 'ideographic-structure-search-chars
  'ids-find-chars-including-components)
397
398 ;;;###autoload
(defun ids-find-chars-covered-by-components (components)
  "Show the ideographs whose structures consist only of COMPONENTS.
COMPONENTS is a list of characters or a string.  Every character with
an `ideographic-structure' attribute accepted by
`ideographic-structure-repertoire-p' is written to the buffer named by
`ids-find-result-buffer', which is then displayed with `view-buffer'."
  (interactive "sComponents: ")
  (let ((repertoire (if (stringp components)
                        (string-to-char-list components)
                      components)))
    (with-current-buffer (get-buffer-create ids-find-result-buffer)
      (setq buffer-read-only nil)
      (erase-buffer)
      (map-char-attribute
       (lambda (c v)
         (if (ideographic-structure-repertoire-p v repertoire)
             (insert (ids-find-format-line c v))))
       'ideographic-structure)
      (goto-char (point-min))))
  (view-buffer ids-find-result-buffer))
414
415
(defun ideographic-structure-merge-components-alist (ca1 ca2)
  "Merge the component-count alists CA1 and CA2 and return the result.
Each entry is a cons of a component and a number.  An entry of CA2
whose key is already present has its number added to the existing
entry, which is modified in place with `setcdr'; other entries of CA2
are pushed onto the front of the result."
  (let ((merged ca1)
        (pending ca2)
        entry existing)
    (while pending
      (setq entry (car pending)
            pending (cdr pending)
            existing (assq (car entry) merged))
      (if existing
          (setcdr existing (+ (cdr existing) (cdr entry)))
        (setq merged (cons entry merged))))
    merged))
424
(defun ideographic-structure-to-components-alist (structure)
  "Return the component-count alist of STRUCTURE.
STRUCTURE is a list whose elements are passed as the arguments of
`ideographic-structure-to-components-alist*'."
  (apply (function ideographic-structure-to-components-alist*)
         structure))
427
(defun ideographic-structure-to-components-alist* (operator component1 component2
                                                            &optional component3
                                                            &rest opts)
  "Return an alist mapping each component to the number of its occurrences.
COMPONENT1 and COMPONENT2 are always counted; COMPONENT3 is counted
only when OPERATOR is U+2FF2 or U+2FF3.  A character component counts
once unless `encode-char' maps it to `ascii', in which case it is
left out.  A spec with a nested `ideographic-structure' is expanded
recursively, and any other spec is resolved with `find-char'.
OPTS is ignored."
  (let* ((convert
          ;; Component-count alist contributed by a single operand.
          (lambda (component)
            (let (found)
              (cond ((characterp component)
                     (if (encode-char component 'ascii)
                         nil
                       (list (cons component 1))))
                    ((setq found (assq 'ideographic-structure component))
                     (ideographic-structure-to-components-alist (cdr found)))
                    ((setq found (find-char component))
                     (list (cons found 1)))))))
         (counts (funcall convert component1)))
    (setq counts
          (ideographic-structure-merge-components-alist
           counts
           (funcall convert component2)))
    (if (memq operator '(?\u2FF2 ?\u2FF3))
        (ideographic-structure-merge-components-alist
         counts
         (funcall convert component3))
      counts)))
470
(defun ids-find-merge-variables (ve1 ve2)
  "Merge the variable-binding results VE1 and VE2.
Each argument is either t or an alist of bindings.  When one argument
is t, return the other.  Otherwise return VE1 extended with the
entries of VE2 whose keys are absent from VE1, or nil when a key
shared by both has values that are not `eq', or when VE2 contains a
nil entry."
  (cond ((eq ve1 t)
         ve2)
        ((eq ve2 t)
         ve1)
        (t
         (let ((merged ve1)
               (pending ve2)
               (consistent t)
               binding known)
           (while (and consistent pending)
             (setq binding (car pending))
             (cond ((null binding)
                    (setq consistent nil))
                   ;; Keys are looked up in VE1 only, not in MERGED.
                   ((setq known (assq (car binding) ve1))
                    (if (eq (cdr known) (cdr binding))
                        (setq pending (cdr pending))
                      (setq consistent nil)))
                   (t
                    (setq merged (cons binding merged)
                          pending (cdr pending)))))
           (if consistent
               merged
             nil)))))
489
490 ;;;###autoload
(defun ideographic-structure-equal (structure1 structure2)
  "Return non-nil if STRUCTURE1 and STRUCTURE2 match element by element.
Elements 0, 1 and 2 are compared with
`ideographic-structure-character=', and element 3 too when the first
element of STRUCTURE1 is U+2FF2 or U+2FF3.  The per-element results
are combined with `ids-find-merge-variables'; the combined value is
returned, or nil as soon as a comparison or a merge fails."
  (let ((bindings (ideographic-structure-character=
                   (car structure1) (car structure2)))
        (positions (if (memq (car structure1) '(?\u2FF2 ?\u2FF3))
                       '(1 2 3)
                     '(1 2)))
        match)
    (while (and bindings positions)
      (setq match (ideographic-structure-character=
                   (nth (car positions) structure1)
                   (nth (car positions) structure2)))
      (setq bindings (and match
                          (ids-find-merge-variables bindings match)))
      (setq positions (cdr positions)))
    bindings))
506
507 ;;;###autoload
(defun ideographic-structure-character= (c1 c2)
  "Compare the structure elements C1 and C2.
Each argument is a character or a char-spec.  A character that
`encode-char' maps to `ascii' matches anything, and the result is
then a one-entry alist pairing that character with the other argument.
Otherwise the result is non-nil when both resolve to the same
character, or when their structures satisfy
`ideographic-structure-equal'.  Return nil when no rule applies."
  (let (left right)
    (cond
     ;; C1 is a character.
     ((characterp c1)
      (cond ((encode-char c1 'ascii)
             (list (cons c1 c2)))
            ((characterp c2)
             (cond ((encode-char c2 'ascii)
                    (list (cons c2 c1)))
                   (t
                    (eq c1 c2))))
            ((setq right (find-char c2))
             (eq c1 right))
            ((setq right (assq 'ideographic-structure c2))
             (setq left (get-char-attribute c1 'ideographic-structure))
             (and left
                  (ideographic-structure-equal left (cdr right))))))
     ;; C1 is a spec that carries its own structure.
     ((setq left (assq 'ideographic-structure c1))
      (cond ((characterp c2)
             (cond ((encode-char c2 'ascii)
                    (list (cons c2 c1)))
                   ((setq right
                          (get-char-attribute c2 'ideographic-structure))
                    (ideographic-structure-equal (cdr left) right))))
            ((setq right (find-char c2))
             (setq right (get-char-attribute right 'ideographic-structure))
             (and right
                  (ideographic-structure-equal (cdr left) right)))
            ((setq right (assq 'ideographic-structure c2))
             (ideographic-structure-equal (cdr left) (cdr right)))))
     ;; C1 is a spec that `find-char' resolves to a character.
     ((setq left (find-char c1))
      (cond ((characterp c2)
             (cond ((encode-char c2 'ascii)
                    (list (cons c2 c1)))
                   (t
                    (eq left c2))))
            ((setq right (find-char c2))
             (eq left right))
            ((setq right (assq 'ideographic-structure c2))
             (setq left (get-char-attribute left 'ideographic-structure))
             (and left
                  (ideographic-structure-equal left (cdr right)))))))))
555
556 ;;;###autoload
(defun ideographic-structure-find-chars (structure)
  "Return the characters whose structure matches STRUCTURE.
STRUCTURE is a list whose elements are passed as the arguments of
`ideographic-structure-find-chars*'."
  (apply (function ideographic-structure-find-chars*)
         structure))
559
(defun ideographic-structure-find-chars* (operator component1 component2
                                                   &optional component3)
  "Return the characters whose structure is OPERATOR applied to the components.
Candidates are the `ideographic-products' of the component with the
shortest product list.  A candidate is kept when its
`ideographic-structure' matches OPERATOR, COMPONENT1 and COMPONENT2,
and COMPONENT3 when the candidate's operator is U+2FF2 or U+2FF3,
under `ideographic-structure-character=', with the per-element results
merging consistently through `ids-find-merge-variables'."
  (let* ((comp-alist (ideographic-structure-to-components-alist*
                      operator component1 component2 component3))
         ;; One (PRODUCTS . COUNT) pair per component, fewest first.
         (ranked (sort (mapcar (lambda (cell)
                                 (let ((products
                                        (get-char-attribute
                                         (car cell) 'ideographic-products)))
                                   (cons products (length products))))
                               comp-alist)
                       (lambda (a b)
                         (< (cdr a) (cdr b)))))
         (candidates (car (car ranked)))
         matched structure bindings found)
    (dolist (candidate candidates)
      (setq structure (get-char-attribute candidate 'ideographic-structure))
      (when (and structure
                 (setq bindings
                       (ideographic-structure-character= (car structure)
                                                         operator))
                 (nth 1 structure)
                 (setq found (ideographic-structure-character=
                              (nth 1 structure) component1))
                 (setq bindings (ids-find-merge-variables bindings found))
                 (nth 2 structure)
                 (setq found (ideographic-structure-character=
                              (nth 2 structure) component2))
                 (setq bindings (ids-find-merge-variables bindings found))
                 (if (memq (car structure) '(?\u2FF2 ?\u2FF3))
                     (and (setq found (ideographic-structure-character=
                                       (nth 3 structure) component3))
                          (ids-find-merge-variables bindings found))
                   bindings))
        (push candidate matched)))
    matched))
595
596 ;;;###autoload
(defun ideographic-char-count-components (char component)
  "Return how many times COMPONENT occurs in the structure of CHAR.
Return 1 when CHAR is COMPONENT itself and 0 when CHAR has no
`ideographic-structure' attribute.  Otherwise sum, over the entries of
`ideographic-structure-to-components-alist', the entry's multiplicity
times the count found recursively inside that entry's character.  An
entry that is CHAR itself is not descended into; its multiplicity is
added as is."
  (if (eq char component)
      1
    (let ((structure (get-char-attribute char 'ideographic-structure))
          (total 0))
      (when structure
        (dolist (entry (ideographic-structure-to-components-alist structure))
          (let ((part (car entry))
                (times (cdr entry)))
            (setq total
                  (+ total
                     (if (eq part char)
                         times
                       (* (ideographic-char-count-components part component)
                          times)))))))
      total)))
613
614
615 ;;;###autoload
(defun ideographic-character-get-structure (character)
  "Return ideographic-structure of CHARACTER.
CHARACTER can be a character or char-spec.  For a char-spec, an
`ideographic-structure' entry in the spec itself takes precedence;
otherwise the spec is resolved with `find-char' and the attribute of
the resulting character is returned."
  (if (characterp character)
      (get-char-attribute character 'ideographic-structure)
    (let ((spec-entry (assq 'ideographic-structure character)))
      (if spec-entry
          (cdr spec-entry)
        (let ((resolved (find-char character)))
          (and resolved
               (get-char-attribute resolved 'ideographic-structure)))))))
629
630 ;;;###autoload
(defun ideographic-char-match-component (char component)
  "Return non-nil if character CHAR has COMPONENT in ideographic-structure.
COMPONENT can be a character or char-spec.  CHAR matches when it
equals COMPONENT under `ideographic-structure-character=', or when
operand 1 or 2 of its structure, or operand 3 for the operators U+2FF2
and U+2FF3, matches recursively."
  (cond
   ((ideographic-structure-character= char component))
   (t
    (let ((structure (ideographic-character-get-structure char)))
      (cond ((null structure) nil)
            ((ideographic-char-match-component (nth 1 structure) component))
            ((ideographic-char-match-component (nth 2 structure) component))
            ((memq (car structure) '(?\u2FF2 ?\u2FF3))
             (ideographic-char-match-component (nth 3 structure)
                                               component)))))))
641
(defun ideographic-structure-char< (a b)
  "Return non-nil if character A should sort before character B.
A character without an `ideographic-structure' attribute precedes one
that has it.  Within the same group, order by `char-total-strokes':
a character with a stroke count precedes one without, a smaller count
comes first, and remaining ties are decided by `ideograph-char<'."
  (let ((structure-a (get-char-attribute a 'ideographic-structure))
        (structure-b (get-char-attribute b 'ideographic-structure)))
    (cond
     ((and structure-a (not structure-b)) nil)
     ((and structure-b (not structure-a)) t)
     (t
      ;; Both or neither have a structure: fall back to stroke counts.
      (let ((strokes-a (char-total-strokes a))
            (strokes-b (char-total-strokes b)))
        (cond ((and strokes-a strokes-b)
               (or (< strokes-a strokes-b)
                   (and (= strokes-a strokes-b)
                        (ideograph-char< a b))))
              (strokes-a t)
              (strokes-b nil)
              (t (ideograph-char< a b))))))))
680
(defun ideographic-chars-to-is-a-tree (chars)
  "Arrange CHARS into a forest keyed by component containment.
CHARS is sorted destructively with `ideographic-structure-char<'.  The
first remaining character becomes a node; every later character for
which `ideographic-char-match-component' holds against it becomes a
descendant, arranged recursively.  The return value is a list of
\(CHARACTER . SUBTREES) conses, in the reverse of the order in which
the nodes were taken.  Sibling subtrees are ordered by decreasing
number of children, then by `ideograph-char<'."
  (let (comp char products others dest rest)
    (setq chars (sort chars #'ideographic-structure-char<))
    (while chars
      (setq comp (pop chars)
            rest chars
            products nil
            others nil)
      (while rest
        (setq char (pop rest))
        (if (ideographic-char-match-component char comp)
            (push char products)
          (push char others)))
      (push (cons comp
                  (if products
                      (sort (ideographic-chars-to-is-a-tree products)
                            (lambda (a b)
                              (let ((la (length (cdr a)))
                                    (lb (length (cdr b))))
                                (or (> la lb)
                                    (and (= la lb)
                                         (ideograph-char< (car a)
                                                          (car b)))))))))
            dest)
      ;; OTHERS was built with `push' and is therefore reversed.
      ;; Restore the sorted order so the next node taken is again the
      ;; first remaining character under `ideographic-structure-char<'.
      (setq chars (nreverse others)))
    dest))
728
(defun ids-find-chars-including-ids* (operator component1 component2
                                               &optional component3)
  "Return a tree of the characters that contain the given structure.
Candidates are the `ideographic-products' of the component with the
shortest product list.  A candidate is kept when it contains every
component at least as often as the structure does, and when either the
candidate or its apparent structure matches the structure under
`ideographic-char-match-component'.  The kept characters are arranged
with `ideographic-chars-to-is-a-tree'."
  (let* ((comp-alist (ideographic-structure-to-components-alist*
                      operator component1 component2 component3))
         ;; The searched structure, wrapped as a char-spec.
         (comp-spec
          (list (append (list 'ideographic-structure
                              operator component1 component2)
                        (if component3
                            (list component3)))))
         ;; One (PRODUCTS . COUNT) pair per component, fewest first.
         (ranked (sort (mapcar (lambda (cell)
                                 (let ((products
                                        (get-char-attribute
                                         (car cell) 'ideographic-products)))
                                   (cons products (length products))))
                               comp-alist)
                       (lambda (a b)
                         (< (cdr a) (cdr b)))))
         structure hits)
    (dolist (candidate (car (car ranked)))
      (when (and (every (lambda (cell)
                          (>= (ideographic-char-count-components
                               candidate (car cell))
                              (cdr cell)))
                        comp-alist)
                 (or (ideographic-char-match-component candidate comp-spec)
                     (and (setq structure
                                (get-char-attribute candidate
                                                    'ideographic-structure))
                          (ideographic-char-match-component
                           (list
                            (cons
                             'ideographic-structure
                             (functional-ideographic-structure-to-apparent-structure
                              structure)))
                           comp-spec))))
        (push candidate hits)))
    (ideographic-chars-to-is-a-tree hits)))
763
(defun ids-find-chars-including-ids (structure)
  "Find characters whose structure includes STRUCTURE.
STRUCTURE is either an ideographic structure list, whose elements are
passed as the arguments of `ids-find-chars-including-ids*', or a
character, in which case its `ideographic-structure' attribute is
used.

Return nil when there is no structure to search for (STRUCTURE is nil
or the character has no `ideographic-structure' attribute)."
  (if (characterp structure)
      (setq structure (get-char-attribute structure 'ideographic-structure)))
  ;; Without this guard an empty structure would call the worker with
  ;; no arguments and signal a wrong-number-of-arguments error.
  (if structure
      (apply #'ids-find-chars-including-ids* structure)))
768
(defun ids-find--enclosure-structure (component)
  "Return the ideographic structure of COMPONENT, or nil if it has none.
COMPONENT may be a character, whose `ideographic-structure' attribute
is used, or a cons holding an `ideographic-structure' entry."
  (cond ((characterp component)
         (get-char-attribute component 'ideographic-structure))
        ((consp component)
         (cdr (assq 'ideographic-structure component)))))

(defun ids-find--sub-structure (operator &rest components)
  "Return a component cell that embeds the structure OPERATOR COMPONENTS.
The result has the form ((ideographic-structure OPERATOR . COMPONENTS))."
  (list (cons 'ideographic-structure (cons operator components))))

(defun ids-find--left-right-structure (component)
  "Return the structure of COMPONENT if its operator is ⿰, else nil.
The structure is obtained with `ideographic-character-get-structure'."
  (let ((str (ideographic-character-get-structure component)))
    (if (and str
             (eq (car str) ?⿰))
        str)))

(defun functional-ideographic-structure-to-apparent-structure (structure)
  "Rewrite STRUCTURE by decomposing its first component.
STRUCTURE is a list (OPERATOR ENCLOSURE INNER ...).  Rewriting is
attempted when OPERATOR is one of ⿸ ⿹ ⿴ ⿶ ⿵ ⿻, or a character whose
`=ucs-itaiji-001' attribute is #x2FF6.  The structure of ENCLOSURE
\(its `ideographic-structure' attribute if it is a character, or its
`ideographic-structure' entry if it is a cons) is looked up, and INNER
is attached to the appropriate part of that structure.

Return the rewritten structure, or nil when OPERATOR is not handled,
ENCLOSURE has no structure, or no rewriting rule applies."
  (let ((operator (car structure))
        (inner (nth 2 structure))
        enc-str enc2-str lower new-str)
    (cond
     ((eq operator ?⿸)
      (when (setq enc-str (ids-find--enclosure-structure (nth 1 structure)))
        (cond
         ((eq (car enc-str) ?⿰)
          (list ?⿰
                (nth 1 enc-str)
                (ids-find--sub-structure ?⿱ (nth 2 enc-str) inner)))
         ((and (eq (car enc-str) ?⿲)
               ;; `char-ucs' is only meaningful for characters; a nested
               ;; structure here simply means the rule does not apply
               ;; (the ⿴ rules below use the same guard).
               (characterp (nth 1 enc-str))
               (memq (char-ucs (nth 1 enc-str)) '(#x4EBB #x2E85))
               (eq (nth 2 enc-str) ?丨))
          (list ?⿰
                (decode-char '=big5-cdp #x8B7A)
                (ids-find--sub-structure ?⿱ (nth 3 enc-str) inner)))
         ((eq (car enc-str) ?⿱)
          ;; INNER is attached to the lower part of the enclosure.  The
          ;; combined lower part uses ⿸ when the lower part is one of
          ;; the listed components or is itself a ⿸ structure, and ⿰
          ;; otherwise; it is then rewritten recursively when possible.
          (setq lower (nth 2 enc-str))
          (setq new-str
                (list
                 (cond
                  ((characterp lower)
                   (if (or (memq (encode-char lower '=>ucs@component)
                                 '(#x20087 #x5382 #x4E06))
                           (eq (encode-char lower '=big5-cdp)
                               #x89CE)
                           (eq (encode-char lower '=>big5-cdp)
                               #x88E2)
                           (eq (encode-char lower '=big5-cdp)
                               #x88AD)
                           (eq (or (encode-char lower '=>big5-cdp)
                                   (encode-char lower '=big5-cdp-itaiji-001))
                               #x8766)
                           (eq (car (get-char-attribute
                                     lower 'ideographic-structure))
                               ?⿸))
                       ?⿸
                     ?⿰))
                  ((eq (car (cdr (assq 'ideographic-structure lower)))
                       ?⿸)
                   ?⿸)
                  (t
                   ?⿰))
                 lower
                 inner))
          (list ?⿱
                (nth 1 enc-str)
                (list
                 (cons 'ideographic-structure
                       (or (functional-ideographic-structure-to-apparent-structure
                            new-str)
                           new-str)))))
         ((eq (car enc-str) ?⿸)
          (list ?⿸
                (nth 1 enc-str)
                (ids-find--sub-structure ?⿱ (nth 2 enc-str) inner))))))

     ((eq operator ?⿹)
      (when (setq enc-str (ids-find--enclosure-structure (nth 1 structure)))
        (when (eq (car enc-str) ?⿰)
          (list ?⿰
                (ids-find--sub-structure ?⿱ (nth 1 enc-str) inner)
                (nth 2 enc-str)))))

     ((eq (get-char-attribute operator '=ucs-itaiji-001) #x2FF6)
      (when (setq enc-str (ids-find--enclosure-structure (nth 1 structure)))
        (cond
         ((eq (car enc-str) ?⿺)
          (list ?⿺
                (ids-find--sub-structure ?⿱ inner (nth 1 enc-str))
                (nth 2 enc-str)))
         ((eq (car enc-str) ?⿱)
          (list ?⿱
                (ids-find--sub-structure ?⿰ inner (nth 1 enc-str))
                (nth 2 enc-str))))))

     ((eq operator ?⿴)
      (when (setq enc-str (ids-find--enclosure-structure (nth 1 structure)))
        (when (eq (car enc-str) ?⿱)
          (cond
           ((and (characterp (nth 2 enc-str))
                 (or (memq (char-ucs (nth 2 enc-str))
                           '(#x56D7 #x5F51 #x897F))
                     (eq (char-feature (nth 2 enc-str) '=>big5-cdp)
                         #x87A5)))
            (list ?⿱
                  (nth 1 enc-str)
                  (ids-find--sub-structure ?⿴ (nth 2 enc-str) inner)))
           ((and (characterp (nth 2 enc-str))
                 (eq (char-ucs (nth 2 enc-str)) #x51F5))
            (list ?⿱
                  (nth 1 enc-str)
                  (ids-find--sub-structure ?⿶ (nth 2 enc-str) inner)))
           ((and (characterp (nth 1 enc-str))
                 (eq (char-feature (nth 1 enc-str) '=>ucs@component)
                     #x300E6))
            (list ?⿱
                  (ids-find--sub-structure ?⿵ (nth 1 enc-str) inner)
                  (nth 2 enc-str)))
           (t
            ;; No special enclosure: INNER goes between the two parts.
            (list ?⿳
                  (nth 1 enc-str)
                  inner
                  (nth 2 enc-str)))))))

     ((eq operator ?⿶)
      (when (setq enc-str (ids-find--enclosure-structure (nth 1 structure)))
        (cond
         ((eq (car enc-str) ?⿱)
          (when (setq enc2-str
                      (ids-find--left-right-structure (nth 1 enc-str)))
            (list ?⿱
                  (ids-find--sub-structure ?⿲
                                           (nth 1 enc2-str)
                                           inner
                                           (nth 2 enc2-str))
                  (nth 2 enc-str))))
         ((eq (car enc-str) ?⿳)
          (when (setq enc2-str
                      (ids-find--left-right-structure (nth 1 enc-str)))
            (list ?⿳
                  (ids-find--sub-structure ?⿲
                                           (nth 1 enc2-str)
                                           inner
                                           (nth 2 enc2-str))
                  (nth 2 enc-str)
                  (nth 3 enc-str))))
         ((eq (car enc-str) ?⿲)
          (list ?⿲
                (nth 1 enc-str)
                (ids-find--sub-structure ?⿱ inner (nth 2 enc-str))
                (nth 3 enc-str)))
         ((eq (car enc-str) ?⿴)
          (when (setq enc2-str
                      (ids-find--left-right-structure (nth 1 enc-str)))
            (list ?⿲
                  (nth 1 enc2-str)
                  (ids-find--sub-structure ?⿱ inner (nth 2 enc-str))
                  (nth 2 enc2-str)))))))

     ((eq operator ?⿵)
      (when (setq enc-str (ids-find--enclosure-structure (nth 1 structure)))
        (cond
         ((eq (car enc-str) ?⿱)
          (when (setq enc2-str
                      (ids-find--left-right-structure (nth 2 enc-str)))
            (list ?⿱
                  (nth 1 enc-str)
                  (ids-find--sub-structure ?⿲
                                           (nth 1 enc2-str)
                                           inner
                                           (nth 2 enc2-str)))))
         ((eq (car enc-str) ?⿳)
          (when (setq enc2-str
                      (ids-find--left-right-structure (nth 3 enc-str)))
            (list ?⿳
                  (nth 1 enc-str)
                  (nth 2 enc-str)
                  (ids-find--sub-structure ?⿲
                                           (nth 1 enc2-str)
                                           inner
                                           (nth 2 enc2-str)))))
         ((eq (car enc-str) ?⿲)
          (list ?⿲
                (nth 1 enc-str)
                (ids-find--sub-structure ?⿱ (nth 2 enc-str) inner)
                (nth 3 enc-str)))
         ((eq (car enc-str) ?⿴)
          (when (setq enc2-str
                      (ids-find--left-right-structure (nth 1 enc-str)))
            (list ?⿲
                  (nth 1 enc2-str)
                  (ids-find--sub-structure ?⿱ (nth 2 enc-str) inner)
                  (nth 2 enc2-str)))))))

     ((eq operator ?⿻)
      (when (setq enc-str (ids-find--enclosure-structure (nth 1 structure)))
        (when (eq (car enc-str) ?⿱)
          (list ?⿳
                (nth 1 enc-str)
                inner
                (nth 2 enc-str))))))))
1099
1100 ;;;###autoload
(defun ideographic-structure-compact (structure)
  "Return a new list like STRUCTURE with its sub-structures compacted.
An element that is a cons is treated as a sub-structure: the value of
its `ideographic-structure' entry if it has one, or the element
itself if its car is an atom.  When
`ideographic-structure-find-chars' returns characters for that
sub-structure, the first of them replaces the element; otherwise the
element becomes ((ideographic-structure . SUB)).  All other elements
are kept as they are."
  (let (compacted)
    (dolist (element structure)
      (let ((sub (and (consp element)
                      (let ((entry (assq 'ideographic-structure element)))
                        (cond (entry
                               (cdr entry))
                              ((atom (car element))
                               element))))))
        (push (if sub
                  (let ((chars (ideographic-structure-find-chars sub)))
                    (if chars
                        (car chars)
                      (list (cons 'ideographic-structure sub))))
                element)
              compacted)))
    (nreverse compacted)))
1122
1123
1124 ;;; @ End.
1125 ;;;
1126
1127 (provide 'ids-find)
1128
1129 ;;; ids-find.el ends here