1 ;;; ids-find.el --- search utility based on Ideographic-structures
3 ;; Copyright (C) 2002,2003,2005,2006,2007,2017 MORIOKA Tomohiko
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: Kanji, Ideographs, search, IDS, CHISE, UCS, Unicode
8 ;; This file is a part of CHISE IDS.
10 ;; This program is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
15 ;; This program is distributed in the hope that it will be useful, but
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with this program; see the file COPYING. If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
;; Index helper: reads COMPONENT's `ideographic-products' attribute and,
;; unless PRODUCT is already in that list (`memq'), updates the attribute
;; (presumably by adding PRODUCT -- confirm against the stored value).
;; When COMPONENT itself has an `ideographic-structure' feature, that
;; structure is handed to `ids-index-store-structure' for the same
;; PRODUCT, so the two functions recurse into each other.
27 (defun ids-index-store-char (product component)
28 (let ((ret (get-char-attribute component 'ideographic-products)))
29 (unless (memq product ret)
30 (put-char-attribute component 'ideographic-products
;; `ret' is reused here to hold COMPONENT's own structure, if any.
32 (when (setq ret (char-feature component 'ideographic-structure))
33 (ids-index-store-structure product ret)))
;; Index helper: walks the elements of STRUCTURE after its first element
;; (`cdr') and registers PRODUCT for each one.
36 (defun ids-index-store-structure (product structure)
38 (dolist (cell (cdr structure))
;; A cell may be replaced by its `:char' property before dispatching
;; (NOTE(review): the condition guarding this is worth confirming).
40 (setq cell (plist-get cell :char)))
;; Dispatch on the kind of cell:
;;  - a character is indexed directly;
;;  - a cell carrying an `ideographic-structure' entry is recursed into;
;;  - otherwise `find-char' is tried and its result, if any, is indexed.
41 (cond ((characterp cell)
42 (ids-index-store-char product cell))
43 ((setq ret (assq 'ideographic-structure cell))
44 (ids-index-store-structure product (cdr ret)))
45 ((setq ret (find-char cell))
46 (ids-index-store-char product ret))
;; Rebuilds the component-to-product index: passes (C, V) pairs to
;; `ids-index-store-structure' -- presumably each character C together
;; with its `ideographic-structure' value V; confirm against the mapping
;; call -- and then saves the `ideographic-products' attribute table.
50 (defun ids-update-index ()
54 (ids-index-store-structure c v)
56 'ideographic-structure)
57 (save-char-attribute-table 'ideographic-products))
;; Executed when the file is loaded: mounts the `ideographic-products'
;; attribute table, the attribute read by the search functions below.
60 (mount-char-attribute-table 'ideographic-products)
;; Collects into `dest' the characters listed in CHAR's
;; `ideographic-products' feature, together with the products of those
;; products (the function calls itself on every entry).
63 (defun ids-find-all-products (char)
65 (dolist (cell (char-feature char 'ideographic-products))
;; Add the direct product only once.
66 (unless (memq cell dest)
67 (setq dest (cons cell dest)))
;; Merge in everything built from this product, recursively.
68 (setq dest (union dest (ids-find-all-products cell))))
;; Builds a list of feature names for "<-" component relations: scans
;; `char-attribute-list' for symbols whose names start with "<-" and
;; contain "@component" or "/component" (optionally followed by further
;; "/..." segments), and combines them via `list*' with a fixed set of
;; features that includes `<-mistakable', `->mistakable', `<-original'
;; and `->original'.
71 (defun of-component-features ()
73 (dolist (feature (char-attribute-list))
74 (when (string-match "^<-.*[@/]component\\(/[^*/]+\\)*$"
75 (symbol-name feature))
77 (list* '<-mistakable '->mistakable
80 '<-original '->original
;; Counterpart of `of-component-features' for "->" relations: scans
;; `char-attribute-list' for symbols whose names start with "->" and
;; contain "@component" or "/component" (optionally followed by further
;; "/..." segments).
84 (defun to-component-features ()
86 (dolist (feature (char-attribute-list))
87 (when (string-match "^->.*[@/]component\\(/[^*/]+\\)*$"
88 (symbol-name feature))
;; Computes the list of characters treated as variants of CHAR when CHAR
;; is used as a component.  The result list `dest' starts out as
;; (CHAR) and is extended from several sources below.
93 (defun char-component-variants (char)
94 (let ((dest (list char))
;; 1. Follow every "->...component" feature of CHAR and merge in the
;;    variants of the characters found there (recursive call).
96 (dolist (feature (to-component-features))
97 (if (setq ret (get-char-attribute char feature))
99 (setq dest (union dest (char-component-variants c))))))
;; Earlier variant of step 1 that stopped at the first matching feature
;; (`some'); kept commented out.
101 ;; ((setq ret (some (lambda (feature)
102 ;; (get-char-attribute char feature))
103 ;; (to-component-features)))
105 ;; (setq dest (union dest (char-component-variants c))))
;; 2. CHAR has a `->ucs-unified' attribute: `dest' becomes CHAR consed
;;    onto that attribute value, and values reached through the
;;    `of-component-features' are merged in.
107 ((setq ret (get-char-attribute char '->ucs-unified))
108 (setq dest (cons char ret))
110 (setq dest (union dest
111 (some (lambda (feature)
112 (get-char-attribute c feature))
113 (of-component-features))
;; 3. CHAR has a `=>ucs' attribute that decodes to a character: start
;;    from that character and its `char-variants', then merge in values
;;    reached through the `of-component-features'.
116 ((and (setq ret (get-char-attribute char '=>ucs))
117 (setq uchr (decode-char '=ucs ret)))
118 (setq dest (cons uchr (char-variants uchr)))
120 (setq dest (union dest
121 (some (lambda (feature)
122 (get-char-attribute c feature))
123 (of-component-features))
;; 4. Remaining case: add each candidate `c' once; this path looks the
;;    features up with `char-feature' rather than `get-char-attribute'.
129 (unless (memq c dest)
130 (setq dest (cons c dest)))
133 (some (lambda (feature)
134 (char-feature c feature))
135 (of-component-features))
;; Looks up products shared by all COMPONENTS, taking component variants
;; into account.  COMPONENTS may be given as separate arguments or as a
;; single string, which is split into a list of characters.
143 (defun ideographic-products-find (&rest components)
144 (if (stringp (car components))
145 (setq components (string-to-char-list (car components))))
;; Seed: the `ideographic-products' of every variant of the first
;; component.
147 (dolist (variant (char-component-variants (car components)))
150 (get-char-attribute variant 'ideographic-products))))
153 (setq components (cdr components)))
;; For each further component, gather its variants' products and keep
;; only those already in `dest' (`intersection').
155 (dolist (variant (char-component-variants (car components)))
158 (get-char-attribute variant 'ideographic-products))))
159 (setq dest (intersection dest products)))
;; Variant-aware product search: COMPONENTS is a list of characters or a
;; string (converted with `string-to-char-list').  The function also
;; accepts an optional IGNORED-CHARS argument.
162 (defun ideograph-find-products-with-variants (components &optional ignored-chars)
163 (if (stringp components)
164 (setq components (string-to-char-list components)))
;; Seed from the `ideographic-products' of every variant of the first
;; component.
166 (dolist (variant (char-component-variants (car components)))
170 (get-char-attribute variant 'ideographic-products)
174 (setq components (cdr components)))
;; Narrow `dest' with each further component's variant products.
176 (dolist (variant (char-component-variants (car components)))
180 (get-char-attribute variant 'ideographic-products)
182 (setq dest (intersection dest products)))
;; Product search without variant expansion: COMPONENTS is a list of
;; characters or a string (converted with `string-to-char-list').  The
;; `ideographic-products' of each component are read directly with
;; `get-char-attribute' and combined with `intersection'.  The
;; commented-out lines are the variant-expanding form of the same
;; lookups.
185 (defun ideograph-find-products (components &optional ignored-chars)
186 (if (stringp components)
187 (setq components (string-to-char-list components)))
189 ;; (dolist (variant (char-component-variants (car components)))
192 ;; (get-char-attribute variant 'ideographic-products))))
193 ;; (setq dest products)
;; Seed from the first component's own products.
194 (setq dest (get-char-attribute (car components) 'ideographic-products))
196 (setq components (cdr components)))
197 ;; (setq products nil)
198 ;; (dolist (variant (char-component-variants (car components)))
201 ;; (get-char-attribute variant 'ideographic-products))))
;; Narrow with the next component's own products.
202 (setq products (get-char-attribute (car components) 'ideographic-products))
203 (setq dest (intersection dest products)))
;; Equality predicate for structure components; it is passed as the
;; comparison function to `char-ref=' and as the `:test' of `member*'
;; elsewhere in this file.  It reads the `char-ucs' of C1 and, as its
;; last alternative, accepts C1 when it is `memq' in the component
;; variants of C2.
207 (defun ideographic-structure-char= (c1 c2)
210 (let ((m1 (char-ucs c1))
214 (memq c1 (char-component-variants c2)))))))
;; Tests COMPONENT against one element S-COMPONENT of a structure:
;;  - direct match via `char-ref=' using `ideographic-structure-char=';
;;  - S-COMPONENT carrying an embedded `ideographic-structure' entry:
;;    search that sub-structure with `ideographic-structure-member';
;;  - S-COMPONENT having an `ideographic-structure' attribute: search
;;    that structure with `ideographic-structure-member'.
216 (defun ideographic-structure-member-compare-components (component s-component)
218 (cond ((char-ref= component s-component #'ideographic-structure-char=))
220 (if (setq ret (assq 'ideographic-structure s-component))
221 (ideographic-structure-member component (cdr ret))))
222 ((setq ret (get-char-attribute s-component 'ideographic-structure))
223 (ideographic-structure-member component ret)))))
226 (defun ideographic-structure-member (component structure)
227 "Return non-nil if COMPONENT is included in STRUCTURE."
;; Fast path: COMPONENT appears literally in STRUCTURE.
228 (or (memq component structure)
;; Otherwise step through STRUCTURE one element at a time (`cdr') and
;; compare COMPONENT with each element in turn; the comparison may
;; recurse into nested structures.
230 (setq structure (cdr structure))
231 (ideographic-structure-member-compare-components
232 component (car structure)))
234 (setq structure (cdr structure))
235 (ideographic-structure-member-compare-components
236 component (car structure)))
238 (setq structure (cdr structure))
240 (ideographic-structure-member-compare-components
241 component (car structure))))))
245 (defun ideographic-structure-repertoire-p (structure components)
246 "Return non-nil if STRUCTURE can be constructed by a subset of COMPONENTS."
248 (let (ret s-component)
;; Visit every element of STRUCTURE after the first one.
250 (while (setq structure (cdr structure))
251 (setq s-component (car structure))
;; A non-character element is replaced by the character `find-char'
;; returns for it, when there is one.
252 (unless (characterp s-component)
253 (if (setq ret (find-char s-component))
254 (setq s-component ret)))
;; An element with an embedded `ideographic-structure' entry is checked
;; recursively against the same COMPONENTS.
257 (if (setq ret (assq 'ideographic-structure s-component))
258 (ideographic-structure-repertoire-p
259 (cdr ret) components)))
;; An element is accepted when it matches a member of COMPONENTS under
;; `ideographic-structure-char='.
260 ((member* s-component components
261 :test #'ideographic-structure-char=))
;; Otherwise the element's own `ideographic-structure' attribute is
;; checked recursively.
263 (get-char-attribute s-component
264 'ideographic-structure))
265 (ideographic-structure-repertoire-p ret components)))
;; Name of the buffer that the interactive search commands in this file
;; create (`get-buffer-create') and display (`view-buffer').
270 (defvar ids-find-result-buffer "*ids-chars*")
;; Formats one result line for character C with structure V: three
;; tab-separated fields terminated by a newline.  The first field is the
;; character itself (%c).
272 (defun ids-find-format-line (c v)
273 (format "%c\t%s\t%s\n"
;; Code point field: taken from `char-ucs', falling back to
;; `encode-char' with `ucs'.
275 (or (let ((ucs (or (char-ucs c)
276 (encode-char c 'ucs))))
;; Code points up to #xFFFF print as " U+XXXX" (with a leading space);
;; the other visible format is "U-XXXXXXXX".
278 (cond ((<= ucs #xFFFF)
279 (format " U+%04X" ucs))
281 (format "U-%08X" ucs)))))
;; Structure field: V rendered by `ideographic-structure-to-ids' when
;; that returns non-nil.
283 (or (ideographic-structure-to-ids v)
;; Inserts, at point in the current buffer, one formatted line for each
;; product of COMPONENTS found by `ideograph-find-products', then
;; recurses into each inserted character one LEVEL deeper.
286 (defun ids-insert-chars-including-components* (components
287 &optional level ignored-chars)
;; Sort a copy (`copy-tree') of the product list so that the list
;; returned by the lookup is not modified by `sort'.  The ordering
;; predicate consults `char-total-strokes' of both characters and
;; `ideograph-char<'.
291 (dolist (c (sort (copy-tree (ideograph-find-products components
294 (if (setq as (char-total-strokes a))
295 (if (setq bs (char-total-strokes b))
297 (ideograph-char< a b)
300 (ideograph-char< a b)))))
;; Characters in IGNORED-CHARS are skipped.
301 (unless (memq c ignored-chars)
302 (setq is (char-feature c 'ideographic-structure))
307 (insert (ids-find-format-line c is))
;; Recurse with C as the sole component; C is added to the ignore list
;; for that recursive call.
309 (ids-insert-chars-including-components*
310 (char-to-string c) (1+ level)
311 (cons c ignored-chars))))
;; Variant-aware front end to `ids-insert-chars-including-components*':
;; first runs the plain insertion, then revisits variants of the
;; characters in the ignore list, and finally handles products found
;; through `ideograph-find-products-with-variants'.
316 (defun ids-insert-chars-including-components (components
317 &optional level ignored-chars)
;; Pass 1: the plain, non-variant search.
;; NOTE(review): how its result feeds `ignored-chars' should be
;; confirmed against the enclosing form.
322 (ids-insert-chars-including-components* components
323 level ignored-chars)))
;; Pass 2: for every ignored character, look at each of its component
;; variants that is not itself ignored and has an
;; `ideographic-structure' attribute; insert a line for the variant and
;; recurse into it.
325 (dolist (c ignored-chars)
326 (dolist (vc (char-component-variants c))
327 (unless (memq vc ignored-chars)
328 (when (setq is (get-char-attribute vc 'ideographic-structure))
333 (insert (ids-find-format-line vc is))
335 (ids-insert-chars-including-components*
336 (char-to-string vc) (1+ level)
337 (cons vc ignored-chars)))))))
;; Pass 3: products found through the variant-aware lookup, sorted on a
;; copy of the list; the ordering predicate consults
;; `char-total-strokes' and `ideograph-char<'.
338 (dolist (c (sort (copy-tree (ideograph-find-products-with-variants
339 components ignored-chars))
341 (if (setq as (char-total-strokes a))
342 (if (setq bs (char-total-strokes b))
344 (ideograph-char< a b)
347 (ideograph-char< a b)))))
348 (unless (memq c ignored-chars)
349 (setq is (get-char-attribute c 'ideographic-structure))
354 (insert (ids-find-format-line c is))
356 (ids-insert-chars-including-components*
357 (char-to-string c) (1+ level)
358 (cons c ignored-chars))))
364 (defun ids-find-chars-including-components (components)
365 "Search Ideographs whose structures have COMPONENTS."
;; Interactively, COMPONENTS is read from the minibuffer as a string.
366 (interactive "sComponents : ")
;; Results are written into the buffer named by
;; `ids-find-result-buffer'; it is made writable first.
367 (with-current-buffer (get-buffer-create ids-find-result-buffer)
368 (setq buffer-read-only nil)
;; Top-level call: level 0 and an empty ignore list.
370 (ids-insert-chars-including-components components 0 nil)
;; The commented-out code below is an earlier two-pass form that passed
;; the product-finder function as an extra argument.
371 ;; (let ((ignored-chars
373 ;; (ids-insert-chars-including-components components 0 nil
374 ;; #'ideograph-find-products)))
376 ;; (setq rest ignored-chars)
377 ;; ;; (dolist (c rest)
378 ;; ;; (setq ignored-chars
379 ;; ;; (union ignored-chars
380 ;; ;; (ids-insert-chars-including-components
381 ;; ;; (list c) 0 ignored-chars
382 ;; ;; #'ideograph-find-products-with-variants))))
383 ;; (ids-insert-chars-including-components components 0 ignored-chars
384 ;; #'ideograph-find-products-with-variants))
;; Leave point at the top of the results, then show the buffer with
;; `view-buffer'.
385 (goto-char (point-min)))
386 (view-buffer ids-find-result-buffer))
;; Keep the old command name `ideographic-structure-search-chars'
;; available as an obsolete alias of
;; `ids-find-chars-including-components'.
;; NOTE(review): some Emacs versions require an additional WHEN argument
;; to `define-obsolete-function-alias' -- confirm for the targeted
;; Emacs/XEmacs version.
389 (define-obsolete-function-alias 'ideographic-structure-search-chars
390 'ids-find-chars-including-components)
393 (defun ids-find-chars-covered-by-components (components)
394 "Search Ideographs which structures are consisted by subsets of COMPONENTS."
;; Interactively, COMPONENTS is read from the minibuffer as a string; a
;; string is split into a list of characters before searching.
395 (interactive "sComponents: ")
396 (if (stringp components)
397 (setq components (string-to-char-list components)))
;; Results are written into the buffer named by
;; `ids-find-result-buffer'; it is made writable first.
398 (with-current-buffer (get-buffer-create ids-find-result-buffer)
399 (setq buffer-read-only nil)
;; A line is inserted for each (C, V) pair whose structure V passes
;; `ideographic-structure-repertoire-p' for COMPONENTS -- presumably
;; every character C with its `ideographic-structure' value V; confirm
;; against the mapping call.
403 (when (ideographic-structure-repertoire-p v components)
404 (insert (ids-find-format-line c v))))
405 'ideographic-structure)
;; Leave point at the top of the results, then show the buffer with
;; `view-buffer'.
406 (goto-char (point-min)))
407 (view-buffer ids-find-result-buffer))
415 ;;; ids-find.el ends here