1 ;;; ids-find.el --- search utility based on Ideographic-structures
3 ;; Copyright (C) 2002,2003,2005,2006,2007 MORIOKA Tomohiko
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: Kanji, Ideographs, search, IDS, CHISE, UCS, Unicode
8 ;; This file is a part of CHISE IDS.
10 ;; This program is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
15 ;; This program is distributed in the hope that it will be useful, but
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with this program; see the file COPYING. If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
;; ids-index-store-char: register PRODUCT in the `ideographic-products'
;; attribute of the character COMPONENT.
;; The current attribute value is read into `ret'; the attribute is only
;; rewritten when PRODUCT is not already a member (`memq') of that list.
27 (defun ids-index-store-char (product component)
28 (let ((ret (get-char-attribute component 'ideographic-products)))
29 (unless (memq product ret)
;; NOTE(review): the value argument of this `put-char-attribute' call is
;; not visible here -- presumably PRODUCT consed onto `ret'; confirm.
30 (put-char-attribute component 'ideographic-products
;; `ret' is reused: when COMPONENT has an `ideographic-structure'
;; feature, PRODUCT and that structure are handed to
;; `ids-index-store-structure'.
32 (when (setq ret (char-feature component 'ideographic-structure))
33 (ids-index-store-structure product ret)))
;; ids-index-store-structure: walk STRUCTURE and register PRODUCT for
;; each element found in it.
;; Only (cdr structure) is iterated, so the first element of STRUCTURE is
;; skipped (presumably the IDS operator -- TODO confirm).
36 (defun ids-index-store-structure (product structure)
38 (dolist (cell (cdr structure))
;; NOTE(review): the condition guarding this `plist-get' and the binding
;; of `ret' are not visible in this view.
40 (setq cell (plist-get cell :char)))
;; Dispatch on the kind of element:
;;  - a character: index PRODUCT under it directly;
;;  - a list with an `ideographic-structure' entry: recurse into the
;;    nested structure (the cdr of that entry);
;;  - otherwise, whatever `find-char' resolves CELL to (when non-nil).
41 (cond ((characterp cell)
42 (ids-index-store-char product cell))
43 ((setq ret (assq 'ideographic-structure cell))
44 (ids-index-store-structure product (cdr ret)))
45 ((setq ret (find-char cell))
46 (ids-index-store-char product ret))
;; ids-update-index: rebuild the `ideographic-products' index and save it.
;; Takes no arguments.
50 (defun ids-update-index ()
;; NOTE(review): the form that binds `c' and `v' is not visible here --
;; presumably a `map-char-attribute' over `ideographic-structure' whose
;; callback receives each character and its structure; confirm.
54 (ids-index-store-structure c v)
56 'ideographic-structure)
;; After indexing, the `ideographic-products' attribute table is saved.
57 (save-char-attribute-table 'ideographic-products))
;; Top-level form, evaluated when this file is loaded: calls
;; `mount-char-attribute-table' for the `ideographic-products' attribute.
60 (mount-char-attribute-table 'ideographic-products)
;; ids-find-all-products: accumulate in `dest' the characters listed in
;; CHAR's `ideographic-products' feature, together with the result of
;; calling this function recursively on each of them.
;; NOTE(review): the binding of `dest' and the return form are not visible
;; here; presumably `dest' is returned -- confirm.
;; NOTE(review): no visited-set is visible, so termination presumably
;; relies on the products index being acyclic -- confirm.
63 (defun ids-find-all-products (char)
65 (dolist (cell (char-feature char 'ideographic-products))
;; Add the direct product once (`memq' guards against duplicates) ...
66 (unless (memq cell dest)
67 (setq dest (cons cell dest)))
;; ... then merge in the products of that product.
68 (setq dest (union dest (ids-find-all-products cell))))
;; of-component-features: scan every feature returned by
;; `char-attribute-list' and pick those whose symbol name matches the
;; "<-...component" pattern below (names starting with "<-", containing
;; "@component" or "/component", optionally followed by "/"-separated
;; parts that contain neither "*" nor "/").
;; The visible tail conses `<-ideographic-component-forms' onto the front
;; of something; NOTE(review): the collecting and return forms are not
;; visible here -- presumably the matched features; confirm.
71 (defun of-component-features ()
73 (dolist (feature (char-attribute-list))
74 (when (string-match "^<-.*[@/]component\\(/[^*/]+\\)*$"
75 (symbol-name feature))
77 (cons '<-ideographic-component-forms
;; to-component-features: counterpart of `of-component-features' for the
;; "->" direction.  Scans every feature returned by `char-attribute-list'
;; and picks those whose symbol name starts with "->" and matches the
;; "...component" pattern below.
;; The visible tail conses `->ideographic-component-forms' onto the front
;; of something; NOTE(review): the collecting and return forms are not
;; visible here -- presumably the matched features; confirm.
80 (defun to-component-features ()
82 (dolist (feature (char-attribute-list))
83 (when (string-match "^->.*[@/]component\\(/[^*/]+\\)*$"
84 (symbol-name feature))
86 (cons '->ideographic-component-forms
;; char-component-variants: build in `dest' a list of characters treated
;; as component variants of CHAR.  `dest' starts as a list holding CHAR
;; itself.
;; NOTE(review): several lines of this definition are not visible here,
;; including the bindings of `ret', `uchr' and `c' and the final return
;; form; the comments below describe only the visible forms.
90 (defun char-component-variants (char)
91 (let ((dest (list char))
;; For each "->...component" feature that CHAR has a value for, merge in
;; the variants of `c' recursively (`c' presumably ranges over that
;; value -- confirm).
93 (dolist (feature (to-component-features))
94 (if (setq ret (get-char-attribute char feature))
96 (setq dest (union dest (char-component-variants c))))))
98 ;; ((setq ret (some (lambda (feature)
99 ;; (get-char-attribute char feature))
100 ;; (to-component-features)))
102 ;; (setq dest (union dest (char-component-variants c))))
;; Branch: CHAR has a `->ucs-unified' attribute.  `dest' is reset to CHAR
;; followed by that attribute value, then extended with the first non-nil
;; "<-...component" attribute found on `c'.
104 ((setq ret (get-char-attribute char '->ucs-unified))
105 (setq dest (cons char ret))
107 (setq dest (union dest
108 (some (lambda (feature)
109 (get-char-attribute c feature))
110 (of-component-features))
;; Branch: CHAR has a `=>ucs' attribute that `decode-char' resolves to a
;; character `uchr'.  `dest' is reset to `uchr' followed by
;; (char-variants uchr), then extended the same way as above.
113 ((and (setq ret (get-char-attribute char '=>ucs))
114 (setq uchr (decode-char '=ucs ret)))
115 (setq dest (cons uchr (char-variants uchr)))
117 (setq dest (union dest
118 (some (lambda (feature)
119 (get-char-attribute c feature))
120 (of-component-features))
;; Add `c' to `dest' unless it is already present.
126 (unless (memq c dest)
127 (setq dest (cons c dest)))
;; Note this lookup uses `char-feature', whereas the lookups above use
;; `get-char-attribute'.
130 (some (lambda (feature)
131 (char-feature c feature))
132 (of-component-features))
;; ideographic-products-find: look up characters in the
;; `ideographic-products' index for COMPONENTS.
;; COMPONENTS is a &rest list; when its first element is a string, that
;; string is expanded to a list of characters with `string-to-char-list'
;; and replaces COMPONENTS.
;; NOTE(review): the bindings of `dest' and `products', the loop over the
;; remaining components and the return form are not visible here.
140 (defun ideographic-products-find (&rest components)
141 (if (stringp (car components))
142 (setq components (string-to-char-list (car components))))
;; First component: read the `ideographic-products' attribute of each of
;; its component variants.
144 (dolist (variant (char-component-variants (car components)))
147 (get-char-attribute variant 'ideographic-products))))
;; Advance to the next component.
150 (setq components (cdr components)))
;; Following component: read the products of its variants the same way,
;; then keep only those also present in `dest' (set intersection).
152 (dolist (variant (char-component-variants (car components)))
155 (get-char-attribute variant 'ideographic-products))))
156 (setq dest (intersection dest products)))
158 ;; (defun ideographic-products-find (&rest components)
159 ;; (if (stringp (car components))
160 ;; (setq components (car components)))
161 ;; (let ((len (length components))
164 ;; (dolist (variant (char-component-variants (elt components 0)))
167 ;; (get-char-attribute variant 'ideographic-products))))
168 ;; (setq dest products)
172 ;; (setq products nil)
173 ;; (dolist (variant (char-component-variants (elt components i)))
174 ;; (dolist (product (get-char-attribute
175 ;; variant 'ideographic-products))
176 ;; (unless (memq product products)
177 ;; (when (memq product dest)
178 ;; (setq products (cons product products))))))
179 ;; (setq dest products)))
;; ideographic-structure-char=: comparison of two characters C1 and C2
;; for structure matching.
;; The visible forms bind `m1' to (char-ucs c1) and test whether C1 is a
;; member (`memq') of the component variants of C2.
;; NOTE(review): the other bindings and the forms combining these tests
;; are not visible here.  The commented-out code below is kept as found.
184 (defun ideographic-structure-char= (c1 c2)
187 (let ((m1 (char-ucs c1))
191 (memq c1 (char-component-variants c2))
192 ;; (some (lambda (feature)
193 ;; (some (lambda (b2)
194 ;; (unless (characterp b2)
195 ;; (setq b2 (find-char b2)))
197 ;; (ideographic-structure-char= c1 b2)))
198 ;; (char-feature c2 feature)
199 ;; ;; (get-char-attribute
200 ;; ;; c2 '<-ideographic-component-forms)
202 ;; (of-component-features))
204 ;; (setq m1 (car (get-char-attribute c1 '<-radical))
205 ;; m2 (car (get-char-attribute c2 '<-radical)))
206 ;; (unless (characterp m1)
207 ;; (setq m1 (find-char m1)))
208 ;; (unless (characterp m2)
209 ;; (setq m2 (find-char m2)))
211 ;; (ideographic-structure-char= m1 m2))
;; ideographic-structure-member-compare-components: decide whether
;; COMPONENT matches S-COMPONENT, one element of a structure, either
;; directly or somewhere inside S-COMPONENT's own structure.
;; NOTE(review): the binding of `ret' and the condition of the second
;; `cond' clause are not visible here.
215 (defun ideographic-structure-member-compare-components (component s-component)
;; Direct match, using `ideographic-structure-char=' as the comparison
;; function passed to `char-ref='.
217 (cond ((char-ref= component s-component #'ideographic-structure-char=))
;; S-COMPONENT carries an inline `ideographic-structure' entry: search
;; inside it.
219 (if (setq ret (assq 'ideographic-structure s-component))
220 (ideographic-structure-member component (cdr ret))))
;; S-COMPONENT has an `ideographic-structure' attribute: search inside it.
221 ((setq ret (get-char-attribute s-component 'ideographic-structure))
222 (ideographic-structure-member component ret)))))
;; ideographic-structure-member: first tries a plain `memq' of COMPONENT
;; in STRUCTURE.  The remaining visible forms step through STRUCTURE with
;; successive `cdr's (three steps are visible) and compare COMPONENT with
;; the element at each position via
;; `ideographic-structure-member-compare-components'.
;; NOTE(review): the forms that combine these steps are not visible here.
225 (defun ideographic-structure-member (component structure)
226 "Return non-nil if COMPONENT is included in STRUCTURE."
227 (or (memq component structure)
;; First step: drop the head of STRUCTURE, then compare the new head.
229 (setq structure (cdr structure))
230 (ideographic-structure-member-compare-components
231 component (car structure)))
;; Second step.
233 (setq structure (cdr structure))
234 (ideographic-structure-member-compare-components
235 component (car structure)))
;; Third step.
237 (setq structure (cdr structure))
239 (ideographic-structure-member-compare-components
240 component (car structure))))))
;; ideographic-structure-repertoire-p: check the elements of STRUCTURE
;; against the list COMPONENTS.
;; NOTE(review): the `cond' head, some clause conditions and the handling
;; of a failed element are not visible here.
244 (defun ideographic-structure-repertoire-p (structure components)
245 "Return non-nil if STRUCTURE can be constructed by a subset of COMPONENTS."
247 (let (ret s-component)
;; Each iteration first advances STRUCTURE by one `cdr', so the original
;; first element is never examined.
249 (while (setq structure (cdr structure))
250 (setq s-component (car structure))
;; A non-character element is replaced by what `find-char' resolves it
;; to, when that is non-nil.
251 (unless (characterp s-component)
252 (if (setq ret (find-char s-component))
253 (setq s-component ret)))
;; Element carries an inline `ideographic-structure' entry: check that
;; nested structure recursively.
256 (if (setq ret (assq 'ideographic-structure s-component))
257 (ideographic-structure-repertoire-p
258 (cdr ret) components)))
;; Element is itself in COMPONENTS, compared with
;; `ideographic-structure-char='.
259 ((member* s-component components
260 :test #'ideographic-structure-char=))
;; Otherwise look at the element's own `ideographic-structure' attribute
;; and check that recursively.
262 (get-char-attribute s-component
263 'ideographic-structure))
264 (ideographic-structure-repertoire-p ret components)))
;; Name of the buffer used by the ids-find commands in this file: they
;; pass it to `get-buffer-create' and to `view-buffer'.
269 (defvar ids-find-result-buffer "*ids-chars*")
;; ids-find-format-line: format one result line for character C with
;; structure V.  The line has three tab-separated fields and ends with a
;; newline: the first is printed with %c, the other two with %s.
;; NOTE(review): the first format argument and the fallbacks of the two
;; `or' forms are not visible here; presumably the first argument is C.
271 (defun ids-find-format-line (c v)
272 (format "%c\t%s\t%s\n"
;; Code point field: taken from `char-ucs', falling back to
;; (encode-char c 'ucs).
274 (or (let ((ucs (or (char-ucs c)
275 (encode-char c 'ucs))))
;; Values up to #xFFFF are printed as " U+XXXX" (with a leading space);
;; the other visible branch prints "U-XXXXXXXX" (8 hex digits).
277 (cond ((<= ucs #xFFFF)
278 (format " U+%04X" ucs))
280 (format "U-%08X" ucs)))))
;; Structure field: V converted by `ideographic-structure-to-ids'.
282 (or (ideographic-structure-to-ids v)
;; ids-insert-chars-including-components: insert into the current buffer
;; one formatted line for each character found by
;; `ideographic-products-find' for COMPONENTS, then recurse on each such
;; character to list the characters that contain it in turn.
;; LEVEL and IGNORED-CHARS are optional.  Each recursive call receives
;; (1+ level) and IGNORED-CHARS extended with the current character.
;; NOTE(review): any defaulting of LEVEL, the bindings of `is', `as' and
;; `bs', and the use of LEVEL (presumably indentation) are not visible.
285 (defun ids-insert-chars-including-components (components
286 &optional level ignored-chars)
;; `sort' runs on a copy (`copy-tree') of the result list.  The visible
;; part of the comparison reads `char-total-strokes' of both characters
;; and falls back to `ideograph-char<' in two places.
290 (dolist (c (sort (copy-tree (ideographic-products-find components))
292 (if (setq as (char-total-strokes a))
293 (if (setq bs (char-total-strokes b))
295 (ideograph-char< a b)
298 (ideograph-char< a b)))))
;; Characters in IGNORED-CHARS are skipped; the recursive call below adds
;; each visited character to that list.
299 (unless (memq c ignored-chars)
300 (setq is (char-feature c 'ideographic-structure))
305 (insert (ids-find-format-line c is))
307 (ids-insert-chars-including-components
308 (char-to-string c) (1+ level)
309 (cons c ignored-chars))))
312 ;; (defun ids-insert-chars-including-components (components level)
314 ;; (dolist (c (ideographic-products-find components))
315 ;; (setq is (char-feature c 'ideographic-structure))
317 ;; (while (< i level)
320 ;; (insert (ids-find-format-line c is))
321 ;; ;;(forward-line -1)
322 ;; (ids-insert-chars-including-components
323 ;; (char-to-string c) (1+ level))
;; ids-find-chars-including-components: interactive command.  COMPONENTS
;; is read as a string from the minibuffer (prompt "Components : ").
;; The result list is written into the buffer named by
;; `ids-find-result-buffer', which is then shown with `view-buffer'.
327 (defun ids-find-chars-including-components (components)
328 "Search Ideographs whose structures have COMPONENTS."
329 (interactive "sComponents : ")
330 (with-current-buffer (get-buffer-create ids-find-result-buffer)
;; Make the result buffer writable before inserting.
;; NOTE(review): any clearing of earlier contents is not visible here.
331 (setq buffer-read-only nil)
;; Produce the listing, starting at level 0.
333 (ids-insert-chars-including-components components 0)
335 ;; (dolist (c (ideographic-products-find components))
336 ;; (setq is (char-feature c 'ideographic-structure))
337 ;; ;; to avoid problems caused by wrong indexes
338 ;; ;; (when (every (lambda (cc)
339 ;; ;; (ideographic-structure-member cc is))
341 ;; (dolist (dc (ideographic-products-find (char-to-string c)))
342 ;; (setq dis (char-feature dc 'ideographic-structure))
343 ;; ;; ;; to avoid problems caused by wrong indexes
344 ;; ;; (when (every (lambda (dcc)
345 ;; ;; (ideographic-structure-member dcc is))
348 ;; (insert (ids-find-format-line dc dis))
352 ;; (insert (ids-find-format-line c is))
;; Leave point at the top of the results, then display the buffer.
357 (goto-char (point-min)))
358 (view-buffer ids-find-result-buffer))
359 ;; (defun ids-find-chars-including-components (components)
360 ;; "Search Ideographs whose structures have COMPONENTS."
361 ;; (interactive "sComponents : ")
362 ;; (with-current-buffer (get-buffer-create ids-find-result-buffer)
363 ;; (setq buffer-read-only nil)
365 ;; (map-char-attribute
367 ;; (when (every (lambda (p)
368 ;; (ideographic-structure-member p v))
370 ;; (insert (ids-find-format-line c v)))
372 ;; 'ideographic-structure)
373 ;; (goto-char (point-min)))
374 ;; (view-buffer ids-find-result-buffer))
;; Keep the name `ideographic-structure-search-chars' available as an
;; obsolete alias for `ids-find-chars-including-components'.
377 (define-obsolete-function-alias 'ideographic-structure-search-chars
378 'ids-find-chars-including-components)
;; ids-find-chars-covered-by-components: interactive command.  COMPONENTS
;; is read as a string from the minibuffer (prompt "Components: ").
;; Matching characters are written into the buffer named by
;; `ids-find-result-buffer', which is then shown with `view-buffer'.
381 (defun ids-find-chars-covered-by-components (components)
382 "Search Ideographs which structures are consisted by subsets of COMPONENTS."
383 (interactive "sComponents: ")
;; A string argument is expanded to a list of characters.
384 (if (stringp components)
385 (setq components (string-to-char-list components)))
386 (with-current-buffer (get-buffer-create ids-find-result-buffer)
;; Make the result buffer writable before inserting.
387 (setq buffer-read-only nil)
;; NOTE(review): the form that binds `c' and `v' is not visible here --
;; presumably a `map-char-attribute' over `ideographic-structure';
;; confirm.  A line is inserted for each character whose structure V
;; passes `ideographic-structure-repertoire-p' against COMPONENTS.
392 (when (ideographic-structure-repertoire-p v components)
393 (insert (ids-find-format-line c v))))
394 'ideographic-structure))
;; Leave point at the top of the results, then display the buffer.
395 (goto-char (point-min)))
396 (view-buffer ids-find-result-buffer))
404 ;;; ids-find.el ends here