1 ;;; ids-dump.el --- Dump utility of IDS-* files
3 ;; Copyright (C) 2002,2003,2004,2005,2009 MORIOKA Tomohiko
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: IDS, IDC, Ideographs, UCS, Unicode
8 ;; This file is a part of IDS.
10 ;; This program is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
15 ;; This program is distributed in the hope that it will be useful, but
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with this program; see the file COPYING. If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
29 (defun ids-dump-format-list (ids-list)
36 (unless (encode-char c '=ucs 'defined-only)
37 (or (get-char-attribute c '=ucs@unicode)
38 (get-char-attribute c '=ucs@iso))))
39 (decode-char '=ucs ucs)
41 (ids-format-list ids-list) ""))))
43 (defun ids-dump-insert-line (ccs line-spec code)
44 (let ((chr (decode-char ccs code))
47 (setq id-list (get-char-attribute chr 'ideographic-structure))
48 (insert (format line-spec
49 code (decode-builtin-char ccs code)
51 (ids-dump-format-list id-list)
52 (char-to-string chr)))))))
54 (defun ids-dump-insert-ccs-ranges (ccs line-spec &rest ranges)
55 (let (range code max-code)
57 (setq range (car ranges))
59 (setq code (car range)
61 (while (<= code max-code)
62 (ids-dump-insert-line ccs line-spec code)
63 (setq code (1+ code))))
65 (ids-dump-insert-line ccs line-spec range))
66 (t (error 'wrong-type-argument range)))
67 (setq ranges (cdr ranges)))))
69 (defun ids-dump-insert-94x94-ccs-ranges (ccs line-spec &rest ranges)
70 (let (range code max-code l)
72 (setq range (car ranges))
74 (setq code (car range)
76 (while (<= code max-code)
77 (setq l (logand code 255))
78 (if (and (<= #x21 l)(<= l #x7E))
79 (ids-dump-insert-line ccs line-spec code))
80 (setq code (1+ code))))
82 (ids-dump-insert-line ccs line-spec range))
83 (t (error 'wrong-type-argument range)))
84 (setq ranges (cdr ranges)))))
86 (defun ids-dump-insert-daikanwa (start end)
92 (when (= (length val) 2)
95 (put-alist (nth 1 val)
97 (cdr (assq (car val) mdh-alist)))))
101 (when (setq chr (decode-char 'ideograph-daikanwa i))
103 (format "M-%05d \t%c\t%s\n"
104 i (decode-builtin-char 'ideograph-daikanwa i)
105 (or (ids-dump-format-list
106 (get-char-attribute chr 'ideographic-structure))
108 (when (setq sal (assq i mdh-alist))
110 (when (setq chr (assq 1 sal))
113 (format "M-%05d'\t%c\t%s\n"
115 (or (ids-dump-format-list
116 (get-char-attribute chr 'ideographic-structure))
118 (when (setq chr (assq 2 sal))
121 (format "M-%05d\"\t%c\t%s\n"
123 (ids-dump-format-list
124 (get-char-attribute chr 'ideographic-structure)))))
128 (defun ids-dump-insert-daikanwa-hokan ()
132 (when (and (eq (car val) 'ho)
133 (null (nthcdr 2 val)))
134 (setq sal (cons (cons (nth 1 val) key) sal)))
137 (setq sal (sort sal (lambda (a b) (< (car a)(car b)))))
139 (setq chr (cdr cell))
141 (format "MH-%04d \t%c\t%s\n"
144 (or (ids-dump-format-list
145 (get-char-attribute chr 'ideographic-structure))
148 (defun ids-dump-insert-jis-x0208-1990 ()
156 (setq chr (make-char 'japanese-jisx0208-1990 h l))
158 (format "J90-%02X%02X\t%c\t%s\n"
160 (decode-builtin-char 'japanese-jisx0208-1990
161 (logior (lsh h 8) l))
162 (or (ideographic-structure-to-ids
163 (get-char-attribute chr 'ideographic-structure))
165 (setq cell (1+ cell)))
171 (setq chr (make-char 'japanese-jisx0208-1990 h l))
173 (format "J90-%02X%02X\t%c\t%s\n"
175 (decode-builtin-char 'japanese-jisx0208-1990
176 (logior (lsh h 8) l))
177 (or (ideographic-structure-to-ids
178 (get-char-attribute chr 'ideographic-structure))
180 (setq cell (1+ cell)))))
182 (defun ids-dump-insert-big5 (ccs prefix)
184 l code chr structure)
188 (setq chr (make-char ccs h l))
190 (when (setq structure
191 (get-char-attribute chr 'ideographic-structure))
193 (format "%s%02X%02X\t%c\t%s\n"
195 (decode-builtin-char ccs
196 (logior (lsh h 8) l))
198 (get-char-attribute chr 'ideographic-structure))
203 (setq chr (make-char ccs h l))
205 (when (setq structure
206 (get-char-attribute chr 'ideographic-structure))
208 (format "%s%02X%02X\t%c\t%s\n"
210 (decode-builtin-char ccs
211 (logior (lsh h 8) l))
213 (get-char-attribute chr 'ideographic-structure))
218 (defun ids-dump-insert-big5-pua (ccs prefix)
219 (let ((line-spec (concat prefix "%04X\t%c\t%s\n"))
225 (ids-dump-insert-line ccs line-spec (logior (lsh h 8) l))
229 (ids-dump-insert-line ccs line-spec (logior (lsh h 8) l))
235 (ids-dump-insert-line ccs line-spec (logior (lsh h 8) l))
241 (ids-dump-insert-line ccs line-spec (logior (lsh h 8) l))
245 (ids-dump-insert-line ccs line-spec (logior (lsh h 8) l))
252 (ids-dump-insert-line ccs line-spec (logior (lsh h 8) l))
256 (ids-dump-insert-line ccs line-spec (logior (lsh h 8) l))
260 (defun ids-dump-range (file path func &rest args)
262 (let* ((coding-system-for-write 'utf-8-mcs-er))
263 (if (file-directory-p path)
264 (setq path (expand-file-name file path)))
265 (insert ";; -*- coding: utf-8-mcs-er -*-\n")
267 (write-region (point-min)(point-max) path))))
270 (defun ids-dump-ucs-basic (filename)
271 (interactive "Fdump IDS-UCS-Basic : ")
272 (ids-dump-range "IDS-UCS-Basic.txt" filename
273 #'ids-dump-insert-ccs-ranges 'ucs "U+%04X\t%c\t%s\n"
277 (defun ids-dump-ucs-basic@unicode (filename)
278 (interactive "Fdump IDS-UCS-Basic : ")
279 (ids-dump-range "IDS-UCS-Basic_u.txt" filename
280 #'ids-dump-insert-ccs-ranges '=ucs@unicode
285 (defun ids-dump-ucs-ext-a (filename)
286 (interactive "Fdump IDS-UCS-Ext-A : ")
287 (ids-dump-range "IDS-UCS-Ext-A.txt" filename
288 #'ids-dump-insert-ccs-ranges 'ucs "U+%04X\t%c\t%s\n"
289 '(#x3400 . #x4DB5) #xFA1F #xFA23))
292 (defun ids-dump-ucs-compat (filename)
293 (interactive "Fdump IDS-UCS-Compat : ")
294 (ids-dump-range "IDS-UCS-Compat.txt" filename
295 #'ids-dump-insert-ccs-ranges 'ucs "U+%04X\t%c\t%s\n"
296 '(#xF900 . #xFA1E) '(#xFA20 . #xFA22) '(#xFA24 . #xFA2D)))
299 (defun ids-dump-ucs-ext-b-1 (filename)
300 (interactive "Fdump IDS-UCS-Ext-B-1 : ")
301 (ids-dump-range "IDS-UCS-Ext-B-1.txt" filename
302 #'ids-dump-insert-ccs-ranges 'ucs "U-%08X\t%c\t%s\n"
303 '(#x20000 . #x21FFF)))
306 (defun ids-dump-ucs-ext-b-2 (filename)
307 (interactive "Fdump IDS-UCS-Ext-B-2 : ")
308 (ids-dump-range "IDS-UCS-Ext-B-2.txt" filename
309 #'ids-dump-insert-ccs-ranges 'ucs "U-%08X\t%c\t%s\n"
310 '(#x22000 . #x23FFF)))
313 (defun ids-dump-ucs-ext-b-3 (filename)
314 (interactive "Fdump IDS-UCS-Ext-B-3 : ")
315 (ids-dump-range "IDS-UCS-Ext-B-3.txt" filename
316 #'ids-dump-insert-ccs-ranges 'ucs "U-%08X\t%c\t%s\n"
317 '(#x24000 . #x25FFF)))
320 (defun ids-dump-ucs-ext-b-4 (filename)
321 (interactive "Fdump IDS-UCS-Ext-B-4 : ")
322 (ids-dump-range "IDS-UCS-Ext-B-4.txt" filename
323 #'ids-dump-insert-ccs-ranges 'ucs "U-%08X\t%c\t%s\n"
324 '(#x26000 . #x27FFF)))
327 (defun ids-dump-ucs-ext-b-5 (filename)
328 (interactive "Fdump IDS-UCS-Ext-B-5 : ")
329 (ids-dump-range "IDS-UCS-Ext-B-5.txt" filename
330 #'ids-dump-insert-ccs-ranges 'ucs "U-%08X\t%c\t%s\n"
331 '(#x28000 . #x29FFF)))
334 (defun ids-dump-ucs-ext-b-6 (filename)
335 (interactive "Fdump IDS-UCS-Ext-B-6 : ")
336 (ids-dump-range "IDS-UCS-Ext-B-6.txt" filename
337 #'ids-dump-insert-ccs-ranges 'ucs "U-%08X\t%c\t%s\n"
338 '(#x2A000 . #x2A6D6)))
341 (defun ids-dump-ucs-compat-supplement (filename)
342 (interactive "Fdump IDS-UCS-Compat-Supplement : ")
343 (ids-dump-range "IDS-UCS-Compat-Supplement.txt" filename
344 #'ids-dump-insert-ccs-ranges 'ucs "U-%08X\t%c\t%s\n"
345 '(#x2F800 . #x2FA1D)))
348 (defun ids-dump-cns11643-1 (filename)
349 (interactive "Fdump IDS-CNS-1 : ")
350 (ids-dump-range "IDS-CNS-1.txt" filename
351 #'ids-dump-insert-94x94-ccs-ranges
352 'chinese-cns11643-1 "C1-%04X\t%c\t%s\n"
356 (defun ids-dump-cns11643-2 (filename)
357 (interactive "Fdump IDS-CNS-2 : ")
358 (ids-dump-range "IDS-CNS-2.txt" filename
359 #'ids-dump-insert-94x94-ccs-ranges
360 'chinese-cns11643-2 "C2-%04X\t%c\t%s\n"
364 (defun ids-dump-cns11643-3 (filename)
365 (interactive "Fdump IDS-CNS-3 : ")
366 (ids-dump-range "IDS-CNS-3.txt" filename
367 #'ids-dump-insert-94x94-ccs-ranges
368 'chinese-cns11643-3 "C3-%04X\t%c\t%s\n"
372 (defun ids-dump-daikanwa-01 (filename)
373 (interactive "Fdump IDS-Daikanwa-01 : ")
374 (ids-dump-range "IDS-Daikanwa-01.txt" filename
375 #'ids-dump-insert-daikanwa 00001 01449))
378 (defun ids-dump-daikanwa-02 (filename)
379 (interactive "Fdump IDS-Daikanwa-02 : ")
380 (ids-dump-range "IDS-Daikanwa-02.txt" filename
381 #'ids-dump-insert-daikanwa 01450 04674))
384 (defun ids-dump-daikanwa-03 (filename)
385 (interactive "Fdump IDS-Daikanwa-03 : ")
386 (ids-dump-range "IDS-Daikanwa-03.txt" filename
387 #'ids-dump-insert-daikanwa 04675 07410))
390 (defun ids-dump-daikanwa-04 (filename)
391 (interactive "Fdump IDS-Daikanwa-04 : ")
392 (ids-dump-range "IDS-Daikanwa-04.txt" filename
393 #'ids-dump-insert-daikanwa 07411 11529))
396 (defun ids-dump-daikanwa-05 (filename)
397 (interactive "Fdump IDS-Daikanwa-05 : ")
398 (ids-dump-range "IDS-Daikanwa-05.txt" filename
399 #'ids-dump-insert-daikanwa 11530 14414))
402 (defun ids-dump-daikanwa-06 (filename)
403 (interactive "Fdump IDS-Daikanwa-06 : ")
404 (ids-dump-range "IDS-Daikanwa-06.txt" filename
405 #'ids-dump-insert-daikanwa 14415 17574))
408 (defun ids-dump-daikanwa-07 (filename)
409 (interactive "Fdump IDS-Daikanwa-07 : ")
410 (ids-dump-range "IDS-Daikanwa-07.txt" filename
411 #'ids-dump-insert-daikanwa 17575 22677))
414 (defun ids-dump-daikanwa-08 (filename)
415 (interactive "Fdump IDS-Daikanwa-08 : ")
416 (ids-dump-range "IDS-Daikanwa-08.txt" filename
417 #'ids-dump-insert-daikanwa 22678 28107))
420 (defun ids-dump-daikanwa-09 (filename)
421 (interactive "Fdump IDS-Daikanwa-09 : ")
422 (ids-dump-range "IDS-Daikanwa-09.txt" filename
423 #'ids-dump-insert-daikanwa 28108 32803))
426 (defun ids-dump-daikanwa-10 (filename)
427 (interactive "Fdump IDS-Daikanwa-10 : ")
428 (ids-dump-range "IDS-Daikanwa-10.txt" filename
429 #'ids-dump-insert-daikanwa 32804 38699))
432 (defun ids-dump-daikanwa-11 (filename)
433 (interactive "Fdump IDS-Daikanwa-11 : ")
434 (ids-dump-range "IDS-Daikanwa-11.txt" filename
435 #'ids-dump-insert-daikanwa 38700 42209))
438 (defun ids-dump-daikanwa-12 (filename)
439 (interactive "Fdump IDS-Daikanwa-12 : ")
440 (ids-dump-range "IDS-Daikanwa-12.txt" filename
441 #'ids-dump-insert-daikanwa 42210 48902))
444 (defun ids-dump-daikanwa-index (filename)
445 (interactive "Fdump IDS-Daikanwa-dx : ")
446 (ids-dump-range "IDS-Daikanwa-dx.txt" filename
447 #'ids-dump-insert-daikanwa 48903 49964))
450 (defun ids-dump-daikanwa-hokan (filename)
451 (interactive "Fdump IDS-Daikanwa-ho : ")
452 (ids-dump-range "IDS-Daikanwa-ho.txt" filename
453 #'ids-dump-insert-daikanwa-hokan))
456 (defun ids-dump-cbeta (filename)
457 (interactive "Fdump IDS-CBETA : ")
458 (ids-dump-range "IDS-CBETA.txt" filename
459 #'ids-dump-insert-ccs-ranges
460 'ideograph-cbeta "CB%05d\t%c\t%s\n"
464 (defun ids-dump-jis-x0208-1990 (filename)
465 (interactive "Fdump IDS-JIS-X0208-1990 : ")
466 (ids-dump-range "IDS-JIS-X0208-1990.txt" filename
467 #'ids-dump-insert-jis-x0208-1990))
470 (defun ids-dump-big5-cdp (filename)
471 (interactive "Fdump IDS-CDP : ")
472 (ids-dump-range "IDS-CDP.txt" filename
473 #'ids-dump-insert-big5-pua
482 ;;; ids-dump.el ends here