1 ;;; char-db-dump.el --- Dump utility of char-spec files
3 ;; Copyright (C) 2002,2003 MORIOKA Tomohiko
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: Ideographs, Character Database, UTF-2000, UCS, Unicode
8 ;; This file is a part of tomoyo-tools.
10 ;; This program is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
15 ;; This program is distributed in the hope that it will be useful, but
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with this program; see the file COPYING. If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
27 (require 'ideograph-util)
(defun char-db-dump-ideographs (directory)
  "Dump ideograph character data into DIRECTORY, one radical at a time.
First refreshes the radical table with `update-ideograph-radical-table',
then calls `write-ideograph-radical-char-data' with each radical number
and DIRECTORY."
  (interactive "DDump Ideographs : ")
  (update-ideograph-radical-table)
  ;; NOTE(review): radical numbers are assumed to run from 1 to 214 (the
  ;; number of Kangxi radicals) -- confirm against `ideograph-util'.
  (let ((i 1))
    (while (<= i 214)
      (write-ideograph-radical-char-data i directory)
      (setq i (1+ i)))))
(defvar char-db-ucs-range-file-list
  '((#x0000 #x001F "u00000-C0.el")
    (#x0020 #x007F "u00020-Basic-Latin.el")
    (#x0080 #x009F "u00080-C1.el")
    (#x00A0 #x00FF "u000A0-Latin-1-Supplement.el")
    (#x0100 #x017F "u00100-Latin-Extended-A.el")
    (#x0180 #x024F "u00180-Latin-Extended-B.el")
    (#x0250 #x02AF "u00250-IPA-Extensions.el")
    (#x02B0 #x02FF "u002B0-Spacing-Modifier-Letters.el")
    (#x0300 #x036F "u00300-Combining-Diacritical-Marks.el")
    (#x0370 #x03FF "u00370-Greek.el")
    (#x0400 #x04FF "u00400-Cyrillic.el")
    (#x0530 #x058F "u00530-Armenian.el")
    (#x0590 #x05FF "u00590-Hebrew.el")
    (#x0600 #x06FF "u00600-Arabic.el")
    (#x0700 #x074F "u00700-Syriac.el")
    (#x0780 #x07BF "u00780-Thaana.el")
    (#x0900 #x097F "u00900-Devanagari.el")
    (#x0980 #x09FF "u00980-Bengali.el")
    (#x0A00 #x0A7F "u00A00-Gurmukhi.el")
    (#x0A80 #x0AFF "u00A80-Gujarati.el")
    (#x0B00 #x0B7F "u00B00-Oriya.el")
    (#x0B80 #x0BFF "u00B80-Tamil.el")
    (#x0C00 #x0C7F "u00C00-Telugu.el")
    (#x0C80 #x0CFF "u00C80-Kannada.el")
    (#x0D00 #x0D7F "u00D00-Malayalam.el")
    (#x0D80 #x0DFF "u00D80-Sinhala.el")
    (#x0E00 #x0E7F "u00E00-Thai.el")
    (#x0E80 #x0EFF "u00E80-Lao.el")
    (#x0F00 #x0FFF "u00F00-Tibetan.el")
    (#x1000 #x109F "u01000-Myanmar.el")
    (#x10A0 #x10FF "u010A0-Georgian.el")
    (#x1100 #x11FF "u01100-Hangul-Jamo.el")
    (#x1200 #x137C "u01200-Ethiopic.el")
    (#x13A0 #x13FF "u013A0-Cherokee.el")
    (#x1400 #x167F "u01400-Canadian.el")
    (#x1680 #x169F "u01680-Ogham.el")
    (#x16A0 #x16FF "u016A0-Runic.el")
    (#x1780 #x17FF "u01780-Khmer.el")
    (#x1800 #x18AF "u01800-Mongolian.el")
    (#x1E00 #x1EFF "u01E00-Latin-Extended-Additional.el")
    (#x1F00 #x1FFF "u01F00-Greek-Extended.el")
    (#x2000 #x206F "u02000-General-Punctuation.el")
    (#x2070 #x207F "u02070-Superscripts.el")
    (#x2080 #x208F "u02080-Subscripts.el")
    (#x20A0 #x20AF "u020A0-Currency-Symbols.el")
    (#x20D0 #x20FF "u020D0-Combining-Diacritical-Marks-for-Symbols.el")
    (#x2100 #x214F "u02100-Letterlike-Symbols.el")
    (#x2150 #x218F "u02150-Number-Forms.el")
    (#x2190 #x21FF "u02190-Arrows.el")
    (#x2200 #x22FF "u02200-Mathematical-Operators.el")
    (#x2300 #x23FF "u02300-Miscellaneous-Technical.el")
    (#x2400 #x243F "u02400-Control-Pictures.el")
    (#x2440 #x244A "u02440-OCR.el")
    (#x2460 #x24FF "u02460-Enclosed-Alphanumerics.el")
    (#x2500 #x257F "u02500-Box-Drawing.el")
    (#x2580 #x259F "u02580-Block-Elements.el")
    (#x25A0 #x25FF "u025A0-Geometric-Shapes.el")
    (#x2600 #x267F "u02600-Miscellaneous-Symbols.el")
    (#x2700 #x27BF "u02700-Dingbats.el")
    (#x2800 #x28FF "u02800-Braille-Patterns.el")
    (#x2900 #x297F "u02900-Supplemental-Arrows-B.el")
    (#x2980 #x29FF "u02980-Miscellaneous-Mathematical-Symbols-B.el")
    (#x2E00 #x2EFF "u02E00-CJK-Radical-Supplement.el")
    (#x2F00 #x2FEF "u02F00-Kangxi-Radicals.el")
    (#x2FF0 #x2FFF "u02FF0-Ideographic-Description-Characters.el")
    (#x3000 #x303F "u03000-CJK-Symbols-and-Punctuation.el" nil Ideograph)
    (#x3040 #x309F "u03040-Hiragana.el")
    (#x30A0 #x30FF "u030A0-Katakana.el")
    ;; The list is already quoted, so the symbol must be written bare:
    ;; 'Bopomofo here would be read as the list (quote Bopomofo).
    (#x3100 #x312F "u03100-Bopomofo.el" Bopomofo)
    (#x3130 #x318F "u03130-Hangul-Compatibility-Jamo.el")
    (#x3190 #x319F "u03190-Kanbun.el")
    (#x31A0 #x31BF "u031A0-Bopomofo-Extended.el")
    (#x31F0 #x31FF "u031F0-Katakana-Phonetic-Extensions.el")
    (#x3200 #x32FF "u03200-Enclosed-CJK-Letters-and-Months.el")
    (#x3300 #x33FF "u03300-CJK-Compatibility.el")
    ;; CJK Unified Ideographs are deliberately left out of this table.
    (#xA000 #xA48F "u0A000-Yi-Syllables.el")
    (#xA490 #xA4CF "u0A490-Yi-Radicals.el")
    (#xAC00 #xD7AF "u0AC00-Hangul-Syllables.el")
    (#xE000 #xF8FF "u0E000-Private-Use.el")
    ;; CJK Compatibility Ideographs are deliberately left out of this table.
    (#xFB00 #xFB4F "u0FB00-Alphabetic-Presentation-Forms.el")
    (#xFB50 #xFDFF "u0FB50-Arabic-Presentation-Forms-A.el")
    (#xFE20 #xFE2F "u0FE20-Combining-Half-Marks.el")
    (#xFE30 #xFE4F "u0FE30-CJK-Compatibility-Forms.el")
    (#xFE50 #xFE6F "u0FE50-Small-Form-Variants.el")
    (#xFE70 #xFEFF "u0FE70-Arabic-Presentation-Forms-B.el")
    (#xFF00 #xFFEF "u0FF00-Halfwidth-and-Fullwidth-Forms.el")
    (#xFFF0 #xFFFF "u0FFF0-Specials.el"))
  "Table of UCS code-point ranges and the files they are dumped to.
Each element has the form (MIN MAX FILENAME [ARG4 [ARG5]]).
MIN and MAX are the first and last code points of the range and
FILENAME is the name of the output file.  The optional ARG4 and ARG5
are handed unchanged to `write-char-range-data-to-file' as its fourth
and fifth arguments by `char-db-dump-non-ideographs'; they presumably
select and exclude a script -- confirm against that function.")
(defun char-db-dump-non-ideographs (directory)
  "Dump every range in `char-db-ucs-range-file-list' into DIRECTORY.
For each entry, `write-char-range-data-to-file' is called with the
range's first and last code points, the entry's file name expanded
relative to DIRECTORY, and the entry's optional fourth and fifth
elements (nil when absent)."
  (interactive "DDump Non-Ideographs : ")
  (dolist (range char-db-ucs-range-file-list)
    (write-char-range-data-to-file
     (car range) (nth 1 range)
     ;; Write into the directory the caller asked for, not a fixed
     ;; location such as /var/tmp.
     (expand-file-name (nth 2 range) directory)
     (nth 3 range) (nth 4 range))))
(defun char-db-dump (directory)
  "Dump the whole character database into DIRECTORY.
Runs `char-db-dump-ideographs' and then `char-db-dump-non-ideographs',
each with DIRECTORY, and returns the value of the latter."
  (interactive "DDirectory to dump : ")
  (let (last-value)
    ;; Order matters: ideographs first, then everything else.
    (dolist (dumper '(char-db-dump-ideographs char-db-dump-non-ideographs)
                    last-value)
      (setq last-value (funcall dumper directory)))))
150 (provide 'char-db-dump)
152 ;;; char-db-dump.el ends here