1 ;;; char-db-util.el --- Character Database utility
3 ;; Copyright (C) 1998, 1999 MORIOKA Tomohiko.
5 ;; Author: MORIOKA Tomohiko <tomo@m17n.org>
6 ;; Keywords: UTF-2000, ISO/IEC 10646, Unicode, UCS-4, MULE.
8 ;; This file is part of UTF-2000.
10 ;; UTF-2000 is free software; you can redistribute it and/or modify it
11 ;; under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
15 ;; UTF-2000 is distributed in the hope that it will be useful, but
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with XEmacs; see the file COPYING. If not, write to the Free
22 ;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
29 (defconst unidata-normative-category-alist
30 '(("Lu" letter uppercase)
31 ("Ll" letter lowercase)
32 ("Lt" letter titlecase)
33 ("Mn" mark non-spacing)
34 ("Mc" mark spacing-combining)
36 ("Nd" number decimal-digit)
39 ("Zs" separator space)
41 ("Zp" separator paragraph)
44 ("Cs" other surrogate)
45 ("Co" other private-use)
46 ("Cn" other not-assigned)))
48 (defconst unidata-informative-category-alist
49 '(("Lm" letter modifier)
51 ("Pc" punctuation connector)
52 ("Pd" punctuation dash)
53 ("Ps" punctuation open)
54 ("Pe" punctuation close)
55 ("Pi" punctuation initial-quote)
56 ("Pf" punctuation final-quote)
57 ("Po" punctuation other)
59 ("Sc" symbol currency)
60 ("Sk" symbol modifier)
64 (defun insert-char-data (char)
65 (let ((data (char-attribute-alist char))
66 cell ret name has-long-ccs-name rest)
69 (narrow-to-region (point)(point))
72 (when (setq cell (assq 'name data))
73 (setq cell (cdr cell))
75 (if (> (length cell) 47)
81 (setq data (del-alist 'name data))
83 (when (setq cell (assq 'name data))
84 (setq cell (cdr cell))
86 (if (> (length cell) 47)
92 (setq data (del-alist 'name data))
94 (when (setq cell (assq 'ucs data))
95 (setq cell (cdr cell))
96 (insert (format "(ucs\t\t. #x%04X)
99 (setq data (del-alist 'ucs data))
101 (when (setq cell (assq '->ucs data))
102 (setq cell (cdr cell))
103 (insert (format "(->ucs\t\t. #x%04X)
106 (setq data (del-alist '->ucs data))
108 (when (setq cell (assq 'general-category data))
109 (setq ret (cdr cell))
111 "(general-category\t%s) ; %s
113 (mapconcat (lambda (cell)
116 (cond ((rassoc (cdr cell)
117 unidata-normative-category-alist)
118 "Normative Category")
120 unidata-informative-category-alist)
121 "Informative Category")
123 "Unknown Category"))))
124 (setq data (del-alist 'general-category data))
126 (when (setq cell (assq 'bidi-category data))
127 (setq cell (cdr cell))
128 (insert (format "(bidi-category\t. %S)
131 (setq data (del-alist 'bidi-category data))
133 (when (setq cell (assq 'mirrored data))
134 (setq cell (cdr cell))
135 (insert (format "(mirrored\t\t. %S)
138 (setq data (del-alist 'mirrored data))
140 (when (setq cell (assq 'decimal-digit-value data))
141 (setq cell (cdr cell))
142 (insert (format "(decimal-digit-value . %S)
145 (setq data (del-alist 'decimal-digit-value data))
146 (when (setq cell (assq 'digit-value data))
147 (setq cell (cdr cell))
148 (insert (format "(digit-value\t . %S)
151 (setq data (del-alist 'digit-value data))
153 (when (setq cell (assq 'numeric-value data))
154 (setq cell (cdr cell))
155 (insert (format "(numeric-value\t . %S)
158 (setq data (del-alist 'numeric-value data))
161 (when (setq cell (assq '->decomposition data))
162 (setq cell (cdr cell))
163 (insert (format "(->decomposition\t%s)
165 (mapconcat (lambda (code)
166 (cond ((symbolp code)
169 (format "#x%04X" code))
171 (format "\n %S" code))))
173 (setq data (del-alist '->decomposition data))
175 (when (setq cell (assq '->uppercase data))
176 (setq cell (cdr cell))
177 (insert (format "(->uppercase\t%s)
179 (mapconcat (lambda (code)
180 (cond ((symbolp code)
183 (format "#x%04X" code))
185 (format "\n %S" code))))
187 (setq data (del-alist '->uppercase data))
189 (when (setq cell (assq '->titlecase data))
190 (setq cell (cdr cell))
191 (insert (format "(->titlecase\t%s)
193 (mapconcat (lambda (code)
194 (cond ((symbolp code)
197 (format "#x%04X" code))
199 (format "\n %S" code))))
201 (setq data (del-alist '->titlecase data))
203 (when (setq cell (assq '->lowercase data))
204 (setq cell (cdr cell))
205 (insert (format "(->lowercase\t%s)
207 (mapconcat (lambda (code)
208 (cond ((symbolp code)
211 (format "#x%04X" code))
213 (format "\n %S" code))))
215 (setq data (del-alist '->lowercase data))
222 (cond ((find-charset ka)
223 (cond ((find-charset kb)
224 (cond ((= (charset-dimension ka)
225 (charset-dimension kb))
227 ((>= (charset-final ka) ?@)
228 (if (>= (charset-final kb) ?@)
229 (< (charset-final ka)
232 ((>= (charset-final ka) ?0)
234 ((>= (charset-final kb) ?@)
236 ((>= (charset-final kb) ?0)
237 (< (charset-final ka)
241 (< (charset-dimension ka)
242 (charset-dimension kb))
249 (string< (symbol-name ka)
257 (setq cell (car rest))
258 (if (setq ret (find-charset (car cell)))
259 (if (>= (length (symbol-name (charset-name ret))) 19)
261 (setq has-long-ccs-name t)
265 (setq rest (cdr rest)))
267 (setq cell (car data))
268 (cond ((setq ret (find-charset (car cell)))
269 (insert (format (if has-long-ccs-name
276 (mapconcat (lambda (b)
280 ((string-match "^->" (symbol-name (car cell)))
285 (mapconcat (lambda (code)
286 (cond ((symbolp code)
289 (format "#x%04X" code))
291 (format "\n %S" code))))
298 (insert (format "(%-18s . %S)
300 (car cell)(cdr cell)))
302 (setq data (cdr data)))
304 (goto-char (point-min))
305 (while (re-search-forward "[ \t]+$" nil t)
307 (goto-char (point-max))
308 (tabify (point-min)(point-max))
311 (defun insert-char-range-data (min max)
316 (setq char (int-char code))
317 (insert-char-data char)
318 (setq variants (char-variants char))
320 (insert-char-data (car variants))
321 (setq variants (cdr variants)))
322 (setq code (1+ code))
325 (defun write-char-range-data-to-file (min max file)
327 (insert-char-range-data min max)
328 (write-region (point-min)(point-max) file)))
331 (defun char-db-update-comment ()
334 (goto-char (point-min))
335 (let (cdef table char)
336 (while (re-search-forward "^[ \t]*\\(([^.()]+)\\)" nil t)
337 (goto-char (match-beginning 1))
338 (setq cdef (read (current-buffer)))
339 (when (find-charset (car cdef))
340 (goto-char (match-end 0))
342 (if (or (eq (car cdef) 'ascii)
343 (= (char-int (charset-final (car cdef))) 0))
344 (apply (function make-char) cdef)
345 (if (setq table (charset-mapping-table (car cdef)))
346 (set-charset-mapping-table (car cdef) nil))
348 (apply (function make-char) cdef)
350 (set-charset-mapping-table (car cdef) table)))))
351 (when (not (or (< (char-int char) 32)
352 (and (<= (char-int char) 128)
353 (< (char-int char) 160))))
354 (delete-region (point) (point-at-eol))
355 (insert (format "\t; %c" char)))
359 (defun what-char-definition (char)
360 (interactive (list (char-after)))
361 (let ((buf (get-buffer-create "*Character Description*"))
362 (the-buf (current-buffer))
363 (win-conf (current-window-configuration)))
365 (make-local-variable 'what-character-original-window-configuration)
366 (setq what-character-original-window-configuration win-conf)
367 (setq buffer-read-only nil)
371 (insert-char-data char)
372 (set-buffer-modified-p nil)
373 (view-mode the-buf (lambda (buf)
374 (set-window-configuration
375 what-character-original-window-configuration)
377 (goto-char (point-min)))
379 (set-window-configuration
380 what-character-original-window-configuration)
381 (signal (car err) (cdr err)))))))
383 (provide 'char-db-util)
385 ;;; char-db-util.el ends here