1 ;;; char-db-util.el --- Character Database utility
3 ;; Copyright (C) 1998, 1999 MORIOKA Tomohiko.
5 ;; Author: MORIOKA Tomohiko <tomo@m17n.org>
6 ;; Keywords: UTF-2000, ISO/IEC 10646, Unicode, UCS-4, MULE.
8 ;; This file is part of UTF-2000.
10 ;; UTF-2000 is free software; you can redistribute it and/or modify it
11 ;; under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
15 ;; UTF-2000 is distributed in the hope that it will be useful, but
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with XEmacs; see the file COPYING. If not, write to the Free
22 ;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
29 (defconst unidata-normative-category-alist
30 '(("Lu" letter uppercase)
31 ("Ll" letter lowercase)
32 ("Lt" letter titlecase)
33 ("Mn" mark non-spacing)
34 ("Mc" mark spacing-combining)
36 ("Nd" number decimal-digit)
39 ("Zs" separator space)
41 ("Zp" separator paragraph)
44 ("Cs" other surrogate)
45 ("Co" other private-use)
46 ("Cn" other not-assigned)))
48 (defconst unidata-informative-category-alist
49 '(("Lm" letter modifier)
51 ("Pc" punctuation connector)
52 ("Pd" punctuation dash)
53 ("Ps" punctuation open)
54 ("Pe" punctuation close)
55 ("Pi" punctuation initial-quote)
56 ("Pf" punctuation final-quote)
57 ("Po" punctuation other)
59 ("Sc" symbol currency)
60 ("Sk" symbol modifier)
64 (defun char-attribute-name< (ka kb)
70 ((= (charset-dimension ka)
71 (charset-dimension kb))
72 (cond ((= (charset-chars ka)(charset-chars kb))
74 ((>= (charset-final ka) ?@)
75 (if (>= (charset-final kb) ?@)
79 ((>= (charset-final ka) ?0)
81 ((>= (charset-final kb) ?@)
83 ((>= (charset-final kb) ?0)
87 ((<= (charset-chars ka)(charset-chars kb)))))
89 (< (charset-dimension ka)
90 (charset-dimension kb))
97 (string< (symbol-name ka)
103 (defun insert-char-data (char)
104 (let ((data (char-attribute-alist char))
105 cell ret name has-long-ccs-name rest)
108 (narrow-to-region (point)(point))
109 (insert "(define-char
111 (when (setq cell (assq 'name data))
112 (setq cell (cdr cell))
114 (if (> (length cell) 47)
120 (setq data (del-alist 'name data))
122 (when (setq cell (assq 'name data))
123 (setq cell (cdr cell))
125 (if (> (length cell) 47)
131 (setq data (del-alist 'name data))
133 (when (setq cell (assq 'ucs data))
134 (setq cell (cdr cell))
135 (insert (format "(ucs\t\t. #x%04X)
138 (setq data (del-alist 'ucs data))
140 (when (setq cell (assq '->ucs data))
141 (setq cell (cdr cell))
142 (insert (format "(->ucs\t\t. #x%04X)
145 (setq data (del-alist '->ucs data))
147 (when (setq cell (assq 'general-category data))
148 (setq ret (cdr cell))
150 "(general-category\t%s) ; %s
152 (mapconcat (lambda (cell)
155 (cond ((rassoc (cdr cell)
156 unidata-normative-category-alist)
157 "Normative Category")
159 unidata-informative-category-alist)
160 "Informative Category")
162 "Unknown Category"))))
163 (setq data (del-alist 'general-category data))
165 (when (setq cell (assq 'bidi-category data))
166 (setq cell (cdr cell))
167 (insert (format "(bidi-category\t. %S)
170 (setq data (del-alist 'bidi-category data))
172 (when (setq cell (assq 'mirrored data))
173 (setq cell (cdr cell))
174 (insert (format "(mirrored\t\t. %S)
177 (setq data (del-alist 'mirrored data))
179 (when (setq cell (assq 'decimal-digit-value data))
180 (setq cell (cdr cell))
181 (insert (format "(decimal-digit-value . %S)
184 (setq data (del-alist 'decimal-digit-value data))
185 (when (setq cell (assq 'digit-value data))
186 (setq cell (cdr cell))
187 (insert (format "(digit-value\t . %S)
190 (setq data (del-alist 'digit-value data))
192 (when (setq cell (assq 'numeric-value data))
193 (setq cell (cdr cell))
194 (insert (format "(numeric-value\t . %S)
197 (setq data (del-alist 'numeric-value data))
200 (when (setq cell (assq 'iso-10646-comment data))
201 (setq cell (cdr cell))
202 (insert (format "(iso-10646-comment\t. %S)
205 (setq data (del-alist 'iso-10646-comment data))
207 (when (setq cell (assq '->decomposition data))
208 (setq cell (cdr cell))
209 (insert (format "(->decomposition\t%s)
211 (mapconcat (lambda (code)
212 (cond ((symbolp code)
215 (format "#x%04X" code))
217 (format "\n %S" code))))
219 (setq data (del-alist '->decomposition data))
221 (when (setq cell (assq '->uppercase data))
222 (setq cell (cdr cell))
223 (insert (format "(->uppercase\t%s)
225 (mapconcat (lambda (code)
226 (cond ((symbolp code)
229 (format "#x%04X" code))
231 (format "\n %S" code))))
233 (setq data (del-alist '->uppercase data))
235 (when (setq cell (assq '->lowercase data))
236 (setq cell (cdr cell))
237 (insert (format "(->lowercase\t%s)
239 (mapconcat (lambda (code)
240 (cond ((symbolp code)
243 (format "#x%04X" code))
245 (format "\n %S" code))))
247 (setq data (del-alist '->lowercase data))
249 (when (setq cell (assq '->titlecase data))
250 (setq cell (cdr cell))
251 (insert (format "(->titlecase\t%s)
253 (mapconcat (lambda (code)
254 (cond ((symbolp code)
257 (format "#x%04X" code))
259 (format "\n %S" code))))
261 (setq data (del-alist '->titlecase data))
266 (char-attribute-name< (car a)(car b)))))
270 (setq cell (car rest))
271 (if (setq ret (find-charset (car cell)))
272 (if (>= (length (symbol-name (charset-name ret))) 19)
274 (setq has-long-ccs-name t)
278 (setq rest (cdr rest)))
280 (setq cell (car data))
281 (cond ((setq ret (find-charset (car cell)))
282 (insert (format (if has-long-ccs-name
292 (if (= (charset-graphic ret) 1)
296 ((string-match "^->" (symbol-name (car cell)))
301 (mapconcat (lambda (code)
302 (cond ((symbolp code)
305 (format "#x%04X" code))
307 (format "\n %S" code))))
314 (insert (format "(%-18s . %S)
316 (car cell)(cdr cell)))
318 (setq data (cdr data)))
320 (goto-char (point-min))
321 (while (re-search-forward "[ \t]+$" nil t)
323 (goto-char (point-max))
324 (tabify (point-min)(point-max))
328 (defun char-db-update-comment ()
331 (goto-char (point-min))
332 (let (cdef table char)
333 (while (re-search-forward "^[ \t]*\\(([^.()]+)\\)" nil t)
334 (goto-char (match-beginning 1))
335 (setq cdef (read (current-buffer)))
336 (when (find-charset (car cdef))
337 (goto-char (match-end 0))
339 (if (or (memq (car cdef) '(ascii latin-viscii-upper
342 (= (char-int (charset-final (car cdef))) 0))
343 (apply (function make-char) cdef)
344 (if (setq table (charset-mapping-table (car cdef)))
345 (set-charset-mapping-table (car cdef) nil))
347 (apply (function make-char) cdef)
349 (set-charset-mapping-table (car cdef) table)))))
350 (when (not (or (< (char-int char) 32)
351 (and (<= 128 (char-int char))
352 (< (char-int char) 160))))
353 (delete-region (point) (point-at-eol))
354 (insert (format "\t; %c" char)))
357 (defun insert-char-data-with-variant (char)
358 (insert-char-data char)
359 (let ((variants (char-variants char)))
361 (insert-char-data (car variants))
362 (setq variants (cdr variants))
365 (defun insert-char-range-data (min max)
370 (setq char (int-char code))
371 (insert-char-data-with-variant char)
372 (setq code (1+ code))
375 (defun write-char-range-data-to-file (min max file)
377 (insert-char-range-data min max)
378 (write-region (point-min)(point-max) file)))
381 (defun what-char-definition (char)
382 (interactive (list (char-after)))
383 (let ((buf (get-buffer-create "*Character Description*"))
384 (the-buf (current-buffer))
385 (win-conf (current-window-configuration)))
387 (make-local-variable 'what-character-original-window-configuration)
388 (setq what-character-original-window-configuration win-conf)
389 (setq buffer-read-only nil)
393 (insert-char-data-with-variant char)
394 (char-db-update-comment)
395 (set-buffer-modified-p nil)
396 (view-mode the-buf (lambda (buf)
397 (set-window-configuration
398 what-character-original-window-configuration)
400 (goto-char (point-min)))
402 (set-window-configuration
403 what-character-original-window-configuration)
404 (signal (car err) (cdr err)))))))
406 (provide 'char-db-util)
408 ;;; char-db-util.el ends here