1 ;;; char-db-util.el --- Character Database utility
3 ;; Copyright (C) 1998, 1999 MORIOKA Tomohiko.
5 ;; Author: MORIOKA Tomohiko <tomo@m17n.org>
6 ;; Keywords: UTF-2000, ISO/IEC 10646, Unicode, UCS-4, MULE.
8 ;; This file is part of UTF-2000.
10 ;; UTF-2000 is free software; you can redistribute it and/or modify it
11 ;; under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
15 ;; UTF-2000 is distributed in the hope that it will be useful, but
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with XEmacs; see the file COPYING. If not, write to the Free
22 ;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
29 (defconst unidata-normative-category-alist
30 '(("Lu" letter uppercase)
31 ("Ll" letter lowercase)
32 ("Lt" letter titlecase)
33 ("Mn" mark non-spacing)
34 ("Mc" mark spacing-combining)
36 ("Nd" number decimal-digit)
39 ("Zs" separator space)
41 ("Zp" separator paragraph)
44 ("Cs" other surrogate)
45 ("Co" other private-use)
46 ("Cn" other not-assigned)))
48 (defconst unidata-informative-category-alist
49 '(("Lm" letter modifier)
51 ("Pc" punctuation connector)
52 ("Pd" punctuation dash)
53 ("Ps" punctuation open)
54 ("Pe" punctuation close)
55 ("Pi" punctuation initial-quote)
56 ("Pf" punctuation final-quote)
57 ("Po" punctuation other)
59 ("Sc" symbol currency)
60 ("Sk" symbol modifier)
64 (defun char-attribute-name< (ka kb)
70 ((= (charset-dimension ka)
71 (charset-dimension kb))
72 (cond ((= (charset-chars ka)(charset-chars kb))
74 ((>= (charset-final ka) ?@)
75 (if (>= (charset-final kb) ?@)
79 ((>= (charset-final ka) ?0)
81 ((>= (charset-final kb) ?@)
83 ((>= (charset-final kb) ?0)
87 ((<= (charset-chars ka)(charset-chars kb)))))
89 (< (charset-dimension ka)
90 (charset-dimension kb))
97 (string< (symbol-name ka)
103 (defun insert-char-data (char)
104 (let ((data (char-attribute-alist char))
105 cell ret name has-long-ccs-name rest)
108 (narrow-to-region (point)(point))
109 (insert "(define-char
111 (when (setq cell (assq 'name data))
112 (setq cell (cdr cell))
114 (if (> (length cell) 47)
120 (setq data (del-alist 'name data))
122 (when (setq cell (assq 'name data))
123 (setq cell (cdr cell))
125 (if (> (length cell) 47)
131 (setq data (del-alist 'name data))
133 (when (setq cell (assq 'ucs data))
134 (setq cell (cdr cell))
135 (insert (format "(ucs\t\t. #x%04X)
138 (setq data (del-alist 'ucs data))
140 (when (setq cell (assq '->ucs data))
141 (setq cell (cdr cell))
142 (insert (format "(->ucs\t\t. #x%04X)
145 (setq data (del-alist '->ucs data))
147 (when (setq cell (assq 'general-category data))
148 (setq ret (cdr cell))
150 "(general-category\t%s) ; %s
152 (mapconcat (lambda (cell)
155 (cond ((rassoc (cdr cell)
156 unidata-normative-category-alist)
157 "Normative Category")
159 unidata-informative-category-alist)
160 "Informative Category")
162 "Unknown Category"))))
163 (setq data (del-alist 'general-category data))
165 (when (setq cell (assq 'bidi-category data))
166 (setq cell (cdr cell))
167 (insert (format "(bidi-category\t. %S)
170 (setq data (del-alist 'bidi-category data))
172 (when (setq cell (assq 'mirrored data))
173 (setq cell (cdr cell))
174 (insert (format "(mirrored\t\t. %S)
177 (setq data (del-alist 'mirrored data))
180 ((setq cell (assq 'decimal-digit-value data))
181 (setq cell (cdr cell))
182 (insert (format "(decimal-digit-value . %S)
185 (setq data (del-alist 'decimal-digit-value data))
186 (when (setq cell (assq 'digit-value data))
187 (setq cell (cdr cell))
188 (insert (format "(digit-value\t . %S)
191 (setq data (del-alist 'digit-value data))
193 (when (setq cell (assq 'numeric-value data))
194 (setq cell (cdr cell))
195 (insert (format "(numeric-value\t . %S)
198 (setq data (del-alist 'numeric-value data))
202 (when (setq cell (assq 'digit-value data))
203 (setq cell (cdr cell))
204 (insert (format "(digit-value\t. %S)
207 (setq data (del-alist 'digit-value data))
209 (when (setq cell (assq 'numeric-value data))
210 (setq cell (cdr cell))
211 (insert (format "(numeric-value\t. %S)
214 (setq data (del-alist 'numeric-value data))
216 (when (setq cell (assq 'iso-10646-comment data))
217 (setq cell (cdr cell))
218 (insert (format "(iso-10646-comment\t. %S)
221 (setq data (del-alist 'iso-10646-comment data))
223 (when (setq cell (assq '->decomposition data))
224 (setq cell (cdr cell))
225 (insert (format "(->decomposition\t%s)
227 (mapconcat (lambda (code)
228 (cond ((symbolp code)
231 (format "#x%04X" code))
233 (format "\n %S" code))))
235 (setq data (del-alist '->decomposition data))
237 (when (setq cell (assq '->uppercase data))
238 (setq cell (cdr cell))
239 (insert (format "(->uppercase\t%s)
241 (mapconcat (lambda (code)
242 (cond ((symbolp code)
245 (format "#x%04X" code))
247 (format "\n %S" code))))
249 (setq data (del-alist '->uppercase data))
251 (when (setq cell (assq '->lowercase data))
252 (setq cell (cdr cell))
253 (insert (format "(->lowercase\t%s)
255 (mapconcat (lambda (code)
256 (cond ((symbolp code)
259 (format "#x%04X" code))
261 (format "\n %S" code))))
263 (setq data (del-alist '->lowercase data))
265 (when (setq cell (assq '->titlecase data))
266 (setq cell (cdr cell))
267 (insert (format "(->titlecase\t%s)
269 (mapconcat (lambda (code)
270 (cond ((symbolp code)
273 (format "#x%04X" code))
275 (format "\n %S" code))))
277 (setq data (del-alist '->titlecase data))
282 (char-attribute-name< (car a)(car b)))))
286 (setq cell (car rest))
287 (if (setq ret (find-charset (car cell)))
288 (if (>= (length (symbol-name (charset-name ret))) 19)
290 (setq has-long-ccs-name t)
294 (setq rest (cdr rest)))
296 (setq cell (car data))
297 (cond ((setq ret (find-charset (car cell)))
298 (insert (format (if has-long-ccs-name
308 (if (= (charset-graphic ret) 1)
312 ((string-match "^->" (symbol-name (car cell)))
317 (mapconcat (lambda (code)
318 (cond ((symbolp code)
321 (format "#x%04X" code))
323 (format "\n %S" code))))
330 (insert (format "(%-18s . %S)
332 (car cell)(cdr cell)))
334 (setq data (cdr data)))
336 (goto-char (point-min))
337 (while (re-search-forward "[ \t]+$" nil t)
339 (goto-char (point-max))
340 (tabify (point-min)(point-max))
344 (defun char-db-update-comment ()
347 (goto-char (point-min))
348 (let (cdef table char)
349 (while (re-search-forward "^[ \t]*\\(([^.()]+)\\)" nil t)
350 (goto-char (match-beginning 1))
351 (setq cdef (read (current-buffer)))
352 (when (find-charset (car cdef))
353 (goto-char (match-end 0))
355 (if (or (memq (car cdef) '(ascii latin-viscii-upper
358 (= (char-int (charset-final (car cdef))) 0))
359 (apply (function make-char) cdef)
360 (if (setq table (charset-mapping-table (car cdef)))
361 (set-charset-mapping-table (car cdef) nil))
363 (apply (function make-char) cdef)
365 (set-charset-mapping-table (car cdef) table)))))
366 (when (not (or (< (char-int char) 32)
367 (and (<= 128 (char-int char))
368 (< (char-int char) 160))))
369 (delete-region (point) (point-at-eol))
370 (insert (format "\t; %c" char)))
373 (defun insert-char-data-with-variant (char)
374 (insert-char-data char)
375 (let ((variants (char-variants char)))
377 (insert-char-data (car variants))
378 (setq variants (cdr variants))
381 (defun insert-char-range-data (min max)
386 (setq char (int-char code))
387 (insert-char-data-with-variant char)
388 (setq code (1+ code))
391 (defun write-char-range-data-to-file (min max file)
393 (insert-char-range-data min max)
394 (write-region (point-min)(point-max) file)))
397 (defun what-char-definition (char)
398 (interactive (list (char-after)))
399 (let ((buf (get-buffer-create "*Character Description*"))
400 (the-buf (current-buffer))
401 (win-conf (current-window-configuration)))
403 (make-local-variable 'what-character-original-window-configuration)
404 (setq what-character-original-window-configuration win-conf)
405 (setq buffer-read-only nil)
409 (insert-char-data-with-variant char)
410 (char-db-update-comment)
411 (set-buffer-modified-p nil)
412 (view-mode the-buf (lambda (buf)
413 (set-window-configuration
414 what-character-original-window-configuration)
416 (goto-char (point-min)))
418 (set-window-configuration
419 what-character-original-window-configuration)
420 (signal (car err) (cdr err)))))))
422 (provide 'char-db-util)
424 ;;; char-db-util.el ends here