1 ;;; char-db-util.el --- Character Database utility
3 ;; Copyright (C) 1998, 1999 MORIOKA Tomohiko.
5 ;; Author: MORIOKA Tomohiko <tomo@m17n.org>
6 ;; Keywords: UTF-2000, ISO/IEC 10646, Unicode, UCS-4, MULE.
8 ;; This file is part of UTF-2000.
10 ;; UTF-2000 is free software; you can redistribute it and/or modify it
11 ;; under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
15 ;; UTF-2000 is distributed in the hope that it will be useful, but
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with XEmacs; see the file COPYING. If not, write to the Free
22 ;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
29 (defconst unidata-normative-category-alist
30 '(("Lu" letter uppercase)
31 ("Ll" letter lowercase)
32 ("Lt" letter titlecase)
33 ("Mn" mark non-spacing)
34 ("Mc" mark spacing-combining)
36 ("Nd" number decimal-digit)
39 ("Zs" separator space)
41 ("Zp" separator paragraph)
44 ("Cs" other surrogate)
45 ("Co" other private-use)
46 ("Cn" other not-assigned)))
48 (defconst unidata-informative-category-alist
49 '(("Lm" letter modifier)
51 ("Pc" punctuation connector)
52 ("Pd" punctuation dash)
53 ("Ps" punctuation open)
54 ("Pe" punctuation close)
55 ("Pi" punctuation initial-quote)
56 ("Pf" punctuation final-quote)
57 ("Po" punctuation other)
59 ("Sc" symbol currency)
60 ("Sk" symbol modifier)
64 (defun insert-char-data (char)
65 (let ((data (char-attribute-alist char))
66 cell ret name has-long-ccs-name rest)
69 (narrow-to-region (point)(point))
72 (when (setq cell (assq 'name data))
73 (setq cell (cdr cell))
75 (if (> (length cell) 47)
81 (setq data (del-alist 'name data))
83 (when (setq cell (assq 'name data))
84 (setq cell (cdr cell))
86 (if (> (length cell) 47)
92 (setq data (del-alist 'name data))
94 (when (setq cell (assq 'ucs data))
95 (setq cell (cdr cell))
96 (insert (format "(ucs\t\t. #x%04X)
99 (setq data (del-alist 'ucs data))
101 (when (setq cell (assq '->ucs data))
102 (setq cell (cdr cell))
103 (insert (format "(->ucs\t\t. #x%04X)
106 (setq data (del-alist '->ucs data))
108 (when (setq cell (assq 'general-category data))
109 (setq ret (cdr cell))
111 "(general-category\t%s) ; %s
113 (mapconcat (lambda (cell)
116 (cond ((rassoc (cdr cell)
117 unidata-normative-category-alist)
118 "Normative Category")
120 unidata-informative-category-alist)
121 "Informative Category")
123 "Unknown Category"))))
124 (setq data (del-alist 'general-category data))
126 (when (setq cell (assq 'bidi-category data))
127 (setq cell (cdr cell))
128 (insert (format "(bidi-category\t. %S)
131 (setq data (del-alist 'bidi-category data))
133 (when (setq cell (assq 'mirrored data))
134 (setq cell (cdr cell))
135 (insert (format "(mirrored\t\t. %S)
138 (setq data (del-alist 'mirrored data))
140 (when (setq cell (assq 'decimal-digit-value data))
141 (setq cell (cdr cell))
142 (insert (format "(decimal-digit-value . %S)
145 (setq data (del-alist 'decimal-digit-value data))
146 (when (setq cell (assq 'digit-value data))
147 (setq cell (cdr cell))
148 (insert (format "(digit-value\t . %S)
151 (setq data (del-alist 'digit-value data))
153 (when (setq cell (assq 'numeric-value data))
154 (setq cell (cdr cell))
155 (insert (format "(numeric-value\t . %S)
158 (setq data (del-alist 'numeric-value data))
161 (setq data (sort data
165 (cond ((find-charset ka)
166 (cond ((find-charset kb)
167 (cond ((= (charset-dimension ka)
168 (charset-dimension kb))
169 (< (charset-final ka)
172 (< (charset-dimension ka)
173 (charset-dimension kb))
180 (string< (symbol-name ka)
188 (setq cell (car rest))
189 (if (setq ret (find-charset (car cell)))
190 (if (>= (length (symbol-name (charset-name ret))) 19)
192 (setq has-long-ccs-name t)
196 (setq rest (cdr rest)))
198 (setq cell (car data))
199 (cond ((setq ret (find-charset (car cell)))
200 (insert (format (if has-long-ccs-name
207 (mapconcat (lambda (b)
211 ((string-match "^->" (symbol-name (car cell)))
216 (mapconcat (lambda (code)
217 (cond ((symbolp code)
220 (format "#x%04X" code))
222 (format "\n %S" code))))
229 (insert (format "(%-18s . %S)
231 (car cell)(cdr cell)))
233 (setq data (cdr data)))
235 (goto-char (point-min))
236 (while (re-search-forward "[ \t]+$" nil t)
238 (goto-char (point-max))
239 (tabify (point-min)(point-max))
242 (defun insert-char-range-data (min max)
245 (insert-char-data (int-char code))
246 (setq code (1+ code))
249 (defun write-char-range-data-to-file (min max file)
251 (insert-char-range-data min max)
252 (write-region (point-min)(point-max) file)))
254 (defun char-db-update-comment ()
257 (goto-char (point-min))
259 (while (re-search-forward "^[ \t]*\\(([^.()]+)\\)" nil t)
260 (goto-char (match-beginning 1))
261 (setq cdef (read (current-buffer)))
262 (when (find-charset (car cdef))
263 (goto-char (match-end 0))
264 (if (setq table (charset-mapping-table (car cdef)))
265 (set-charset-mapping-table (car cdef) nil))
266 (delete-region (point) (point-at-eol))
267 (insert (format "\t; %c" (apply #'make-char cdef)))
269 (set-charset-mapping-table (car cdef) table))
273 (defun what-char-definition (char)
274 (interactive (list (char-after)))
275 (let ((buf (get-buffer-create "*Character Description*"))
276 (the-buf (current-buffer))
277 (win-conf (current-window-configuration)))
279 (make-local-variable 'what-character-original-window-configuration)
280 (setq what-character-original-window-configuration win-conf)
281 (setq buffer-read-only nil)
285 (insert-char-data char)
286 (set-buffer-modified-p nil)
287 (view-mode the-buf (lambda (buf)
288 (set-window-configuration
289 what-character-original-window-configuration)
291 (goto-char (point-min)))
293 (set-window-configuration
294 what-character-original-window-configuration)
295 (signal (car err) (cdr err)))))))
297 (provide 'char-db-util)
299 ;;; char-db-util.el ends here