(char-db-coded-charset-priority-list): Add `=ucs@iso'.
[chise/xemacs-chise.git.1] / lisp / utf-2000 / char-db-util.el
1 ;;; char-db-util.el --- Character Database utility
2
3 ;; Copyright (C) 1998,1999,2000,2001,2002,2003,2004,2005 MORIOKA Tomohiko.
4
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: CHISE, Character Database, ISO/IEC 10646, Unicode, UCS-4, MULE.
7
8 ;; This file is part of XEmacs CHISE.
9
10 ;; XEmacs CHISE is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
14
15 ;; XEmacs CHISE is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 ;; General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with XEmacs CHISE; see the file COPYING.  If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
24
25 ;;; Code:
26
27 (require 'alist)
28
(defconst unidata-normative-category-alist
  '(("Lu" letter    uppercase)
    ("Ll" letter    lowercase)
    ("Lt" letter    titlecase)
    ("Mn" mark      non-spacing)
    ("Mc" mark      spacing-combining)
    ("Me" mark      enclosing)
    ("Nd" number    decimal-digit)
    ("Nl" number    letter)
    ("No" number    other)
    ("Zs" separator space)
    ("Zl" separator line)
    ("Zp" separator paragraph)
    ("Cc" other     control)
    ("Cf" other     format)
    ("Cs" other     surrogate)
    ("Co" other     private-use)
    ("Cn" other     not-assigned))
  "Alist of general categories that are reported as normative.
Each element has the form (ABBREV MAJOR MINOR), where ABBREV is the
two-letter category code as a string and MAJOR and MINOR are symbols.
`insert-char-attributes' searches this list with `rassoc', using a
character's `general-category' value as the key, and annotates a
match as \"Normative Category\".")
47
(defconst unidata-informative-category-alist
  '(("Lm" letter      modifier)
    ("Lo" letter      other)
    ("Pc" punctuation connector)
    ("Pd" punctuation dash)
    ("Ps" punctuation open)
    ("Pe" punctuation close)
    ("Pi" punctuation initial-quote)
    ("Pf" punctuation final-quote)
    ("Po" punctuation other)
    ("Sm" symbol      math)
    ("Sc" symbol      currency)
    ("Sk" symbol      modifier)
    ("So" symbol      other))
  "Alist of general categories that are reported as informative.
Each element has the form (ABBREV MAJOR MINOR), where ABBREV is the
two-letter category code as a string and MAJOR and MINOR are symbols.
`insert-char-attributes' searches this list with `rassoc' after
`unidata-normative-category-alist' found no match, and annotates a
match as \"Informative Category\".")
63
(defconst ideographic-radicals
  (let ((radicals (make-vector 215 nil)))
    ;; Slot 0 stays nil so that a radical number indexes the vector
    ;; directly; slot N (1..214) holds the character at #x2EFF + N.
    (dotimes (k 214)
      (aset radicals (1+ k) (decode-char '=ucs (+ #x2F00 k))))
    radicals)
  "Vector of radical characters indexed by radical number.
The vector has 215 slots.  Slot 0 is nil; slot N, for N from 1 to
214, is the character decoded from `=ucs' code point #x2EFF + N, so
radical 1 is U+2F00 and radical 214 is U+2FD5.")
71
(defvar char-db-file-coding-system 'utf-8-mcs-er
  "Coding system associated with character database files.
NOTE(review): the code that reads this variable is not next to its
definition; presumably it is the coding system used when such files
are read and written -- confirm against the callers.")
73
(defvar char-db-feature-domains
  '(ucs daikanwa cns gt jis jis/alt jis/a jis/b
        jis-x0212 jis-x0213 cdp shinjigen misc unknown)
  "List of domain symbols for domain-qualified radical and stroke features.
`insert-char-attributes' walks these domains in order, followed by
any further domains it finds in the names of defined attributes, and
for each DOMAIN prints the features `ideographic-radical@DOMAIN',
`ideographic-strokes@DOMAIN' and `total-strokes@DOMAIN' together
with their `*sources' variants.")
77
(defvar char-db-ignored-attributes '(ideographic-products)
  "List of attribute names that are never printed.
`insert-char-attributes' drops every name found in this list while
it collects the attributes it is going to insert.")
79
(defun char-attribute-name< (ka kb)
  "Return non-nil if attribute name KA sorts before attribute name KB.
This is the predicate `insert-char-attributes' passes to `sort'.

The ordering rules, applied in this order, are:
- Everything sorts before `->denotational'.
- Everything except `->denotational' sorts before `->subsumptive'.
- `->denotational' and `->subsumptive' sort before nothing else.
- A name for which `find-charset' succeeds never sorts before a name
  for which it fails.  Two such names are compared by `charset-id':
  names whose id is greater than 1 come last, in increasing id
  order; among names whose id is at most 1, the one with the larger
  `charset-dimension' comes first, and on equal dimension the one
  with the larger id comes first.
- Any other name sorts before every charset name.
- Two remaining symbols are compared by `string<' on their names; a
  symbol sorts before a non-symbol."
  (cond
   ((eq kb '->denotational) t)
   ((eq kb '->subsumptive) (not (eq ka '->denotational)))
   ((memq ka '(->denotational ->subsumptive)) nil)
   ((find-charset ka)
    (and (find-charset kb)
         (let ((id-a (charset-id ka))
               (id-b (charset-id kb)))
           (cond
            ;; KA has an id above 1: it only precedes another such
            ;; charset with a larger id.
            ((> id-a 1)
             (and (> id-b 1)
                  (< id-a id-b)))
            ;; KA has an id of at most 1 and KB does not.
            ((> id-b 1) t)
            ;; Both ids are at most 1: larger dimension first, then
            ;; larger id first.
            (t
             (let ((dim-a (charset-dimension ka))
                   (dim-b (charset-dimension kb)))
               (if (= dim-a dim-b)
                   (> id-a id-b)
                 (> dim-a dim-b))))))))
   ((find-charset kb) t)
   ((symbolp ka)
    (or (not (symbolp kb))
        (string< (symbol-name ka) (symbol-name kb))))
   (t nil)))
116
(defvar char-db-coded-charset-priority-list
  '(ascii
    control-1
    latin-iso8859-1
    latin-iso8859-2
    latin-iso8859-3
    latin-iso8859-4
    latin-iso8859-9
    latin-jisx0201
    cyrillic-iso8859-5
    greek-iso8859-7
    thai-tis620
    =jis-x0208
    =jis-x0208@1978
    =jis-x0208@1983
    japanese-jisx0212
    chinese-gb2312
    chinese-cns11643-1
    chinese-cns11643-2
    chinese-cns11643-3
    chinese-cns11643-4
    chinese-cns11643-5
    chinese-cns11643-6
    chinese-cns11643-7
    =jis-x0208@1990
    =jis-x0213-1-2000
    =jis-x0213-2-2000
    korean-ksc5601
    chinese-isoir165
    katakana-jisx0201
    hebrew-iso8859-8
    chinese-gb12345
    latin-viscii
    ethiopic-ucs
    =big5-cdp
    =gt
    ideograph-daikanwa-2
    ideograph-daikanwa
    =cbeta
    ideograph-hanziku-1
    ideograph-hanziku-2
    ideograph-hanziku-3
    ideograph-hanziku-4
    ideograph-hanziku-5
    ideograph-hanziku-6
    ideograph-hanziku-7
    ideograph-hanziku-8
    ideograph-hanziku-9
    ideograph-hanziku-10
    ideograph-hanziku-11
    ideograph-hanziku-12
    =gt-k
    =ucs@iso
    =ucs@unicode
    =big5
    =big5-eten
    =jis-x0208@1997
    =jef-china3)
  "Coded charsets tried in order when a character needs a primary code.
`char-db-make-char-spec' consults this list for a character that has
no usable `=ucs' code: the first charset here in which `encode-char'
finds a defined code point supplies the leading entry of the
character's spec.  Every element is passed to `find-charset'.")
175
(defun char-db-make-char-spec (char)
  "Return a char-spec alist that identifies CHAR.
If CHAR is a cons it is taken to be a spec already and is returned
unchanged.  If CHAR is a character, the spec is built as follows:

- When `encode-char' finds a defined `=ucs' code for CHAR that lies
  outside the range #xE000..#xF8FF, the spec is ((=ucs . CODE)).
- Otherwise the first charset in `char-db-coded-charset-priority-list'
  with a defined code for CHAR supplies the first entry.  Entries are
  then added for each other charset in `(charset-list)' that has an
  ISO final character, or is `=daikanwa' or `=daikanwa@rev2', and has
  a defined code for CHAR.

In either case the `name' attribute of CHAR, or failing that its
`name*' attribute, is prepended when present.  Return nil when no
spec could be built, or when CHAR is neither a character nor a cons."
  (cond
   ((consp char) char)
   ((characterp char)
    (let ((ucs (encode-char char '=ucs 'defined-only))
          spec primary code label)
      (if (and ucs
               (not (and (<= #xE000 ucs) (<= ucs #xF8FF))))
          (setq spec (list (cons '=ucs ucs)))
        ;; No usable UCS code: pick the highest-priority charset that
        ;; can encode CHAR.
        (setq primary
              (catch 'tag
                (dolist (candidate char-db-coded-charset-priority-list)
                  (let ((ccs (charset-name (find-charset candidate))))
                    (if (setq code (encode-char char ccs 'defined-only))
                        (throw 'tag (cons ccs code)))))))
        (when primary
          (setq spec (list primary))
          (dolist (ccs (delq (car primary) (charset-list)))
            (if (and (or (charset-iso-final-char ccs)
                         (memq ccs
                               '(=daikanwa
                                 =daikanwa@rev2
                                 ;; =gt-k
                                 )))
                     ;; From here on CCS is the charset's name.
                     (setq ccs (charset-name ccs))
                     (null (assq ccs spec))
                     (setq code (encode-char char ccs 'defined-only)))
                (setq spec (cons (cons ccs code) spec))))
          ;; NOTE(review): SPEC was just initialised to a one-element
          ;; list and only grows, so this fallback looks unreachable.
          (if (null spec)
              (setq spec (split-char char)))))
      ;; Both successful paths finish by prepending the name, if any.
      (when spec
        (cond ((setq label (get-char-attribute char 'name))
               (setq spec (cons (cons 'name label) spec)))
              ((setq label (get-char-attribute char 'name*))
               (setq spec (cons (cons 'name* label) spec)))))
      spec))))
225     
(defun char-db-insert-char-spec (char &optional readable column
                                      required-features)
  "Insert at point the attributes that make up the spec of CHAR.
CHAR is a character or a char-spec alist.  The spec is computed with
`char-db-make-char-spec'; the features printed are the keys of that
spec together with REQUIRED-FEATURES.  READABLE is passed through to
`insert-char-attributes'.

When CHAR is not a character and `find-char' either signals an error
or finds no character for the spec, a temporary character is defined
from the spec, printed, and then stripped of all its attributes.
That cleanup runs inside `unwind-protect', so the temporary
definition is also removed when printing signals an error.

COLUMN defaults to the current column; it is not used otherwise.
Return nil."
  (unless column
    (setq column (current-column)))
  (let ((char-spec (char-db-make-char-spec char))
        temp-char)
    (unless (or (characterp char)
                (condition-case nil
                    (setq char (find-char char-spec))
                  (error nil)))
      ;; Define a temporary character.  A placeholder
      ;; `ideograph-daikanwa' code is put in front of the spec for
      ;; `define-char' and removed again immediately afterwards.
      ;;   Current implementation is dirty.
      (setq temp-char (define-char (cons '(ideograph-daikanwa . 0)
                                         char-spec)))
      (remove-char-attribute temp-char 'ideograph-daikanwa)
      (setq char temp-char))
    (unwind-protect
        (insert-char-attributes char
                                readable
                                (union (mapcar #'car char-spec)
                                       required-features))
      (when temp-char
        ;; Undefine the temporary character by removing every
        ;; attribute it currently has.
        ;;   Current implementation is dirty.
        (dolist (cell (char-attribute-alist temp-char))
          (remove-char-attribute temp-char (car cell)))))
    ;; Keep the original return value.
    nil))
253
(defun char-db-insert-alist (alist &optional readable column)
  "Insert ALIST at point as a printed alist, one entry per line.
COLUMN defaults to the current column; every entry is followed by a
newline and COLUMN + 1 spaces.  READABLE is passed through to
`insert-char-attributes'.

How an entry is printed depends on its key and value:
- A `char' entry whose value is resolved to a character by
  `find-char' is printed as \"(char . \" followed by the attributes of
  that character, limited to the keys that occur in the value.  If
  `find-char' fails or signals an error, the value is printed with
  `prin1-to-string' instead.
- An entry whose value is a list is printed element by element.  An
  element that is a char-spec resolved by `find-char' is printed with
  `insert-char-attributes' on a line of its own; any other element is
  printed with `prin1-to-string' after a single space.  No separator
  is printed before the first element of a value.
- Any other entry is printed as \"(NAME . VALUE)\"."
  (unless column
    (setq column (current-column)))
  (let ((line-breaking
         (concat "\n" (make-string (1+ column) ?\ )))
        name value ret al lbs cell separator)
    (insert "(")
    (while alist
      (setq name (car (car alist))
            value (cdr (car alist)))
      (cond ((eq name 'char)
             (insert "(char . ")
             (if (setq ret (condition-case nil
                               (find-char value)
                             (error nil)))
                 (progn
                   ;; Collect the keys of the spec; these are the
                   ;; only attributes printed for the character.
                   (setq al nil)
                   (dolist (pair value)
                     (setq al (cons (car pair) al)))
                   (insert-char-attributes ret
                                           readable
                                           (or al 'none)))
               (insert (prin1-to-string value)))
             (insert ")")
             (insert line-breaking))
            ((consp value)
             (insert (format "(%-18s " name))
             (setq lbs (concat "\n" (make-string (current-column) ?\ ))
                   ;; Each list-valued entry starts without a pending
                   ;; separator.  Without this reset, the separator
                   ;; left over from the previous entry would be
                   ;; printed before the first element of this one.
                   separator nil)
             (while (consp value)
               (setq cell (car value))
               (if (and (consp cell)
                        (consp (car cell))
                        (setq ret (condition-case nil
                                      (find-char cell)
                                    (error nil))))
                   (progn
                     (setq al nil)
                     (dolist (pair cell)
                       (setq al (cons (car pair) al)))
                     (if separator
                         (insert lbs))
                     (insert-char-attributes ret
                                             readable
                                             al)
                     (setq separator lbs))
                 (if separator
                     (insert separator))
                 (insert (prin1-to-string cell))
                 (setq separator " "))
               (setq value (cdr value)))
             (insert ")")
             (insert line-breaking))
            (t
             (insert (format "(%-18s . %S)%s"
                             name value
                             line-breaking))))
      (setq alist (cdr alist))))
  (insert ")"))
333
(defun char-db-insert-char-reference (plist &optional readable column)
  "Insert PLIST, a character reference, at point as a printed list.
PLIST is a property list of alternating names and values.  COLUMN
defaults to the current column; line breaks are followed by
COLUMN + 1 spaces.

- A `:char' value is printed with `char-db-insert-char-spec', which
  also receives READABLE.  A numeric value is first converted to a
  character with `decode-char' in `=ucs'.  A line break follows.
- A `:radical' value is printed as a number, followed by a comment
  that shows the matching element of `ideographic-radicals' and a
  line break.
- Any other property is printed as NAME, a tab and the value in `%S'
  notation.  A line break is printed before the property that
  follows it."
  (let* ((indent (1+ (or column (current-column))))
         (line-breaking (concat "\n" (make-string indent ?\ )))
         (separator "")
         name value)
    (insert "(")
    (while plist
      (setq name (car plist)
            value (car (cdr plist))
            plist (cdr (cdr plist)))
      (cond ((eq name :char)
             (insert separator ":char\t")
             (if (numberp value)
                 (setq value (decode-char '=ucs value)))
             (char-db-insert-char-spec value readable)
             (insert line-breaking)
             (setq separator ""))
            ((eq name :radical)
             (insert (format "%s%s\t%d ; %c%s"
                             separator
                             name value
                             (aref ideographic-radicals value)
                             line-breaking))
             (setq separator ""))
            (t
             (insert (format "%s%s\t%S" separator name value))
             ;; The line break is deferred until another property
             ;; follows.
             (setq separator line-breaking)))))
  (insert ")"))
369
(defun char-db-decode-isolated-char (ccs code-point)
  "Return a character to display for CODE-POINT of coded charset CCS.
The character is obtained as follows:
- for `arabic-iso8859-6', with `decode-char';
- for `=gt-pj-1' through `=gt-pj-11', when `decode-char' yields a
  character that has a defined `=gt' code, with `decode-builtin-char'
  on that `=gt' code;
- in every other case, with `decode-builtin-char' on CCS and
  CODE-POINT.

A result whose `char-int' lies in 0..#x1F is replaced by the `=ucs'
character #x2400 plus that value, and a result whose `char-int' is
#x7F is replaced by U+2421."
  (let* ((gt-char (and (memq ccs '(=gt-pj-1
                                   =gt-pj-2
                                   =gt-pj-3
                                   =gt-pj-4
                                   =gt-pj-5
                                   =gt-pj-6
                                   =gt-pj-7
                                   =gt-pj-8
                                   =gt-pj-9
                                   =gt-pj-10
                                   =gt-pj-11))
                       (decode-char ccs code-point)))
         (gt-code (and gt-char
                       (encode-char gt-char '=gt 'defined-only)))
         (decoded (cond ((eq ccs 'arabic-iso8859-6)
                         (decode-char ccs code-point))
                        (gt-code
                         (decode-builtin-char '=gt gt-code))
                        (t
                         (decode-builtin-char ccs code-point))))
         (code (char-int decoded)))
    (cond ((and (<= 0 code) (<= code #x1F))
           (decode-char '=ucs (+ #x2400 code)))
          ((= code #x7F)
           ?\u2421)
          (t decoded))))
397
(defvar char-db-convert-obsolete-format t
  "Flag related to the handling of obsolete-format data.
NOTE(review): no code next to this definition reads the variable;
presumably a non-nil value enables conversion of data stored in an
obsolete format -- confirm against the callers.")
399
(defun char-db-insert-ccs-feature (name value line-breaking)
  "Insert the coded-charset feature NAME with code point VALUE.
The entry is printed as \"(NAME . CODE)\", a tab, and a comment that
shows the character `char-db-decode-isolated-char' returns for NAME
and VALUE.

CODE is printed as a five-digit decimal number for `=daikanwa',
`=daikanwa@rev1', `=daikanwa@rev2', `=gt', `=gt-k' and `=cbeta', as a
six-digit decimal number for `mojikyo', and otherwise in hexadecimal
with at least four digits when `charset-dimension' is 2 or more and
at least two digits when it is less.  When `charset-iso-graphic-plane'
of NAME is 1, bit #x80 is set in each byte of the printed code for
dimensions 1 to 3.

For a charset with 94 characters per dimension and dimension 2, the
two bytes of VALUE, each reduced by 32, are appended as \" [RR-CC]\".
LINE-BREAKING is inserted last."
  (let* ((template
          (cond ((memq name '(=daikanwa
                              =daikanwa@rev1 =daikanwa@rev2
                              =gt =gt-k =cbeta))
                 "(%-18s . %05d)\t; %c")
                ((eq name 'mojikyo)
                 "(%-18s . %06d)\t; %c")
                ((>= (charset-dimension name) 2)
                 "(%-18s . #x%04X)\t; %c")
                (t
                 "(%-18s . #x%02X)\t; %c")))
         (code
          (if (= (charset-iso-graphic-plane name) 1)
              (let ((dimension (charset-dimension name)))
                (logior value
                        (cond ((= dimension 1) #x80)
                              ((= dimension 2) #x8080)
                              ((= dimension 3) #x808080)
                              (t 0))))
            value))
         (glyph (char-db-decode-isolated-char name value)))
    (insert (format template name code glyph)))
  (when (and (= (charset-chars name) 94)
             (= (charset-dimension name) 2))
    (insert (format " [%02d-%02d]"
                    (- (lsh value -8) 32)
                    (- (logand value 255) 32))))
  (insert line-breaking))
431
432 (defun insert-char-attributes (char &optional readable attributes column)
433   (unless column
434     (setq column (current-column)))
435   (let (name value ; has-long-ccs-name
436         rest
437         radical strokes
438         (line-breaking
439          (concat "\n" (make-string (1+ column) ?\ )))
440         lbs cell separator ret
441         key al cal
442         dest-ccss
443         sources required-features
444         ccss)
445     (let (atr-d)
446       (setq attributes
447             (sort (if attributes
448                       (if (consp attributes)
449                           (progn
450                             (dolist (name attributes)
451                               (unless (memq name char-db-ignored-attributes)
452                                 (if (find-charset name)
453                                     (push name ccss))
454                                 (push name atr-d)))
455                             atr-d))
456                     (dolist (name (char-attribute-list))
457                       (unless (memq name char-db-ignored-attributes)
458                         (if (find-charset name)
459                             (push name ccss))
460                         (push name atr-d)))
461                     atr-d)
462                   #'char-attribute-name<)))
463     (insert "(")
464     (when (and (memq 'name attributes)
465                (setq value (get-char-attribute char 'name)))
466       (insert (format
467                (if (> (+ (current-column) (length value)) 48)
468                    "(name . %S)%s"
469                  "(name               . %S)%s")
470                value line-breaking))
471       (setq attributes (delq 'name attributes))
472       )
473     (when (and (memq 'name* attributes)
474                (setq value (get-char-attribute char 'name*)))
475       (insert (format
476                (if (> (+ (current-column) (length value)) 48)
477                    "(name* . %S)%s"
478                  "(name*              . %S)%s")
479                value line-breaking))
480       (setq attributes (delq 'name* attributes))
481       )
482     (when (and (memq 'script attributes)
483                (setq value (get-char-attribute char 'script)))
484       (insert (format "(script\t\t%s)%s"
485                       (mapconcat (function prin1-to-string)
486                                  value " ")
487                       line-breaking))
488       (setq attributes (delq 'script attributes))
489       )
490     ;; (when (and (memq '<-denotational attributes)
491     ;;            (setq value (get-char-attribute char '<-denotational))
492     ;;            (null (cdr value))
493     ;;            (setq value (encode-char (car value) 'ucs 'defined-only)))
494     ;;   (insert (format "(%-18s . #x%04X)\t; %c%s"
495     ;;                   '=>ucs value (decode-char 'ucs value)
496     ;;                   line-breaking))
497     ;;   (setq attributes (delq '<-denotational attributes)))
498     (dolist (name '(=>ucs =>ucs*))
499       (when (and (memq name attributes)
500                  (setq value (get-char-attribute char name)))
501         (insert (format "(%-18s . #x%04X)\t; %c%s"
502                         name value (decode-char '=ucs value)
503                         line-breaking))
504         (setq attributes (delq name attributes))))
505     (dolist (name '(=>ucs@gb =>ucs@cns =>ucs@jis =>ucs@ks =>ucs@big5))
506       (when (and (memq name attributes)
507                  (setq value (get-char-attribute char name)))
508         (insert (format "(%-18s . #x%04X)\t; %c%s"
509                         name value
510                         (decode-char (intern
511                                       (concat "="
512                                               (substring
513                                                (symbol-name name) 2)))
514                                      value)
515                         line-breaking))
516         (setq attributes (delq name attributes))
517         ))
518     (dolist (name '(=>daikanwa))
519       (when (and (memq name attributes)
520                  (setq value (get-char-attribute char name)))
521         (insert
522          (if (integerp value)
523              (format "(%-18s . %05d)\t; %c%s"
524                      name value (decode-char '=daikanwa value)
525                      line-breaking)
526            (format "(%-18s %s)\t; %c%s"
527                    name
528                    (mapconcat (function prin1-to-string)
529                               value " ")
530                    (char-representative-of-daikanwa char)
531                    line-breaking)))
532         (setq attributes (delq name attributes))))
533     (when (and (memq 'general-category attributes)
534                (setq value (get-char-attribute char 'general-category)))
535       (insert (format
536                "(general-category\t%s) ; %s%s"
537                (mapconcat (lambda (cell)
538                             (format "%S" cell))
539                           value " ")
540                (cond ((rassoc value unidata-normative-category-alist)
541                       "Normative Category")
542                      ((rassoc value unidata-informative-category-alist)
543                       "Informative Category")
544                      (t
545                       "Unknown Category"))
546                line-breaking))
547       (setq attributes (delq 'general-category attributes))
548       )
549     (when (and (memq 'bidi-category attributes)
550                (setq value (get-char-attribute char 'bidi-category)))
551       (insert (format "(bidi-category\t. %S)%s"
552                       value
553                       line-breaking))
554       (setq attributes (delq 'bidi-category attributes))
555       )
556     (unless (or (not (memq 'mirrored attributes))
557                 (eq (setq value (get-char-attribute char 'mirrored 'empty))
558                     'empty))
559       (insert (format "(mirrored\t\t. %S)%s"
560                       value
561                       line-breaking))
562       (setq attributes (delq 'mirrored attributes))
563       )
564     (cond
565      ((and (memq 'decimal-digit-value attributes)
566            (setq value (get-char-attribute char 'decimal-digit-value)))
567       (insert (format "(decimal-digit-value . %S)%s"
568                       value
569                       line-breaking))
570       (setq attributes (delq 'decimal-digit-value attributes))
571       (when (and (memq 'digit-value attributes)
572                  (setq value (get-char-attribute char 'digit-value)))
573         (insert (format "(digit-value\t . %S)%s"
574                         value
575                         line-breaking))
576         (setq attributes (delq 'digit-value attributes))
577         )
578       (when (and (memq 'numeric-value attributes)
579                  (setq value (get-char-attribute char 'numeric-value)))
580         (insert (format "(numeric-value\t . %S)%s"
581                         value
582                         line-breaking))
583         (setq attributes (delq 'numeric-value attributes))
584         )
585       )
586      (t
587       (when (and (memq 'digit-value attributes)
588                  (setq value (get-char-attribute char 'digit-value)))
589         (insert (format "(digit-value\t. %S)%s"
590                         value
591                         line-breaking))
592         (setq attributes (delq 'digit-value attributes))
593         )
594       (when (and (memq 'numeric-value attributes)
595                  (setq value (get-char-attribute char 'numeric-value)))
596         (insert (format "(numeric-value\t. %S)%s"
597                         value
598                         line-breaking))
599         (setq attributes (delq 'numeric-value attributes))
600         )))
601     (when (and (memq 'iso-10646-comment attributes)
602                (setq value (get-char-attribute char 'iso-10646-comment)))
603       (insert (format "(iso-10646-comment\t. %S)%s"
604                       value
605                       line-breaking))
606       (setq attributes (delq 'iso-10646-comment attributes))
607       )
608     (when (and (memq 'morohashi-daikanwa attributes)
609                (setq value (get-char-attribute char 'morohashi-daikanwa)))
610       (insert (format "(morohashi-daikanwa\t%s)%s"
611                       (mapconcat (function prin1-to-string) value " ")
612                       line-breaking))
613       (setq attributes (delq 'morohashi-daikanwa attributes))
614       )
615     (setq radical nil
616           strokes nil)
617     (when (and (memq 'ideographic-radical attributes)
618                (setq value (get-char-attribute char 'ideographic-radical)))
619       (setq radical value)
620       (insert (format "(ideographic-radical . %S)\t; %c%s"
621                       radical
622                       (aref ideographic-radicals radical)
623                       line-breaking))
624       (setq attributes (delq 'ideographic-radical attributes))
625       )
626     (let (key)
627       (dolist (domain
628                (append
629                 char-db-feature-domains
630                 (let (dest domain)
631                   (dolist (feature (char-attribute-list))
632                     (setq feature (symbol-name feature))
633                     (when (string-match
634                            "\\(radical\\|strokes\\)@\\([^@*]+\\)\\(\\*\\|$\\)"
635                            feature)
636                       (setq domain (intern (match-string 2 feature)))
637                      (unless (memq domain dest)
638                        (setq dest (cons domain dest)))))
639                   (sort dest #'string<))))
640         (setq key (intern (format "%s@%s" 'ideographic-radical domain)))
641         (when (and (memq key attributes)
642                    (setq value (get-char-attribute char key)))
643           (setq radical value)
644           (insert (format "(%s . %S)\t; %c%s"
645                           key
646                           radical
647                           (aref ideographic-radicals radical)
648                           line-breaking))
649           (setq attributes (delq key attributes))
650           )
651         (setq key (intern (format "%s@%s" 'ideographic-strokes domain)))
652         (when (and (memq key attributes)
653                    (setq value (get-char-attribute char key)))
654           (setq strokes value)
655           (insert (format "(%s . %S)%s"
656                           key
657                           strokes
658                           line-breaking))
659           (setq attributes (delq key attributes))
660           )
661         (setq key (intern (format "%s@%s" 'total-strokes domain)))
662         (when (and (memq key attributes)
663                    (setq value (get-char-attribute char key)))
664           (insert (format "(%s       . %S)%s"
665                           key
666                           value
667                           line-breaking))
668           (setq attributes (delq key attributes))
669           )
670         (dolist (feature '(ideographic-radical
671                            ideographic-strokes
672                            total-strokes))
673           (setq key (intern (format "%s@%s*sources" feature domain)))
674           (when (and (memq key attributes)
675                      (setq value (get-char-attribute char key)))
676             (insert (format "(%s%s" key line-breaking))
677             (dolist (cell value)
678               (insert (format " %s" cell)))
679             (insert ")")
680             (insert line-breaking)
681             (setq attributes (delq key attributes))
682             ))
683         ))
684     (when (and (memq 'ideographic-strokes attributes)
685                (setq value (get-char-attribute char 'ideographic-strokes)))
686       (setq strokes value)
687       (insert (format "(ideographic-strokes . %S)%s"
688                       strokes
689                       line-breaking))
690       (setq attributes (delq 'ideographic-strokes attributes))
691       )
692     (when (and (memq 'kangxi-radical attributes)
693                (setq value (get-char-attribute char 'kangxi-radical)))
694       (unless (eq value radical)
695         (insert (format "(kangxi-radical\t . %S)\t; %c%s"
696                         value
697                         (aref ideographic-radicals value)
698                         line-breaking))
699         (or radical
700             (setq radical value)))
701       (setq attributes (delq 'kangxi-radical attributes))
702       )
703     (when (and (memq 'kangxi-strokes attributes)
704                (setq value (get-char-attribute char 'kangxi-strokes)))
705       (unless (eq value strokes)
706         (insert (format "(kangxi-strokes\t . %S)%s"
707                         value
708                         line-breaking))
709         (or strokes
710             (setq strokes value)))
711       (setq attributes (delq 'kangxi-strokes attributes))
712       )
713     (when (and (memq 'japanese-radical attributes)
714                (setq value (get-char-attribute char 'japanese-radical)))
715       (unless (eq value radical)
716         (insert (format "(japanese-radical\t . %S)\t; %c%s"
717                         value
718                         (aref ideographic-radicals value)
719                         line-breaking))
720         (or radical
721             (setq radical value)))
722       (setq attributes (delq 'japanese-radical attributes))
723       )
724     (when (and (memq 'japanese-strokes attributes)
725                (setq value (get-char-attribute char 'japanese-strokes)))
726       (unless (eq value strokes)
727         (insert (format "(japanese-strokes\t . %S)%s"
728                         value
729                         line-breaking))
730         (or strokes
731             (setq strokes value)))
732       (setq attributes (delq 'japanese-strokes attributes))
733       )
734     (when (and (memq 'cns-radical attributes)
735                (setq value (get-char-attribute char 'cns-radical)))
736       (insert (format "(cns-radical\t . %S)\t; %c%s"
737                       value
738                       (aref ideographic-radicals value)
739                       line-breaking))
740       (setq attributes (delq 'cns-radical attributes))
741       )
742     (when (and (memq 'cns-strokes attributes)
743                (setq value (get-char-attribute char 'cns-strokes)))
744       (unless (eq value strokes)
745         (insert (format "(cns-strokes\t . %S)%s"
746                         value
747                         line-breaking))
748         (or strokes
749             (setq strokes value)))
750       (setq attributes (delq 'cns-strokes attributes))
751       )
752     (when (and (memq 'shinjigen-1-radical attributes)
753                (setq value (get-char-attribute char 'shinjigen-1-radical)))
754       (unless (eq value radical)
755         (insert (format "(shinjigen-1-radical . %S)\t; %c%s"
756                         value
757                         (aref ideographic-radicals value)
758                         line-breaking))
759         (or radical
760             (setq radical value)))
761       (setq attributes (delq 'shinjigen-1-radical attributes))
762       )
763     (when (and (memq 'ideographic- attributes)
764                (setq value (get-char-attribute char 'ideographic-)))
765       (insert "(ideographic-       ")
766       (setq lbs (concat "\n" (make-string (current-column) ?\ ))
767             separator nil)
768       (while (consp value)
769         (setq cell (car value))
770         (if (integerp cell)
771             (setq cell (decode-char '=ucs cell)))
772         (cond ((characterp cell)
773                (if separator
774                    (insert lbs))
775                (if readable
776                    (insert (format "%S" cell))
777                  (char-db-insert-char-spec cell readable))
778                (setq separator lbs))
779               ((consp cell)
780                (if separator
781                    (insert lbs))
782                (if (consp (car cell))
783                    (char-db-insert-char-spec cell readable)
784                  (char-db-insert-char-reference cell readable))
785                (setq separator lbs))
786               (t
787                (if separator
788                    (insert separator))
789                (insert (prin1-to-string cell))
790                (setq separator " ")))
791         (setq value (cdr value)))
792       (insert ")")
793       (insert line-breaking)
794       (setq attributes (delq 'ideographic- attributes)))
795     (when (and (memq 'total-strokes attributes)
796                (setq value (get-char-attribute char 'total-strokes)))
797       (insert (format "(total-strokes       . %S)%s"
798                       value
799                       line-breaking))
800       (setq attributes (delq 'total-strokes attributes))
801       )
802     (when (and (memq '->ideograph attributes)
803                (setq value (get-char-attribute char '->ideograph)))
804       (insert (format "(->ideograph\t%s)%s"
805                       (mapconcat (lambda (code)
806                                    (cond ((symbolp code)
807                                           (symbol-name code))
808                                          ((integerp code)
809                                           (format "#x%04X" code))
810                                          (t
811                                           (format "%s %S"
812                                                   line-breaking code))))
813                                  value " ")
814                       line-breaking))
815       (setq attributes (delq '->ideograph attributes))
816       )
817     (when (and (memq '->decomposition attributes)
818                (setq value (get-char-attribute char '->decomposition)))
819       (insert (format "(->decomposition\t%s)%s"
820                       (mapconcat (lambda (code)
821                                    (cond ((symbolp code)
822                                           (symbol-name code))
823                                          ((characterp code)
824                                           (if readable
825                                               (format "%S" code)
826                                             (format "#x%04X"
827                                                     (char-int code))
828                                             ))
829                                          ((integerp code)
830                                           (format "#x%04X" code))
831                                          (t
832                                           (format "%s%S" line-breaking code))))
833                                  value " ")
834                       line-breaking))
835       (setq attributes (delq '->decomposition attributes))
836       )
837     (if (equal (get-char-attribute char '->titlecase)
838                (get-char-attribute char '->uppercase))
839         (setq attributes (delq '->titlecase attributes)))
840     (when (and (memq '->mojikyo attributes)
841                (setq value (get-char-attribute char '->mojikyo)))
842       (insert (format "(->mojikyo\t\t. %06d)\t; %c%s"
843                       value (decode-char 'mojikyo value)
844                       line-breaking))
845       (setq attributes (delq '->mojikyo attributes))
846       )
847     (when (and (memq 'hanyu-dazidian-vol attributes)
848                (setq value (get-char-attribute char 'hanyu-dazidian-vol)))
849       (insert (format "(hanyu-dazidian-vol  . %d)%s"
850                       value line-breaking))
851       (setq attributes (delq 'hanyu-dazidian-vol attributes))
852       )
853     (when (and (memq 'hanyu-dazidian-page attributes)
854                (setq value (get-char-attribute char 'hanyu-dazidian-page)))
855       (insert (format "(hanyu-dazidian-page . %d)%s"
856                       value line-breaking))
857       (setq attributes (delq 'hanyu-dazidian-page attributes))
858       )
859     (when (and (memq 'hanyu-dazidian-char attributes)
860                (setq value (get-char-attribute char 'hanyu-dazidian-char)))
861       (insert (format "(hanyu-dazidian-char . %d)%s"
862                       value line-breaking))
863       (setq attributes (delq 'hanyu-dazidian-char attributes))
864       )
865     (unless readable
866       (dolist (ignored '(composition
867                          ->denotational <-subsumptive ->ucs-unified
868                          ->ideographic-component-forms))
869         (setq attributes (delq ignored attributes))))
870     (while attributes
871       (setq name (car attributes))
872       (if (setq value (get-char-attribute char name))
873           (cond ((setq ret (find-charset name))
874                  (setq name (charset-name ret))
875                  (if (and (not (memq name dest-ccss))
876                           (prog1
877                               (setq value (get-char-attribute char name))
878                             (setq dest-ccss (cons name dest-ccss))))
879                      (char-db-insert-ccs-feature name value line-breaking))
880                  )
881                 ((string-match "^=>ucs@" (symbol-name name))
882                  (insert (format "(%-18s . #x%04X)\t; %c%s"
883                                  name value (decode-char '=ucs value)
884                                  line-breaking))
885                  )
886                 ((eq name 'jisx0208-1978/4X)
887                  (insert (format "(%-18s . #x%04X)%s"
888                                  name value
889                                  line-breaking))
890                  )
891                 ((and (not readable)
892                       (null (get-char-attribute
893                              char
894                              (intern (format "%s*sources" name))))
895                       (not (string-match "\\*sources$" (symbol-name name)))
896                       (or (eq name '<-identical)
897                           (string-match "^->halfwidth" (symbol-name name))
898                           (and
899                            (string-match "^->fullwidth" (symbol-name name))
900                            (not
901                             (and (consp value)
902                                  (characterp (car value))
903                                  (encode-char
904                                   (car value) '=ucs 'defined-only))))
905                           (string-match "^->simplified" (symbol-name name))
906                           (string-match "^->vulgar" (symbol-name name))
907                           (string-match "^->wrong" (symbol-name name))
908                           (string-match "^->same" (symbol-name name))
909                           (string-match "^->original" (symbol-name name))
910                           (string-match "^->ancient" (symbol-name name))
911                           ))
912                  )
913                 ((or (eq name 'ideographic-structure)
914                      (eq name 'ideographic-)
915                      (string-match "^\\(->\\|<-\\)" (symbol-name name)))
916                  (insert (format "(%-18s%s " name line-breaking))
917                  (setq lbs (concat "\n" (make-string (current-column) ?\ ))
918                        separator nil)
919                  (while (consp value)
920                    (setq cell (car value))
921                    (if (integerp cell)
922                        (setq cell (decode-char '=ucs cell)))
923                    (cond ((eq name '->subsumptive)
924                           (when (or (not
925                                      (some (lambda (atr)
926                                              (get-char-attribute cell atr))
927                                            char-db-ignored-attributes))
928                                     (some (lambda (ccs)
929                                             (encode-char cell ccs
930                                                          'defined-only))
931                                           ccss))
932                             (if separator
933                                 (insert lbs))
934                             (let ((char-db-ignored-attributes
935                                    (cons '<-subsumptive
936                                          char-db-ignored-attributes)))
937                               (insert-char-attributes cell readable))
938                             (setq separator lbs))
939                           )
940                          ((characterp cell)
941                           (setq sources
942                                 (get-char-attribute
943                                  char
944                                  (intern (format "%s*sources" name))))
945                           (setq required-features nil)
946                           (dolist (source sources)
947                             (cond
948                              ((memq source '(JP JP/Jouyou
949                                                 shinjigen-1))
950                               (setq required-features
951                                     (union required-features
952                                            '(=jis-x0208
953                                              =jis-x0208@1990
954                                              =jis-x0213-1-2000
955                                              =jis-x0213-2-2000
956                                              =jis-x0212
957                                              =jis-x0208@1983
958                                              =jis-x0208@1978))))
959                              ((eq source 'CN)
960                               (setq required-features
961                                     (union required-features
962                                            '(=gb2312
963                                              =gb12345
964                                              =iso-ir165)))))
965                             (cond
966                              ((find-charset
967                                (setq ret (intern (format "=%s" source))))
968                               (setq required-features
969                                     (cons ret required-features)))
970                              (t (setq required-features
971                                       (cons source required-features)))))
972                           (cond ((string-match "@JP" (symbol-name name))
973                                  (setq required-features
974                                        (union required-features
975                                               '(=jis-x0208
976                                                 =jis-x0208@1990
977                                                 =jis-x0213-1-2000
978                                                 =jis-x0213-2-2000
979                                                 =jis-x0212
980                                                 =jis-x0208@1983
981                                                 =jis-x0208@1978))))
982                                 ((string-match "@CN" (symbol-name name))
983                                  (setq required-features
984                                        (union required-features
985                                               '(=gb2312
986                                                 =gb12345
987                                                 =iso-ir165)))))
988                           (if separator
989                               (insert lbs))
990                           (if readable
991                               (insert (format "%S" cell))
992                             (char-db-insert-char-spec cell readable
993                                                       nil
994                                                       required-features))
995                           (setq separator lbs))
996                          ((consp cell)
997                           (if separator
998                               (insert lbs))
999                           (if (consp (car cell))
1000                               (char-db-insert-char-spec cell readable)
1001                             (char-db-insert-char-reference cell readable))
1002                           (setq separator lbs))
1003                          (t
1004                           (if separator
1005                               (insert separator))
1006                           (insert (prin1-to-string cell))
1007                           (setq separator " ")))
1008                    (setq value (cdr value)))
1009                  (insert ")")
1010                  (insert line-breaking))
1011                 ((memq name '(ideograph=
1012                               original-ideograph-of
1013                               ancient-ideograph-of
1014                               vulgar-ideograph-of
1015                               wrong-ideograph-of
1016                               ;; simplified-ideograph-of
1017                               ideographic-variants
1018                               ;; ideographic-different-form-of
1019                               ))
1020                  (insert (format "(%-18s%s " name line-breaking))
1021                  (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1022                        separator nil)
1023                  (while (consp value)
1024                    (setq cell (car value))
1025                    (if (and (consp cell)
1026                             (consp (car cell)))
1027                        (progn
1028                          (if separator
1029                              (insert lbs))
1030                          (char-db-insert-alist cell readable)
1031                          (setq separator lbs))
1032                      (if separator
1033                          (insert separator))
1034                      (insert (prin1-to-string cell))
1035                      (setq separator " "))
1036                    (setq value (cdr value)))
1037                  (insert ")")
1038                  (insert line-breaking))
1039                 ((consp value)
1040                  (insert (format "(%-18s " name))
1041                  (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1042                        separator nil)
1043                  (while (consp value)
1044                    (setq cell (car value))
1045                    (if (and (consp cell)
1046                             (consp (car cell))
1047                             (setq ret (condition-case nil
1048                                           (find-char cell)
1049                                         (error nil))))
1050                        (progn
1051                          (setq rest cell
1052                                al nil
1053                                cal nil)
1054                          (while rest
1055                            (setq key (car (car rest)))
1056                            (if (find-charset key)
1057                                (setq cal (cons key cal))
1058                              (setq al (cons key al)))
1059                            (setq rest (cdr rest)))
1060                          (if separator
1061                              (insert lbs))
1062                          (insert-char-attributes ret
1063                                                  readable
1064                                                  al cal)
1065                          (setq separator lbs))
1066                      (setq ret (prin1-to-string cell))
1067                      (if separator
1068                          (if (< (+ (current-column)
1069                                    (length ret)
1070                                    (length separator))
1071                                 76)
1072                              (insert separator)
1073                            (insert lbs)))
1074                      (insert ret)
1075                      (setq separator " "))
1076                    (setq value (cdr value)))
1077                  (insert ")")
1078                  (insert line-breaking))
1079                 (t
1080                  (insert (format "(%-18s . %S)%s"
1081                                  name value
1082                                  line-breaking)))
1083                 ))
1084       (setq attributes (cdr attributes)))
1085     (insert ")")))
1086
(defun insert-char-data (char &optional readable attributes)
  "Insert a `define-char' form for CHAR at point.
CHAR, READABLE and ATTRIBUTES are passed unchanged to
`insert-char-attributes', which produces the body of the form.

After insertion the new text (and only the new text) is tidied up:
trailing spaces and tabs are removed from every line, and every run
of two or more spaces/tabs is replaced by indentation to the same
column with `indent-tabs-mode' enabled.  Point is left at the end of
the inserted text."
  (save-restriction
    ;; Narrow to an empty region at point so the clean-up passes below
    ;; can only ever see what this function inserts.
    (narrow-to-region (point) (point))
    (insert "(define-char
  '")
    (insert-char-attributes char readable attributes)
    (insert ")\n")
    ;; Pass 1: strip trailing whitespace.
    (goto-char (point-min))
    (while (re-search-forward "[ \t]+$" nil t)
      (delete-region (match-beginning 0) (match-end 0)))
    ;; Pass 2 (adapted from tabify): rebuild each run of blanks so that
    ;; it ends at the same column but uses tabs where possible.
    (goto-char (point-min))
    (let ((indent-tabs-mode t))
      (while (re-search-forward "[ \t][ \t][ \t]*" nil t)
        (let ((target-column (current-column)))
          (delete-region (match-beginning 0) (point))
          (indent-to target-column))))
    (goto-char (point-max))))
1108
(defun insert-char-data-with-variant (char &optional printable
                                           no-ucs-unified
                                           script excluded-script)
  "Insert the data of CHAR, then the data of its variants.
CHAR itself is always inserted with `insert-char-data', PRINTABLE
being passed along as that function's second argument.  The variants
returned by `char-variants' are then visited in ascending order, and
variants of inserted variants are appended to the work list so that
they are visited too.

A variant is skipped when any of the following holds:
 - it has a `<-subsumptive' attribute;
 - SCRIPT is non-nil and the variant has a `script' attribute that
   does not contain SCRIPT;
 - EXCLUDED-SCRIPT is non-nil and the variant's `script' attribute
   contains EXCLUDED-SCRIPT;
 - NO-UCS-UNIFIED is non-nil and the variant has an `=ucs' attribute.
The variants of a skipped variant are not followed."
  (insert-char-data char printable)
  (let* ((queue (sort (char-variants char) #'<))
         ;; SEEN is CHAR followed by QUEUE itself (shared structure), so
         ;; anything appended to the queue below is automatically
         ;; treated as already seen.
         (seen (cons char queue))
         variant scripts)
    (while queue
      (setq variant (car queue))
      (unless (get-char-attribute variant '<-subsumptive)
        (when (and (or (null script)
                       (null (setq scripts
                                   (get-char-attribute variant 'script)))
                       (memq script scripts))
                   (or (null excluded-script)
                       (null (setq scripts
                                   (get-char-attribute variant 'script)))
                       (not (memq excluded-script scripts)))
                   (not (and no-ucs-unified
                             (get-char-attribute variant '=ucs))))
          (insert-char-data variant printable)
          (dolist (candidate (char-variants variant))
            (unless (memq candidate seen)
              ;; QUEUE is non-empty here, so `nconc' extends it in
              ;; place; the loop will reach the new element later.
              (nconc queue (list candidate))))))
      (setq queue (cdr queue)))))
1137
(defun insert-char-range-data (min max &optional script excluded-script)
  "Insert character data for each code from MIN to MAX, inclusive.
Every code is turned into a character with `decode-char' using the
`=ucs' coded character set.  Characters for which `encode-char' with
`defined-only' returns non-nil are inserted, together with their
variants, by `insert-char-data-with-variant'; that call always sets
its NO-UCS-UNIFIED argument and forwards SCRIPT and EXCLUDED-SCRIPT."
  (let ((code min))
    (while (<= code max)
      (let ((char (decode-char '=ucs code)))
        (when (encode-char char '=ucs 'defined-only)
          (insert-char-data-with-variant char nil 'no-ucs-unified
                                         script excluded-script)))
      (setq code (1+ code)))))
1147
(defun write-char-range-data-to-file (min max file
                                          &optional script excluded-script)
  "Write the character data for codes MIN through MAX to FILE.
The data is generated in a temporary buffer by
`insert-char-range-data', to which SCRIPT and EXCLUDED-SCRIPT are
forwarded.  The buffer starts with a coding cookie line naming
`char-db-file-coding-system', and that coding system is also bound
to `coding-system-for-write' while the buffer is written out."
  (let* ((coding-system-for-write char-db-file-coding-system)
         (coding-cookie (format ";; -*- coding: %s -*-\n"
                                char-db-file-coding-system)))
    (with-temp-buffer
      (insert coding-cookie)
      (insert-char-range-data min max script excluded-script)
      (write-region (point-min) (point-max) file))))
1156
;; Window configuration that was current when `what-char-definition'
;; was invoked.  That command makes this variable buffer-local in its
;; output buffer and stores the configuration there so it can be
;; restored later.
(defvar what-character-original-window-configuration)

;;;###autoload
(defun what-char-definition (char)
  "Display the character-database definition of CHAR and its variants.
The definition is written into the buffer \"*Character Description*\",
which is popped up, emptied and finally put into `view-mode'.

Interactively, CHAR is the character after point.  If there is no
such character (for example when point is at the end of the buffer)
an error is signaled before any buffer or window is touched.

If generating the description fails, the window configuration that
was current on entry is restored and the error is signaled again."
  (interactive
   ;; `char-after' returns nil when there is no character at point;
   ;; refuse early instead of erasing the description buffer and then
   ;; failing on a nil CHAR.
   (list (or (char-after)
             (error "No character at point"))))
  (let ((buf (get-buffer-create "*Character Description*"))
        (the-buf (current-buffer))
        (win-conf (current-window-configuration)))
    (pop-to-buffer buf)
    ;; Remember, local to the description buffer, how the windows
    ;; looked before it was popped up.
    (make-local-variable 'what-character-original-window-configuration)
    (setq what-character-original-window-configuration win-conf)
    (setq buffer-read-only nil)
    (erase-buffer)
    (condition-case err
        (progn
          (insert-char-data-with-variant char 'printable)
          ;; For a character without any attributes, append a comment
          ;; line showing the character rebuilt from the charset and
          ;; code bytes reported by `split-char'.
          (unless (char-attribute-alist char)
            (insert (format ";; = %c\n"
                            (let* ((rest (split-char char))
                                   (ccs (pop rest))
                                   (code (pop rest)))
                              ;; Fold the remaining bytes into one
                              ;; integer code, most significant first.
                              (while rest
                                (setq code (logior (lsh code 8)
                                                   (pop rest))))
                              (decode-char ccs code)))))
          ;; (char-db-update-comment)
          (set-buffer-modified-p nil)
          ;; NOTE(review): the lambda is presumably view-mode's exit
          ;; action; confirm against the view-mode in use.
          (view-mode the-buf (lambda (buf)
                               (set-window-configuration
                                what-character-original-window-configuration)
                               ))
          (goto-char (point-min)))
      ;; On any error, put the windows back and re-signal the same
      ;; error for the caller.
      (error (progn
               (set-window-configuration
                what-character-original-window-configuration)
               (signal (car err) (cdr err)))))))
1193
1194 (provide 'char-db-util)
1195
1196 ;;; char-db-util.el ends here