(insert-char-attributes): Don't display `<-identical' when running
[chise/xemacs-chise.git-] / lisp / utf-2000 / char-db-util.el
1 ;;; char-db-util.el --- Character Database utility
2
3 ;; Copyright (C) 1998,1999,2000,2001,2002,2003,2004 MORIOKA Tomohiko.
4
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: CHISE, Character Database, ISO/IEC 10646, Unicode, UCS-4, MULE.
7
8 ;; This file is part of XEmacs CHISE.
9
10 ;; XEmacs CHISE is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
14
15 ;; XEmacs CHISE is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 ;; General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with XEmacs CHISE; see the file COPYING.  If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
24
25 ;;; Code:
26
27 (require 'alist)
28
29 (defconst unidata-normative-category-alist
30   '(("Lu" letter        uppercase)
31     ("Ll" letter        lowercase)
32     ("Lt" letter        titlecase)
33     ("Mn" mark          non-spacing)
34     ("Mc" mark          spacing-combining)
35     ("Me" mark          enclosing)
36     ("Nd" number        decimal-digit)
37     ("Nl" number        letter)
38     ("No" number        other)
39     ("Zs" separator     space)
40     ("Zl" separator     line)
41     ("Zp" separator     paragraph)
42     ("Cc" other         control)
43     ("Cf" other         format)
44     ("Cs" other         surrogate)
45     ("Co" other         private-use)
46     ("Cn" other         not-assigned)))
47
48 (defconst unidata-informative-category-alist
49   '(("Lm" letter        modifier)
50     ("Lo" letter        other)
51     ("Pc" punctuation   connector)
52     ("Pd" punctuation   dash)
53     ("Ps" punctuation   open)
54     ("Pe" punctuation   close)
55     ("Pi" punctuation   initial-quote)
56     ("Pf" punctuation   final-quote)
57     ("Po" punctuation   other)
58     ("Sm" symbol        math)
59     ("Sc" symbol        currency)
60     ("Sk" symbol        modifier)
61     ("So" symbol        other)
62     ))
63
64 (defconst ideographic-radicals
65   (let ((v (make-vector 215 nil))
66         (i 1))
67     (while (< i 215)
68       (aset v i (decode-char '=ucs (+ #x2EFF i)))
69       (setq i (1+ i)))
70     v))
71
72 (defvar char-db-feature-domains
73   '(ucs daikanwa cns gt jis jis/alt jis/a jis/b
74         jis-x0213 misc unknown))
75
76 (defvar char-db-ignored-attributes nil)
77
78 (defun char-attribute-name< (ka kb)
79   (cond
80    ((eq '->denotational kb)
81     t)
82    ((eq '->subsumptive kb)
83     (not (eq '->denotational ka)))
84    ((eq '->denotational ka)
85     nil)
86    ((eq '->subsumptive ka)
87     nil)
88    ((find-charset ka)
89     (if (find-charset kb)
90         (if (<= (charset-id ka) 0)
91             (if (<= (charset-id kb) 0)
92                 (cond
93                  ((= (charset-dimension ka)
94                      (charset-dimension kb))
95                   (> (charset-id ka)(charset-id kb)))
96                  (t
97                   (> (charset-dimension ka)
98                      (charset-dimension kb))
99                   ))
100               t)
101           (if (<= (charset-id kb) 0)
102               nil
103             (< (charset-id ka)(charset-id kb))))
104       nil))
105    ((find-charset kb)
106     t)
107    ((symbolp ka)
108     (cond ((symbolp kb)
109            (string< (symbol-name ka)
110                     (symbol-name kb)))
111           (t)))
112    ((symbolp kb)
113     nil)))
114
115 (defvar char-db-coded-charset-priority-list
116   '(ascii
117     control-1
118     latin-iso8859-1
119     latin-iso8859-2
120     latin-iso8859-3
121     latin-iso8859-4
122     latin-iso8859-9
123     latin-jisx0201
124     cyrillic-iso8859-5
125     greek-iso8859-7
126     thai-tis620
127     =jis-x0208
128     japanese-jisx0208
129     japanese-jisx0212
130     japanese-jisx0208-1978
131     chinese-gb2312
132     chinese-cns11643-1
133     chinese-cns11643-2
134     chinese-cns11643-3
135     chinese-cns11643-4
136     chinese-cns11643-5
137     chinese-cns11643-6
138     chinese-cns11643-7
139     =jis-x0208-1990
140     =jis-x0213-1-2000
141     =jis-x0213-2-2000
142     korean-ksc5601
143     chinese-isoir165
144     katakana-jisx0201
145     hebrew-iso8859-8
146     chinese-gb12345
147     latin-viscii
148     ethiopic-ucs
149     =big5-cdp
150     =gt
151     ideograph-daikanwa-2
152     ideograph-daikanwa
153     =cbeta
154     ideograph-hanziku-1
155     ideograph-hanziku-2
156     ideograph-hanziku-3
157     ideograph-hanziku-4
158     ideograph-hanziku-5
159     ideograph-hanziku-6
160     ideograph-hanziku-7
161     ideograph-hanziku-8
162     ideograph-hanziku-9
163     ideograph-hanziku-10
164     ideograph-hanziku-11
165     ideograph-hanziku-12
166     =big5
167     =big5-eten
168     =gt-k
169     =jef-china3))
170
171 (defun char-db-make-char-spec (char)
172   (let (ret char-spec)
173     (cond ((characterp char)
174            (cond ((and (setq ret (encode-char char '=ucs 'defined-only))
175                        (not (and (<= #xE000 ret)(<= ret #xF8FF))))
176                   (setq char-spec (list (cons '=ucs ret)))
177                   (cond ((setq ret (get-char-attribute char 'name))
178                          (setq char-spec (cons (cons 'name ret) char-spec))
179                          )
180                         ((setq ret (get-char-attribute char 'name*))
181                          (setq char-spec (cons (cons 'name* ret) char-spec))
182                          ))
183                   )
184                  ((setq ret
185                         (catch 'tag
186                           (let ((rest char-db-coded-charset-priority-list)
187                                 ccs)
188                             (while rest
189                               (setq ccs (charset-name
190                                          (find-charset (car rest))))
191                               (if (setq ret
192                                         (encode-char char ccs
193                                                      'defined-only))
194                                   (throw 'tag (cons ccs ret)))
195                               (setq rest (cdr rest))))))
196                   (setq char-spec (list ret))
197                   (dolist (ccs (delq (car ret) (charset-list)))
198                     (if (and (or (charset-iso-final-char ccs)
199                                  (memq ccs
200                                        '(=daikanwa
201                                          =daikanwa-rev2
202                                          ;; =gt-k
203                                          )))
204                              (setq ret (encode-char char ccs 'defined-only)))
205                         (setq char-spec (cons (cons ccs ret) char-spec))))
206                   (if (null char-spec)
207                       (setq char-spec (split-char char)))
208                   (cond ((setq ret (get-char-attribute char 'name))
209                          (setq char-spec (cons (cons 'name ret) char-spec))
210                          )
211                         ((setq ret (get-char-attribute char 'name*))
212                          (setq char-spec (cons (cons 'name* ret) char-spec))
213                          ))
214                   ))
215            char-spec)
216           ((consp char)
217            char))))
218     
219 (defun char-db-insert-char-spec (char &optional readable column
220                                       required-features)
221   (unless column
222     (setq column (current-column)))
223   (let (char-spec temp-char)
224     (setq char-spec (char-db-make-char-spec char))
225     (unless (or (characterp char) ; char
226                 (condition-case nil
227                     (setq char (find-char char-spec))
228                   (error nil)))
229       ;; define temporary character
230       ;;   Current implementation is dirty.
231       (setq temp-char (define-char (cons '(ideograph-daikanwa . 0)
232                                          char-spec)))
233       (remove-char-attribute temp-char 'ideograph-daikanwa)
234       (setq char temp-char))
235     (insert-char-attributes char
236                             readable
237                             (union (mapcar #'car char-spec)
238                                    required-features))
239     (when temp-char
240       ;; undefine temporary character
241       ;;   Current implementation is dirty.
242       (setq char-spec (char-attribute-alist temp-char))
243       (while char-spec
244         (remove-char-attribute temp-char (car (car char-spec)))
245         (setq char-spec (cdr char-spec))))))
246
247 (defun char-db-insert-alist (alist &optional readable column)
248   (unless column
249     (setq column (current-column)))
250   (let ((line-breaking
251          (concat "\n" (make-string (1+ column) ?\ )))
252         name value
253         ret al ; cal
254         key
255         lbs cell rest separator)
256     (insert "(")
257     (while alist
258       (setq name (car (car alist))
259             value (cdr (car alist)))
260       (cond ((eq name 'char)
261              (insert "(char . ")
262              (if (setq ret (condition-case nil
263                                (find-char value)
264                              (error nil)))
265                  (progn
266                    (setq al nil
267                          ;; cal nil
268                          )
269                    (while value
270                      (setq key (car (car value)))
271                      ;; (if (find-charset key)
272                      ;;     (setq cal (cons key cal))
273                      (setq al (cons key al))
274                      ;; )
275                      (setq value (cdr value)))
276                    (insert-char-attributes ret
277                                            readable
278                                            (or al 'none) ; cal
279                                            ))
280                (insert (prin1-to-string value)))
281              (insert ")")
282              (insert line-breaking))
283             ((consp value)
284              (insert (format "(%-18s " name))
285              (setq lbs (concat "\n" (make-string (current-column) ?\ )))
286              (while (consp value)
287                (setq cell (car value))
288                (if (and (consp cell)
289                         (consp (car cell))
290                         (setq ret (condition-case nil
291                                       (find-char cell)
292                                     (error nil)))
293                         )
294                    (progn
295                      (setq rest cell
296                            al nil
297                            ;; cal nil
298                            )
299                      (while rest
300                        (setq key (car (car rest)))
301                        ;; (if (find-charset key)
302                        ;;     (setq cal (cons key cal))
303                        (setq al (cons key al))
304                        ;; )
305                        (setq rest (cdr rest)))
306                      (if separator
307                          (insert lbs))
308                      (insert-char-attributes ret
309                                              readable
310                                              al ; cal
311                                              )
312                      (setq separator lbs))
313                  (if separator
314                      (insert separator))
315                  (insert (prin1-to-string cell))
316                  (setq separator " "))
317                (setq value (cdr value)))
318              (insert ")")
319              (insert line-breaking))
320             (t
321              (insert (format "(%-18s . %S)%s"
322                              name value
323                              line-breaking))))
324       (setq alist (cdr alist))))
325   (insert ")"))
326
327 (defun char-db-insert-char-reference (plist &optional readable column)
328   (unless column
329     (setq column (current-column)))
330   (let ((line-breaking
331          (concat "\n" (make-string (1+ column) ?\ )))
332         (separator "")
333         name value)
334     (insert "(")
335     (while plist
336       (setq name (pop plist))
337       (setq value (pop plist))
338       (cond ((eq name :char)
339              (insert separator)
340              (insert ":char\t")
341              (cond ((numberp value)
342                     (setq value (decode-char '=ucs value)))
343                    ;; ((consp value)
344                    ;;  (setq value (or (find-char value)
345                    ;;                  value)))
346                    )
347              (char-db-insert-char-spec value readable)
348              (insert line-breaking)
349              (setq separator ""))
350             ((eq name :radical)
351              (insert (format "%s%s\t%d ; %c%s"
352                              separator
353                              name value
354                              (aref ideographic-radicals value)
355                              line-breaking))
356              (setq separator ""))
357             (t
358              (insert (format "%s%s\t%S" separator name value))
359              (setq separator line-breaking)))
360       ))
361   (insert ")"))
362
363 (defun char-db-decode-isolated-char (ccs code-point)
364   (let (ret)
365     (setq ret
366           (cond ((eq ccs 'arabic-iso8859-6)
367                  (decode-char ccs code-point))
368                 ((and (memq ccs '(=gt-pj-1
369                                   =gt-pj-2
370                                   =gt-pj-3
371                                   =gt-pj-4
372                                   =gt-pj-5
373                                   =gt-pj-6
374                                   =gt-pj-7
375                                   =gt-pj-8
376                                   =gt-pj-9
377                                   =gt-pj-10
378                                   =gt-pj-11))
379                       (setq ret (decode-char ccs code-point))
380                       (setq ret (encode-char ret '=gt 'defined-only)))
381                  (decode-builtin-char '=gt ret))
382                 (t
383                  (decode-builtin-char ccs code-point))))
384     (cond ((and (<= 0 (char-int ret))
385                 (<= (char-int ret) #x1F))
386            (decode-char '=ucs (+ #x2400 (char-int ret))))
387           ((= (char-int ret) #x7F)
388            ?\u2421)
389           (t ret))))
390
391 (defvar char-db-convert-obsolete-format t)
392
393 (defun insert-char-attributes (char &optional readable attributes column)
394   (unless column
395     (setq column (current-column)))
396   (let (name value has-long-ccs-name rest
397         radical strokes
398         (line-breaking
399          (concat "\n" (make-string (1+ column) ?\ )))
400         lbs cell separator ret
401         key al cal
402         dest-ccss
403         sources required-features
404         ccss)
405     (let (atr-d)
406       (setq attributes
407             (sort (if attributes
408                       (if (consp attributes)
409                           (progn
410                             (dolist (name attributes)
411                               (unless (memq name char-db-ignored-attributes)
412                                 (if (find-charset name)
413                                     (push name ccss))
414                                 (push name atr-d)))
415                             atr-d))
416                     (dolist (name (char-attribute-list))
417                       (unless (memq name char-db-ignored-attributes)
418                         (if (find-charset name)
419                             (push name ccss))
420                         (push name atr-d)))
421                     atr-d)
422                   #'char-attribute-name<)))
423     (insert "(")
424     (when (and (memq 'name attributes)
425                (setq value (get-char-attribute char 'name)))
426       (insert (format
427                (if (> (+ (current-column) (length value)) 48)
428                    "(name . %S)%s"
429                  "(name               . %S)%s")
430                value line-breaking))
431       (setq attributes (delq 'name attributes))
432       )
433     (when (and (memq 'name* attributes)
434                (setq value (get-char-attribute char 'name*)))
435       (insert (format
436                (if (> (+ (current-column) (length value)) 48)
437                    "(name* . %S)%s"
438                  "(name*              . %S)%s")
439                value line-breaking))
440       (setq attributes (delq 'name* attributes))
441       )
442     (when (and (memq 'script attributes)
443                (setq value (get-char-attribute char 'script)))
444       (insert (format "(script\t\t%s)%s"
445                       (mapconcat (function prin1-to-string)
446                                  value " ")
447                       line-breaking))
448       (setq attributes (delq 'script attributes))
449       )
450     (dolist (name '(=>ucs =>ucs*))
451       (when (and (memq name attributes)
452                  (setq value (get-char-attribute char name)))
453         (insert (format "(%-18s . #x%04X)\t; %c%s"
454                         name value (decode-char '=ucs value)
455                         line-breaking))
456         (setq attributes (delq name attributes))))
457     (dolist (name '(=>ucs@gb =>ucs@cns =>ucs@jis =>ucs@ks =>ucs@big5))
458       (when (and (memq name attributes)
459                  (setq value (get-char-attribute char name)))
460         (insert (format "(%-18s . #x%04X)\t; %c%s"
461                         name value
462                         (decode-char (intern
463                                       (concat "="
464                                               (substring
465                                                (symbol-name name) 2)))
466                                      value)
467                         line-breaking))
468         (setq attributes (delq name attributes))
469         ))
470     ;; (dolist (name '(=>ucs-gb =>ucs-cns =>ucs-jis =>ucs-ks =>ucs-big5))
471     ;;   (when (and (memq name attributes)
472     ;;              (setq value (get-char-attribute char name)))
473     ;;     (insert (format "(%-18s . #x%04X)\t; %c%s"
474     ;;                     (intern
475     ;;                      (concat "=>ucs@"
476     ;;                              (substring (symbol-name name) 6)))
477     ;;                     value
478     ;;                     (decode-char (intern
479     ;;                                   (concat "=ucs@"
480     ;;                                           (substring
481     ;;                                            (symbol-name name) 6)))
482     ;;                                  value)
483     ;;                     line-breaking))
484     ;;     (setq attributes (delq name attributes))))
485     ;; (when (and (memq '->ucs attributes)
486     ;;            (setq value (get-char-attribute char '->ucs)))
487     ;;   (insert (format (if char-db-convert-obsolete-format
488     ;;                       "(=>ucs\t\t. #x%04X)\t; %c%s"
489     ;;                     "(->ucs\t\t. #x%04X)\t; %c%s")
490     ;;                   value (decode-char '=ucs value)
491     ;;                   line-breaking))
492     ;;   (setq attributes (delq '->ucs attributes))
493     ;;   )
494     (dolist (name '(=>daikanwa))
495       (when (and (memq name attributes)
496                  (setq value (get-char-attribute char name)))
497         (insert
498          (if (integerp value)
499              (format "(%-18s . %05d)\t; %c%s"
500                      name value (decode-char '=daikanwa value)
501                      line-breaking)
502            (format "(%-18s %s)\t; %c%s"
503                    name
504                    (mapconcat (function prin1-to-string)
505                               value " ")
506                    (char-representative-of-daikanwa char)
507                    line-breaking)))
508         (setq attributes (delq name attributes))))
509     (when (and (memq 'general-category attributes)
510                (setq value (get-char-attribute char 'general-category)))
511       (insert (format
512                "(general-category\t%s) ; %s%s"
513                (mapconcat (lambda (cell)
514                             (format "%S" cell))
515                           value " ")
516                (cond ((rassoc value unidata-normative-category-alist)
517                       "Normative Category")
518                      ((rassoc value unidata-informative-category-alist)
519                       "Informative Category")
520                      (t
521                       "Unknown Category"))
522                line-breaking))
523       (setq attributes (delq 'general-category attributes))
524       )
525     (when (and (memq 'bidi-category attributes)
526                (setq value (get-char-attribute char 'bidi-category)))
527       (insert (format "(bidi-category\t. %S)%s"
528                       value
529                       line-breaking))
530       (setq attributes (delq 'bidi-category attributes))
531       )
532     (unless (or (not (memq 'mirrored attributes))
533                 (eq (setq value (get-char-attribute char 'mirrored 'empty))
534                     'empty))
535       (insert (format "(mirrored\t\t. %S)%s"
536                       value
537                       line-breaking))
538       (setq attributes (delq 'mirrored attributes))
539       )
540     (cond
541      ((and (memq 'decimal-digit-value attributes)
542            (setq value (get-char-attribute char 'decimal-digit-value)))
543       (insert (format "(decimal-digit-value . %S)%s"
544                       value
545                       line-breaking))
546       (setq attributes (delq 'decimal-digit-value attributes))
547       (when (and (memq 'digit-value attributes)
548                  (setq value (get-char-attribute char 'digit-value)))
549         (insert (format "(digit-value\t . %S)%s"
550                         value
551                         line-breaking))
552         (setq attributes (delq 'digit-value attributes))
553         )
554       (when (and (memq 'numeric-value attributes)
555                  (setq value (get-char-attribute char 'numeric-value)))
556         (insert (format "(numeric-value\t . %S)%s"
557                         value
558                         line-breaking))
559         (setq attributes (delq 'numeric-value attributes))
560         )
561       )
562      (t
563       (when (and (memq 'digit-value attributes)
564                  (setq value (get-char-attribute char 'digit-value)))
565         (insert (format "(digit-value\t. %S)%s"
566                         value
567                         line-breaking))
568         (setq attributes (delq 'digit-value attributes))
569         )
570       (when (and (memq 'numeric-value attributes)
571                  (setq value (get-char-attribute char 'numeric-value)))
572         (insert (format "(numeric-value\t. %S)%s"
573                         value
574                         line-breaking))
575         (setq attributes (delq 'numeric-value attributes))
576         )))
577     (when (and (memq 'iso-10646-comment attributes)
578                (setq value (get-char-attribute char 'iso-10646-comment)))
579       (insert (format "(iso-10646-comment\t. %S)%s"
580                       value
581                       line-breaking))
582       (setq attributes (delq 'iso-10646-comment attributes))
583       )
584     (when (and (memq 'morohashi-daikanwa attributes)
585                (setq value (get-char-attribute char 'morohashi-daikanwa)))
586       (insert (format "(morohashi-daikanwa\t%s)%s"
587                       (mapconcat (function prin1-to-string) value " ")
588                       line-breaking))
589       (setq attributes (delq 'morohashi-daikanwa attributes))
590       )
591     (setq radical nil
592           strokes nil)
593     (when (and (memq 'ideographic-radical attributes)
594                (setq value (get-char-attribute char 'ideographic-radical)))
595       (setq radical value)
596       (insert (format "(ideographic-radical . %S)\t; %c%s"
597                       radical
598                       (aref ideographic-radicals radical)
599                       line-breaking))
600       (setq attributes (delq 'ideographic-radical attributes))
601       )
602     (let (key)
603       (dolist (domain char-db-feature-domains)
604         (setq key (intern (format "%s@%s" 'ideographic-radical domain)))
605         (when (and (memq key attributes)
606                    (setq value (get-char-attribute char key)))
607           (setq radical value)
608           (insert (format "(%s . %S)\t; %c%s"
609                           key
610                           radical
611                           (aref ideographic-radicals radical)
612                           line-breaking))
613           (setq attributes (delq key attributes))
614           )
615         (setq key (intern (format "%s@%s" 'ideographic-strokes domain)))
616         (when (and (memq key attributes)
617                    (setq value (get-char-attribute char key)))
618           (setq strokes value)
619           (insert (format "(%s . %S)%s"
620                           key
621                           strokes
622                           line-breaking))
623           (setq attributes (delq key attributes))
624           )
625         (setq key (intern (format "%s@%s" 'total-strokes domain)))
626         (when (and (memq key attributes)
627                    (setq value (get-char-attribute char key)))
628           (insert (format "(%s       . %S)%s"
629                           key
630                           value
631                           line-breaking))
632           (setq attributes (delq key attributes))
633           )
634         (dolist (feature '(ideographic-radical
635                            ideographic-strokes
636                            total-strokes))
637           (setq key (intern (format "%s@%s*sources" feature domain)))
638           (when (and (memq key attributes)
639                      (setq value (get-char-attribute char key)))
640             (insert (format "(%s%s" key line-breaking))
641             (dolist (cell value)
642               (insert (format " %s" cell)))
643             (insert ")")
644             (insert line-breaking)
645             (setq attributes (delq key attributes))
646             ))
647         ))
648     (when (and (memq 'ideographic-strokes attributes)
649                (setq value (get-char-attribute char 'ideographic-strokes)))
650       (setq strokes value)
651       (insert (format "(ideographic-strokes . %S)%s"
652                       strokes
653                       line-breaking))
654       (setq attributes (delq 'ideographic-strokes attributes))
655       )
656     (when (and (memq 'kangxi-radical attributes)
657                (setq value (get-char-attribute char 'kangxi-radical)))
658       (unless (eq value radical)
659         (insert (format "(kangxi-radical\t . %S)\t; %c%s"
660                         value
661                         (aref ideographic-radicals value)
662                         line-breaking))
663         (or radical
664             (setq radical value)))
665       (setq attributes (delq 'kangxi-radical attributes))
666       )
667     (when (and (memq 'kangxi-strokes attributes)
668                (setq value (get-char-attribute char 'kangxi-strokes)))
669       (unless (eq value strokes)
670         (insert (format "(kangxi-strokes\t . %S)%s"
671                         value
672                         line-breaking))
673         (or strokes
674             (setq strokes value)))
675       (setq attributes (delq 'kangxi-strokes attributes))
676       )
677     (when (and (memq 'japanese-radical attributes)
678                (setq value (get-char-attribute char 'japanese-radical)))
679       (unless (eq value radical)
680         (insert (format "(japanese-radical\t . %S)\t; %c%s"
681                         value
682                         (aref ideographic-radicals value)
683                         line-breaking))
684         (or radical
685             (setq radical value)))
686       (setq attributes (delq 'japanese-radical attributes))
687       )
688     (when (and (memq 'japanese-strokes attributes)
689                (setq value (get-char-attribute char 'japanese-strokes)))
690       (unless (eq value strokes)
691         (insert (format "(japanese-strokes\t . %S)%s"
692                         value
693                         line-breaking))
694         (or strokes
695             (setq strokes value)))
696       (setq attributes (delq 'japanese-strokes attributes))
697       )
698     (when (and (memq 'cns-radical attributes)
699                (setq value (get-char-attribute char 'cns-radical)))
700       (insert (format "(cns-radical\t . %S)\t; %c%s"
701                       value
702                       (aref ideographic-radicals value)
703                       line-breaking))
704       (setq attributes (delq 'cns-radical attributes))
705       )
706     (when (and (memq 'cns-strokes attributes)
707                (setq value (get-char-attribute char 'cns-strokes)))
708       (unless (eq value strokes)
709         (insert (format "(cns-strokes\t . %S)%s"
710                         value
711                         line-breaking))
712         (or strokes
713             (setq strokes value)))
714       (setq attributes (delq 'cns-strokes attributes))
715       )
716     (when (and (memq 'shinjigen-1-radical attributes)
717                (setq value (get-char-attribute char 'shinjigen-1-radical)))
718       (unless (eq value radical)
719         (insert (format "(shinjigen-1-radical . %S)\t; %c%s"
720                         value
721                         (aref ideographic-radicals value)
722                         line-breaking))
723         (or radical
724             (setq radical value)))
725       (setq attributes (delq 'shinjigen-1-radical attributes))
726       )
727     (when (and (memq 'ideographic- attributes)
728                (setq value (get-char-attribute char 'ideographic-)))
729       (insert "(ideographic-       ")
730       (setq lbs (concat "\n" (make-string (current-column) ?\ ))
731             separator nil)
732       (while (consp value)
733         (setq cell (car value))
734         (if (integerp cell)
735             (setq cell (decode-char '=ucs cell)))
736         (cond ((characterp cell)
737                (if separator
738                    (insert lbs))
739                (if readable
740                    (insert (format "%S" cell))
741                  (char-db-insert-char-spec cell readable))
742                (setq separator lbs))
743               ((consp cell)
744                (if separator
745                    (insert lbs))
746                (if (consp (car cell))
747                    (char-db-insert-char-spec cell readable)
748                  (char-db-insert-char-reference cell readable))
749                (setq separator lbs))
750               (t
751                (if separator
752                    (insert separator))
753                (insert (prin1-to-string cell))
754                (setq separator " ")))
755         (setq value (cdr value)))
756       (insert ")")
757       (insert line-breaking)
758       (setq attributes (delq 'ideographic- attributes)))
759     (when (and (memq 'total-strokes attributes)
760                (setq value (get-char-attribute char 'total-strokes)))
761       (insert (format "(total-strokes       . %S)%s"
762                       value
763                       line-breaking))
764       (setq attributes (delq 'total-strokes attributes))
765       )
766     (when (and (memq '->ideograph attributes)
767                (setq value (get-char-attribute char '->ideograph)))
768       (insert (format "(->ideograph\t%s)%s"
769                       (mapconcat (lambda (code)
770                                    (cond ((symbolp code)
771                                           (symbol-name code))
772                                          ((integerp code)
773                                           (format "#x%04X" code))
774                                          (t
775                                           (format "%s %S"
776                                                   line-breaking code))))
777                                  value " ")
778                       line-breaking))
779       (setq attributes (delq '->ideograph attributes))
780       )
781     (when (and (memq '->decomposition attributes)
782                (setq value (get-char-attribute char '->decomposition)))
783       (insert (format "(->decomposition\t%s)%s"
784                       (mapconcat (lambda (code)
785                                    (cond ((symbolp code)
786                                           (symbol-name code))
787                                          ((characterp code)
788                                           (if readable
789                                               (format "%S" code)
790                                             (format "#x%04X"
791                                                     (char-int code))
792                                             ))
793                                          ((integerp code)
794                                           (format "#x%04X" code))
795                                          (t
796                                           (format "%s%S" line-breaking code))))
797                                  value " ")
798                       line-breaking))
799       (setq attributes (delq '->decomposition attributes))
800       )
801     (if (equal (get-char-attribute char '->titlecase)
802                (get-char-attribute char '->uppercase))
803         (setq attributes (delq '->titlecase attributes)))
804     (when (and (memq '->mojikyo attributes)
805                (setq value (get-char-attribute char '->mojikyo)))
806       (insert (format "(->mojikyo\t\t. %06d)\t; %c%s"
807                       value (decode-char 'mojikyo value)
808                       line-breaking))
809       (setq attributes (delq '->mojikyo attributes))
810       )
811     (when (and (memq 'hanyu-dazidian-vol attributes)
812                (setq value (get-char-attribute char 'hanyu-dazidian-vol)))
813       (insert (format "(hanyu-dazidian-vol  . %d)%s"
814                       value line-breaking))
815       (setq attributes (delq 'hanyu-dazidian-vol attributes))
816       )
817     (when (and (memq 'hanyu-dazidian-page attributes)
818                (setq value (get-char-attribute char 'hanyu-dazidian-page)))
819       (insert (format "(hanyu-dazidian-page . %d)%s"
820                       value line-breaking))
821       (setq attributes (delq 'hanyu-dazidian-page attributes))
822       )
823     (when (and (memq 'hanyu-dazidian-char attributes)
824                (setq value (get-char-attribute char 'hanyu-dazidian-char)))
825       (insert (format "(hanyu-dazidian-char . %d)%s"
826                       value line-breaking))
827       (setq attributes (delq 'hanyu-dazidian-char attributes))
828       )
829     (unless readable
830       (dolist (ignored '(composition
831                          ->denotational <-subsumptive ->ucs-unified))
832         (setq attributes (delq ignored attributes))))
833     ;; (setq rest ccs-attributes)
834     ;; (while (and rest
835     ;;             (progn
836     ;;               (setq value (get-char-attribute char (car rest)))
837     ;;               (if value
838     ;;                   (if (>= (length (symbol-name (car rest))) 19)
839     ;;                       (progn
840     ;;                         (setq has-long-ccs-name t)
841     ;;                         nil)
842     ;;                     t)
843     ;;                 t)))
844     ;;   (setq rest (cdr rest)))
845     (while attributes
846       (setq name (car attributes))
847       (if (setq value (get-char-attribute char name))
848           (cond ((setq ret (find-charset name))
849                  (setq name (charset-name ret))
850                  (if (and (not (memq name dest-ccss))
851                           (prog1
852                               (setq value (get-char-attribute char name))
853                             (setq dest-ccss (cons name dest-ccss))))
854                      (insert
855                       (format
856                        (cond ((memq name '(=daikanwa
857                                            =daikanwa-rev1 =daikanwa-rev2
858                                            =gt =gt-k =cbeta))
859                               (if has-long-ccs-name
860                                   "(%-26s . %05d)\t; %c%s"
861                                 "(%-18s . %05d)\t; %c%s"))
862                              ((eq name 'mojikyo)
863                               (if has-long-ccs-name
864                                   "(%-26s . %06d)\t; %c%s"
865                                 "(%-18s . %06d)\t; %c%s"))
866                              ((>= (charset-dimension name) 2)
867                               (if has-long-ccs-name
868                                   "(%-26s . #x%04X)\t; %c%s"
869                                 "(%-18s . #x%04X)\t; %c%s"))
870                              (t
871                               (if has-long-ccs-name
872                                   "(%-26s . #x%02X)\t; %c%s"
873                                 "(%-18s . #x%02X)\t; %c%s")))
874                        name
875                        (if (= (charset-iso-graphic-plane name) 1)
876                            (logior value
877                                    (cond ((= (charset-dimension name) 1)
878                                           #x80)
879                                          ((= (charset-dimension name) 2)
880                                           #x8080)
881                                          ((= (charset-dimension name) 3)
882                                           #x808080)
883                                          (t 0)))
884                          value)
885                        (char-db-decode-isolated-char name value)
886                        line-breaking)))
887                  )
888                 ((string-match "^=>ucs@" (symbol-name name))
889                  (insert (format "(%-18s . #x%04X)\t; %c%s"
890                                  name value (decode-char '=ucs value)
891                                  line-breaking))
892                  )
893                 ((eq name 'jisx0208-1978/4X)
894                  (insert (format "(%-18s . #x%04X)%s"
895                                  name value
896                                  line-breaking))
897                  )
898                 ((and (not readable)
899                       (or (eq name '<-identical)
900                           (string-match "^->simplified" (symbol-name name))))
901                  )
902                 ((or (eq name 'ideographic-structure)
903                      (eq name 'ideographic-)
904                      (string-match "^\\(->\\|<-\\)" (symbol-name name)))
905                  (insert (format "(%-18s%s " name line-breaking))
906                  (setq lbs (concat "\n" (make-string (current-column) ?\ ))
907                        separator nil)
908                  (while (consp value)
909                    (setq cell (car value))
910                    (if (integerp cell)
911                        (setq cell (decode-char '=ucs cell)))
912                    (cond ((eq name '->subsumptive)
913                           (when (or (not
914                                      (some (lambda (atr)
915                                              (get-char-attribute cell atr))
916                                            char-db-ignored-attributes))
917                                     (some (lambda (ccs)
918                                             (encode-char cell ccs
919                                                          'defined-only))
920                                           ccss))
921                             (if separator
922                                 (insert lbs))
923                             (let ((char-db-ignored-attributes
924                                    (cons '<-subsumptive
925                                          char-db-ignored-attributes)))
926                               (insert-char-attributes cell readable))
927                             (setq separator lbs))
928                           )
929                          ((characterp cell)
930                           (setq sources
931                                 (get-char-attribute
932                                  char
933                                  (intern (format "%s*sources" name))))
934                           (setq required-features nil)
935                           (dolist (source sources)
936                             (setq required-features
937                                   (cons
938                                    (if (find-charset
939                                         (setq ret (intern
940                                                    (format "=%s" source))))
941                                        ret
942                                      source)
943                                    required-features)))
944                           (when (string-match "@JP" (symbol-name name))
945                             (setq required-features
946                                   (union required-features
947                                          '(=jis-x0208
948                                            =jis-x0208-1990
949                                            =jis-x0213-1-2000
950                                            =jis-x0213-2-2000
951                                            =jis-x0212
952                                            =jis-x0208-1983
953                                            =jis-x0208-1978))))
954                           (if separator
955                               (insert lbs))
956                           (if readable
957                               (insert (format "%S" cell))
958                             (char-db-insert-char-spec cell readable
959                                                       nil
960                                                       required-features))
961                           (setq separator lbs))
962                          ((consp cell)
963                           (if separator
964                               (insert lbs))
965                           (if (consp (car cell))
966                               (char-db-insert-char-spec cell readable)
967                             (char-db-insert-char-reference cell readable))
968                           (setq separator lbs))
969                          (t
970                           (if separator
971                               (insert separator))
972                           (insert (prin1-to-string cell))
973                           (setq separator " ")))
974                    (setq value (cdr value)))
975                  (insert ")")
976                  (insert line-breaking))
977                 ((memq name '(ideograph=
978                               original-ideograph-of
979                               ancient-ideograph-of
980                               vulgar-ideograph-of
981                               wrong-ideograph-of
982                               ;; simplified-ideograph-of
983                               ideographic-variants
984                               ;; ideographic-different-form-of
985                               ))
986                  (insert (format "(%-18s%s " name line-breaking))
987                  (setq lbs (concat "\n" (make-string (current-column) ?\ ))
988                        separator nil)
989                  (while (consp value)
990                    (setq cell (car value))
991                    (if (and (consp cell)
992                             (consp (car cell)))
993                        (progn
994                          (if separator
995                              (insert lbs))
996                          (char-db-insert-alist cell readable)
997                          (setq separator lbs))
998                      (if separator
999                          (insert separator))
1000                      (insert (prin1-to-string cell))
1001                      (setq separator " "))
1002                    (setq value (cdr value)))
1003                  (insert ")")
1004                  (insert line-breaking))
1005                 ((consp value)
1006                  (insert (format "(%-18s " name))
1007                  (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1008                        separator nil)
1009                  (while (consp value)
1010                    (setq cell (car value))
1011                    (if (and (consp cell)
1012                             (consp (car cell))
1013                             (setq ret (condition-case nil
1014                                           (find-char cell)
1015                                         (error nil))))
1016                        (progn
1017                          (setq rest cell
1018                                al nil
1019                                cal nil)
1020                          (while rest
1021                            (setq key (car (car rest)))
1022                            (if (find-charset key)
1023                                (setq cal (cons key cal))
1024                              (setq al (cons key al)))
1025                            (setq rest (cdr rest)))
1026                          (if separator
1027                              (insert lbs))
1028                          (insert-char-attributes ret
1029                                                  readable
1030                                                  al cal)
1031                          (setq separator lbs))
1032                      (setq ret (prin1-to-string cell))
1033                      (if separator
1034                          (if (< (+ (current-column)
1035                                    (length ret)
1036                                    (length separator))
1037                                 76)
1038                              (insert separator)
1039                            (insert lbs)))
1040                      (insert ret)
1041                      (setq separator " "))
1042                    (setq value (cdr value)))
1043                  (insert ")")
1044                  (insert line-breaking))
1045                 (t
1046                  (insert (format "(%-18s . %S)%s"
1047                                  name value
1048                                  line-breaking)))
1049                 ))
1050       (setq attributes (cdr attributes)))
1051     ;; (while ccs-attributes
1052     ;;   (setq name (charset-name (car ccs-attributes)))
1053     ;;   (if (and (not (memq name dest-ccss))
1054     ;;            (prog1
1055     ;;                (setq value (get-char-attribute char name))
1056     ;;              (setq dest-ccss (cons name dest-ccss))))
1057     ;;       (insert
1058     ;;        (format
1059     ;;         (cond ((memq name '(=daikanwa
1060     ;;                             =daikanwa-rev1 =daikanwa-rev2
1061     ;;                             =gt =gt-k =cbeta))
1062     ;;                (if has-long-ccs-name
1063     ;;                    "(%-26s . %05d)\t; %c%s"
1064     ;;                  "(%-18s . %05d)\t; %c%s"))
1065     ;;               ((eq name 'mojikyo)
1066     ;;                (if has-long-ccs-name
1067     ;;                    "(%-26s . %06d)\t; %c%s"
1068     ;;                  "(%-18s . %06d)\t; %c%s"))
1069     ;;               ((>= (charset-dimension name) 2)
1070     ;;                (if has-long-ccs-name
1071     ;;                    "(%-26s . #x%04X)\t; %c%s"
1072     ;;                  "(%-18s . #x%04X)\t; %c%s"))
1073     ;;               (t
1074     ;;                (if has-long-ccs-name
1075     ;;                    "(%-26s . #x%02X)\t; %c%s"
1076     ;;                  "(%-18s . #x%02X)\t; %c%s")))
1077     ;;         name
1078     ;;         (if (= (charset-iso-graphic-plane name) 1)
1079     ;;             (logior value
1080     ;;                     (cond ((= (charset-dimension name) 1)
1081     ;;                            #x80)
1082     ;;                           ((= (charset-dimension name) 2)
1083     ;;                            #x8080)
1084     ;;                           ((= (charset-dimension name) 3)
1085     ;;                            #x808080)
1086     ;;                           (t 0)))
1087     ;;           value)
1088     ;;         (char-db-decode-isolated-char name value)
1089     ;;         line-breaking)))
1090     ;;   (setq ccs-attributes (cdr ccs-attributes)))
1091     (insert ")")))
1092
1093 (defun insert-char-data (char &optional readable
1094                               attributes)
1095   (save-restriction
1096     (narrow-to-region (point)(point))
1097     (insert "(define-char
1098   '")
1099     (insert-char-attributes char readable attributes)
1100     (insert ")\n")
1101     (goto-char (point-min))
1102     (while (re-search-forward "[ \t]+$" nil t)
1103       (replace-match ""))
1104     ;; from tabify.
1105     (goto-char (point-min))
1106     (while (re-search-forward "[ \t][ \t][ \t]*" nil t)
1107       (let ((column (current-column))
1108             (indent-tabs-mode t))
1109         (delete-region (match-beginning 0) (point))
1110         (indent-to column)))
1111     (goto-char (point-max))
1112     ;; (tabify (point-min)(point-max))
1113     ))
1114
1115 (defun insert-char-data-with-variant (char &optional printable
1116                                            no-ucs-unified
1117                                            script excluded-script)
1118   (insert-char-data char printable)
1119   (let ((variants (or (char-variants char)
1120                       (let ((ucs (get-char-attribute char '->ucs)))
1121                         (if ucs
1122                             (delete char (char-variants (int-char ucs)))))))
1123         variant vs)
1124     (setq variants (sort variants #'<))
1125     (while variants
1126       (setq variant (car variants))
1127       (if (and (or (null script)
1128                    (null (setq vs (get-char-attribute variant 'script)))
1129                    (memq script vs))
1130                (or (null excluded-script)
1131                    (null (setq vs (get-char-attribute variant 'script)))
1132                    (not (memq excluded-script vs))))
1133           (or (and no-ucs-unified (get-char-attribute variant '=ucs))
1134               (insert-char-data variant printable)))
1135       (setq variants (cdr variants))
1136       )))
1137
1138 (defun insert-char-range-data (min max &optional script excluded-script)
1139   (let ((code min)
1140         char)
1141     (while (<= code max)
1142       (setq char (decode-char '=ucs code))
1143       (if (encode-char char '=ucs 'defined-only)
1144           (insert-char-data-with-variant char nil 'no-ucs-unified
1145                                          script excluded-script))
1146       (setq code (1+ code)))))
1147
1148 (defun write-char-range-data-to-file (min max file
1149                                           &optional script excluded-script)
1150   (let ((coding-system-for-write 'utf-8-mcs))
1151     (with-temp-buffer
1152       (insert ";; -*- coding: utf-8-mcs -*-\n")
1153       (insert-char-range-data min max script excluded-script)
1154       (write-region (point-min)(point-max) file))))
1155
1156 (defvar what-character-original-window-configuration)
1157
1158 ;;;###autoload
1159 (defun what-char-definition (char)
1160   (interactive (list (char-after)))
1161   (let ((buf (get-buffer-create "*Character Description*"))
1162         (the-buf (current-buffer))
1163         (win-conf (current-window-configuration)))
1164     (pop-to-buffer buf)
1165     (make-local-variable 'what-character-original-window-configuration)
1166     (setq what-character-original-window-configuration win-conf)
1167     (setq buffer-read-only nil)
1168     (erase-buffer)
1169     (condition-case err
1170         (progn
1171           (insert-char-data-with-variant char 'printable)
1172           (unless (char-attribute-alist char)
1173             (insert (format ";; = %c\n"
1174                             (let* ((rest (split-char char))
1175                                    (ccs (pop rest))
1176                                    (code (pop rest)))
1177                               (while rest
1178                                 (setq code (logior (lsh code 8)
1179                                                    (pop rest))))
1180                               (decode-char ccs code)))))
1181           ;; (char-db-update-comment)
1182           (set-buffer-modified-p nil)
1183           (view-mode the-buf (lambda (buf)
1184                                (set-window-configuration
1185                                 what-character-original-window-configuration)
1186                                ))
1187           (goto-char (point-min)))
1188       (error (progn
1189                (set-window-configuration
1190                 what-character-original-window-configuration)
1191                (signal (car err) (cdr err)))))))
1192
1193 (provide 'char-db-util)
1194
1195 ;;; char-db-util.el ends here