Sync up with r21-4-15-chise-0_21-33.
[chise/xemacs-chise.git] / lisp / utf-2000 / char-db-util.el
1 ;;; char-db-util.el --- Character Database utility
2
3 ;; Copyright (C) 1998,1999,2000,2001,2002,2003,2004,2005 MORIOKA Tomohiko.
4
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: CHISE, Character Database, ISO/IEC 10646, Unicode, UCS-4, MULE.
7
8 ;; This file is part of XEmacs CHISE.
9
10 ;; XEmacs CHISE is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
14
15 ;; XEmacs CHISE is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 ;; General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with XEmacs CHISE; see the file COPYING.  If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
24
25 ;;; Code:
26
27 (require 'alist)
28
29 (defconst unidata-normative-category-alist
30   '(("Lu" letter        uppercase)
31     ("Ll" letter        lowercase)
32     ("Lt" letter        titlecase)
33     ("Mn" mark          non-spacing)
34     ("Mc" mark          spacing-combining)
35     ("Me" mark          enclosing)
36     ("Nd" number        decimal-digit)
37     ("Nl" number        letter)
38     ("No" number        other)
39     ("Zs" separator     space)
40     ("Zl" separator     line)
41     ("Zp" separator     paragraph)
42     ("Cc" other         control)
43     ("Cf" other         format)
44     ("Cs" other         surrogate)
45     ("Co" other         private-use)
46     ("Cn" other         not-assigned)))
47
48 (defconst unidata-informative-category-alist
49   '(("Lm" letter        modifier)
50     ("Lo" letter        other)
51     ("Pc" punctuation   connector)
52     ("Pd" punctuation   dash)
53     ("Ps" punctuation   open)
54     ("Pe" punctuation   close)
55     ("Pi" punctuation   initial-quote)
56     ("Pf" punctuation   final-quote)
57     ("Po" punctuation   other)
58     ("Sm" symbol        math)
59     ("Sc" symbol        currency)
60     ("Sk" symbol        modifier)
61     ("So" symbol        other)
62     ))
63
64 (defconst ideographic-radicals
65   (let ((v (make-vector 215 nil))
66         (i 1))
67     (while (< i 215)
68       (aset v i (decode-char '=ucs (+ #x2EFF i)))
69       (setq i (1+ i)))
70     v))
71
72 (defvar char-db-file-coding-system 'utf-8-mcs-er)
73
74 (defvar char-db-feature-domains
75   '(ucs daikanwa cns gt jis jis/alt jis/a jis/b
76         jis-x0212 jis-x0213 cdp shinjigen misc unknown))
77
78 (defvar char-db-ignored-attributes nil)
79
80 (defun char-attribute-name< (ka kb)
81   (cond
82    ((eq '->denotational kb)
83     t)
84    ((eq '->subsumptive kb)
85     (not (eq '->denotational ka)))
86    ((eq '->denotational ka)
87     nil)
88    ((eq '->subsumptive ka)
89     nil)
90    ((find-charset ka)
91     (if (find-charset kb)
92         (if (<= (charset-id ka) 1)
93             (if (<= (charset-id kb) 1)
94                 (cond
95                  ((= (charset-dimension ka)
96                      (charset-dimension kb))
97                   (> (charset-id ka)(charset-id kb)))
98                  (t
99                   (> (charset-dimension ka)
100                      (charset-dimension kb))
101                   ))
102               t)
103           (if (<= (charset-id kb) 1)
104               nil
105             (< (charset-id ka)(charset-id kb))))
106       nil))
107    ((find-charset kb)
108     t)
109    ((symbolp ka)
110     (cond ((symbolp kb)
111            (string< (symbol-name ka)
112                     (symbol-name kb)))
113           (t)))
114    ((symbolp kb)
115     nil)))
116
117 (defvar char-db-coded-charset-priority-list
118   '(ascii
119     control-1
120     latin-iso8859-1
121     latin-iso8859-2
122     latin-iso8859-3
123     latin-iso8859-4
124     latin-iso8859-9
125     latin-jisx0201
126     cyrillic-iso8859-5
127     greek-iso8859-7
128     thai-tis620
129     =jis-x0208
130     =jis-x0208@1978
131     =jis-x0208@1983
132     japanese-jisx0212
133     chinese-gb2312
134     chinese-cns11643-1
135     chinese-cns11643-2
136     chinese-cns11643-3
137     chinese-cns11643-4
138     chinese-cns11643-5
139     chinese-cns11643-6
140     chinese-cns11643-7
141     =jis-x0208@1990
142     =jis-x0213-1-2000
143     =jis-x0213-2-2000
144     korean-ksc5601
145     chinese-isoir165
146     katakana-jisx0201
147     hebrew-iso8859-8
148     chinese-gb12345
149     latin-viscii
150     ethiopic-ucs
151     =big5-cdp
152     =gt
153     ideograph-daikanwa-2
154     ideograph-daikanwa
155     =cbeta
156     ideograph-hanziku-1
157     ideograph-hanziku-2
158     ideograph-hanziku-3
159     ideograph-hanziku-4
160     ideograph-hanziku-5
161     ideograph-hanziku-6
162     ideograph-hanziku-7
163     ideograph-hanziku-8
164     ideograph-hanziku-9
165     ideograph-hanziku-10
166     ideograph-hanziku-11
167     ideograph-hanziku-12
168     =gt-k
169     =ucs@unicode
170     =big5
171     =big5-eten
172     =jis-x0208@1997
173     =jef-china3))
174
175 (defun char-db-make-char-spec (char)
176   (let (ret char-spec)
177     (cond ((characterp char)
178            (cond ((and (setq ret (encode-char char '=ucs 'defined-only))
179                        (not (and (<= #xE000 ret)(<= ret #xF8FF))))
180                   (setq char-spec (list (cons '=ucs ret)))
181                   (cond ((setq ret (get-char-attribute char 'name))
182                          (setq char-spec (cons (cons 'name ret) char-spec))
183                          )
184                         ((setq ret (get-char-attribute char 'name*))
185                          (setq char-spec (cons (cons 'name* ret) char-spec))
186                          ))
187                   )
188                  ((setq ret
189                         (catch 'tag
190                           (let ((rest char-db-coded-charset-priority-list)
191                                 ccs)
192                             (while rest
193                               (setq ccs (charset-name
194                                          (find-charset (car rest))))
195                               (if (setq ret
196                                         (encode-char char ccs
197                                                      'defined-only))
198                                   (throw 'tag (cons ccs ret)))
199                               (setq rest (cdr rest))))))
200                   (setq char-spec (list ret))
201                   (dolist (ccs (delq (car ret) (charset-list)))
202                     (if (and (or (charset-iso-final-char ccs)
203                                  (memq ccs
204                                        '(=daikanwa
205                                          =daikanwa@rev2
206                                          ;; =gt-k
207                                          )))
208                              (setq ccs (charset-name ccs))
209                              (null (assq ccs char-spec))
210                              (setq ret (encode-char char ccs 'defined-only)))
211                         (setq char-spec (cons (cons ccs ret) char-spec))))
212                   (if (null char-spec)
213                       (setq char-spec (split-char char)))
214                   (cond ((setq ret (get-char-attribute char 'name))
215                          (setq char-spec (cons (cons 'name ret) char-spec))
216                          )
217                         ((setq ret (get-char-attribute char 'name*))
218                          (setq char-spec (cons (cons 'name* ret) char-spec))
219                          ))
220                   ))
221            char-spec)
222           ((consp char)
223            char))))
224     
225 (defun char-db-insert-char-spec (char &optional readable column
226                                       required-features)
227   (unless column
228     (setq column (current-column)))
229   (let (char-spec temp-char)
230     (setq char-spec (char-db-make-char-spec char))
231     (unless (or (characterp char) ; char
232                 (condition-case nil
233                     (setq char (find-char char-spec))
234                   (error nil)))
235       ;; define temporary character
236       ;;   Current implementation is dirty.
237       (setq temp-char (define-char (cons '(ideograph-daikanwa . 0)
238                                          char-spec)))
239       (remove-char-attribute temp-char 'ideograph-daikanwa)
240       (setq char temp-char))
241     (insert-char-attributes char
242                             readable
243                             (union (mapcar #'car char-spec)
244                                    required-features))
245     (when temp-char
246       ;; undefine temporary character
247       ;;   Current implementation is dirty.
248       (setq char-spec (char-attribute-alist temp-char))
249       (while char-spec
250         (remove-char-attribute temp-char (car (car char-spec)))
251         (setq char-spec (cdr char-spec))))))
252
253 (defun char-db-insert-alist (alist &optional readable column)
254   (unless column
255     (setq column (current-column)))
256   (let ((line-breaking
257          (concat "\n" (make-string (1+ column) ?\ )))
258         name value
259         ret al ; cal
260         key
261         lbs cell rest separator)
262     (insert "(")
263     (while alist
264       (setq name (car (car alist))
265             value (cdr (car alist)))
266       (cond ((eq name 'char)
267              (insert "(char . ")
268              (if (setq ret (condition-case nil
269                                (find-char value)
270                              (error nil)))
271                  (progn
272                    (setq al nil
273                          ;; cal nil
274                          )
275                    (while value
276                      (setq key (car (car value)))
277                      ;; (if (find-charset key)
278                      ;;     (setq cal (cons key cal))
279                      (setq al (cons key al))
280                      ;; )
281                      (setq value (cdr value)))
282                    (insert-char-attributes ret
283                                            readable
284                                            (or al 'none) ; cal
285                                            ))
286                (insert (prin1-to-string value)))
287              (insert ")")
288              (insert line-breaking))
289             ((consp value)
290              (insert (format "(%-18s " name))
291              (setq lbs (concat "\n" (make-string (current-column) ?\ )))
292              (while (consp value)
293                (setq cell (car value))
294                (if (and (consp cell)
295                         (consp (car cell))
296                         (setq ret (condition-case nil
297                                       (find-char cell)
298                                     (error nil)))
299                         )
300                    (progn
301                      (setq rest cell
302                            al nil
303                            ;; cal nil
304                            )
305                      (while rest
306                        (setq key (car (car rest)))
307                        ;; (if (find-charset key)
308                        ;;     (setq cal (cons key cal))
309                        (setq al (cons key al))
310                        ;; )
311                        (setq rest (cdr rest)))
312                      (if separator
313                          (insert lbs))
314                      (insert-char-attributes ret
315                                              readable
316                                              al ; cal
317                                              )
318                      (setq separator lbs))
319                  (if separator
320                      (insert separator))
321                  (insert (prin1-to-string cell))
322                  (setq separator " "))
323                (setq value (cdr value)))
324              (insert ")")
325              (insert line-breaking))
326             (t
327              (insert (format "(%-18s . %S)%s"
328                              name value
329                              line-breaking))))
330       (setq alist (cdr alist))))
331   (insert ")"))
332
333 (defun char-db-insert-char-reference (plist &optional readable column)
334   (unless column
335     (setq column (current-column)))
336   (let ((line-breaking
337          (concat "\n" (make-string (1+ column) ?\ )))
338         (separator "")
339         name value)
340     (insert "(")
341     (while plist
342       (setq name (pop plist))
343       (setq value (pop plist))
344       (cond ((eq name :char)
345              (insert separator)
346              (insert ":char\t")
347              (cond ((numberp value)
348                     (setq value (decode-char '=ucs value)))
349                    ;; ((consp value)
350                    ;;  (setq value (or (find-char value)
351                    ;;                  value)))
352                    )
353              (char-db-insert-char-spec value readable)
354              (insert line-breaking)
355              (setq separator ""))
356             ((eq name :radical)
357              (insert (format "%s%s\t%d ; %c%s"
358                              separator
359                              name value
360                              (aref ideographic-radicals value)
361                              line-breaking))
362              (setq separator ""))
363             (t
364              (insert (format "%s%s\t%S" separator name value))
365              (setq separator line-breaking)))
366       ))
367   (insert ")"))
368
369 (defun char-db-decode-isolated-char (ccs code-point)
370   (let (ret)
371     (setq ret
372           (cond ((eq ccs 'arabic-iso8859-6)
373                  (decode-char ccs code-point))
374                 ((and (memq ccs '(=gt-pj-1
375                                   =gt-pj-2
376                                   =gt-pj-3
377                                   =gt-pj-4
378                                   =gt-pj-5
379                                   =gt-pj-6
380                                   =gt-pj-7
381                                   =gt-pj-8
382                                   =gt-pj-9
383                                   =gt-pj-10
384                                   =gt-pj-11))
385                       (setq ret (decode-char ccs code-point))
386                       (setq ret (encode-char ret '=gt 'defined-only)))
387                  (decode-builtin-char '=gt ret))
388                 (t
389                  (decode-builtin-char ccs code-point))))
390     (cond ((and (<= 0 (char-int ret))
391                 (<= (char-int ret) #x1F))
392            (decode-char '=ucs (+ #x2400 (char-int ret))))
393           ((= (char-int ret) #x7F)
394            ?\u2421)
395           (t ret))))
396
397 (defvar char-db-convert-obsolete-format t)
398
399 (defun char-db-insert-ccs-feature (name value line-breaking)
400   (insert
401    (format
402     (cond ((memq name '(=daikanwa
403                         =daikanwa@rev1 =daikanwa@rev2
404                         =gt =gt-k =cbeta))
405            "(%-18s . %05d)\t; %c")
406           ((eq name 'mojikyo)
407            "(%-18s . %06d)\t; %c")
408           ((>= (charset-dimension name) 2)
409            "(%-18s . #x%04X)\t; %c")
410           (t
411            "(%-18s . #x%02X)\t; %c"))
412     name
413     (if (= (charset-iso-graphic-plane name) 1)
414         (logior value
415                 (cond ((= (charset-dimension name) 1)
416                        #x80)
417                       ((= (charset-dimension name) 2)
418                        #x8080)
419                       ((= (charset-dimension name) 3)
420                        #x808080)
421                       (t 0)))
422       value)
423     (char-db-decode-isolated-char name value)))
424   (if (and (= (charset-chars name) 94)
425            (= (charset-dimension name) 2))
426       (insert (format " [%02d-%02d]"
427                       (- (lsh value -8) 32)
428                       (- (logand value 255) 32))))
429   (insert line-breaking))
430
431 (defun insert-char-attributes (char &optional readable attributes column)
432   (unless column
433     (setq column (current-column)))
434   (let (name value ; has-long-ccs-name
435         rest
436         radical strokes
437         (line-breaking
438          (concat "\n" (make-string (1+ column) ?\ )))
439         lbs cell separator ret
440         key al cal
441         dest-ccss
442         sources required-features
443         ccss)
444     (let (atr-d)
445       (setq attributes
446             (sort (if attributes
447                       (if (consp attributes)
448                           (progn
449                             (dolist (name attributes)
450                               (unless (memq name char-db-ignored-attributes)
451                                 (if (find-charset name)
452                                     (push name ccss))
453                                 (push name atr-d)))
454                             atr-d))
455                     (dolist (name (char-attribute-list))
456                       (unless (memq name char-db-ignored-attributes)
457                         (if (find-charset name)
458                             (push name ccss))
459                         (push name atr-d)))
460                     atr-d)
461                   #'char-attribute-name<)))
462     (insert "(")
463     (when (and (memq 'name attributes)
464                (setq value (get-char-attribute char 'name)))
465       (insert (format
466                (if (> (+ (current-column) (length value)) 48)
467                    "(name . %S)%s"
468                  "(name               . %S)%s")
469                value line-breaking))
470       (setq attributes (delq 'name attributes))
471       )
472     (when (and (memq 'name* attributes)
473                (setq value (get-char-attribute char 'name*)))
474       (insert (format
475                (if (> (+ (current-column) (length value)) 48)
476                    "(name* . %S)%s"
477                  "(name*              . %S)%s")
478                value line-breaking))
479       (setq attributes (delq 'name* attributes))
480       )
481     (when (and (memq 'script attributes)
482                (setq value (get-char-attribute char 'script)))
483       (insert (format "(script\t\t%s)%s"
484                       (mapconcat (function prin1-to-string)
485                                  value " ")
486                       line-breaking))
487       (setq attributes (delq 'script attributes))
488       )
489     ;; (when (and (memq '<-denotational attributes)
490     ;;            (setq value (get-char-attribute char '<-denotational))
491     ;;            (null (cdr value))
492     ;;            (setq value (encode-char (car value) 'ucs 'defined-only)))
493     ;;   (insert (format "(%-18s . #x%04X)\t; %c%s"
494     ;;                   '=>ucs value (decode-char 'ucs value)
495     ;;                   line-breaking))
496     ;;   (setq attributes (delq '<-denotational attributes)))
497     (dolist (name '(=>ucs =>ucs*))
498       (when (and (memq name attributes)
499                  (setq value (get-char-attribute char name)))
500         (insert (format "(%-18s . #x%04X)\t; %c%s"
501                         name value (decode-char '=ucs value)
502                         line-breaking))
503         (setq attributes (delq name attributes))))
504     (dolist (name '(=>ucs@gb =>ucs@cns =>ucs@jis =>ucs@ks =>ucs@big5))
505       (when (and (memq name attributes)
506                  (setq value (get-char-attribute char name)))
507         (insert (format "(%-18s . #x%04X)\t; %c%s"
508                         name value
509                         (decode-char (intern
510                                       (concat "="
511                                               (substring
512                                                (symbol-name name) 2)))
513                                      value)
514                         line-breaking))
515         (setq attributes (delq name attributes))
516         ))
517     (dolist (name '(=>daikanwa))
518       (when (and (memq name attributes)
519                  (setq value (get-char-attribute char name)))
520         (insert
521          (if (integerp value)
522              (format "(%-18s . %05d)\t; %c%s"
523                      name value (decode-char '=daikanwa value)
524                      line-breaking)
525            (format "(%-18s %s)\t; %c%s"
526                    name
527                    (mapconcat (function prin1-to-string)
528                               value " ")
529                    (char-representative-of-daikanwa char)
530                    line-breaking)))
531         (setq attributes (delq name attributes))))
532     (when (and (memq 'general-category attributes)
533                (setq value (get-char-attribute char 'general-category)))
534       (insert (format
535                "(general-category\t%s) ; %s%s"
536                (mapconcat (lambda (cell)
537                             (format "%S" cell))
538                           value " ")
539                (cond ((rassoc value unidata-normative-category-alist)
540                       "Normative Category")
541                      ((rassoc value unidata-informative-category-alist)
542                       "Informative Category")
543                      (t
544                       "Unknown Category"))
545                line-breaking))
546       (setq attributes (delq 'general-category attributes))
547       )
548     (when (and (memq 'bidi-category attributes)
549                (setq value (get-char-attribute char 'bidi-category)))
550       (insert (format "(bidi-category\t. %S)%s"
551                       value
552                       line-breaking))
553       (setq attributes (delq 'bidi-category attributes))
554       )
555     (unless (or (not (memq 'mirrored attributes))
556                 (eq (setq value (get-char-attribute char 'mirrored 'empty))
557                     'empty))
558       (insert (format "(mirrored\t\t. %S)%s"
559                       value
560                       line-breaking))
561       (setq attributes (delq 'mirrored attributes))
562       )
563     (cond
564      ((and (memq 'decimal-digit-value attributes)
565            (setq value (get-char-attribute char 'decimal-digit-value)))
566       (insert (format "(decimal-digit-value . %S)%s"
567                       value
568                       line-breaking))
569       (setq attributes (delq 'decimal-digit-value attributes))
570       (when (and (memq 'digit-value attributes)
571                  (setq value (get-char-attribute char 'digit-value)))
572         (insert (format "(digit-value\t . %S)%s"
573                         value
574                         line-breaking))
575         (setq attributes (delq 'digit-value attributes))
576         )
577       (when (and (memq 'numeric-value attributes)
578                  (setq value (get-char-attribute char 'numeric-value)))
579         (insert (format "(numeric-value\t . %S)%s"
580                         value
581                         line-breaking))
582         (setq attributes (delq 'numeric-value attributes))
583         )
584       )
585      (t
586       (when (and (memq 'digit-value attributes)
587                  (setq value (get-char-attribute char 'digit-value)))
588         (insert (format "(digit-value\t. %S)%s"
589                         value
590                         line-breaking))
591         (setq attributes (delq 'digit-value attributes))
592         )
593       (when (and (memq 'numeric-value attributes)
594                  (setq value (get-char-attribute char 'numeric-value)))
595         (insert (format "(numeric-value\t. %S)%s"
596                         value
597                         line-breaking))
598         (setq attributes (delq 'numeric-value attributes))
599         )))
600     (when (and (memq 'iso-10646-comment attributes)
601                (setq value (get-char-attribute char 'iso-10646-comment)))
602       (insert (format "(iso-10646-comment\t. %S)%s"
603                       value
604                       line-breaking))
605       (setq attributes (delq 'iso-10646-comment attributes))
606       )
607     (when (and (memq 'morohashi-daikanwa attributes)
608                (setq value (get-char-attribute char 'morohashi-daikanwa)))
609       (insert (format "(morohashi-daikanwa\t%s)%s"
610                       (mapconcat (function prin1-to-string) value " ")
611                       line-breaking))
612       (setq attributes (delq 'morohashi-daikanwa attributes))
613       )
614     (setq radical nil
615           strokes nil)
616     (when (and (memq 'ideographic-radical attributes)
617                (setq value (get-char-attribute char 'ideographic-radical)))
618       (setq radical value)
619       (insert (format "(ideographic-radical . %S)\t; %c%s"
620                       radical
621                       (aref ideographic-radicals radical)
622                       line-breaking))
623       (setq attributes (delq 'ideographic-radical attributes))
624       )
625     (let (key)
626       (dolist (domain
627                (append
628                 char-db-feature-domains
629                 (let (dest domain)
630                   (dolist (feature (char-attribute-list))
631                     (setq feature (symbol-name feature))
632                     (when (string-match
633                            "\\(radical\\|strokes\\)@\\([^@*]+\\)\\(\\*\\|$\\)"
634                            feature)
635                       (setq domain (intern (match-string 2 feature)))
636                      (unless (memq domain dest)
637                        (setq dest (cons domain dest)))))
638                   (sort dest #'string<))))
639         (setq key (intern (format "%s@%s" 'ideographic-radical domain)))
640         (when (and (memq key attributes)
641                    (setq value (get-char-attribute char key)))
642           (setq radical value)
643           (insert (format "(%s . %S)\t; %c%s"
644                           key
645                           radical
646                           (aref ideographic-radicals radical)
647                           line-breaking))
648           (setq attributes (delq key attributes))
649           )
650         (setq key (intern (format "%s@%s" 'ideographic-strokes domain)))
651         (when (and (memq key attributes)
652                    (setq value (get-char-attribute char key)))
653           (setq strokes value)
654           (insert (format "(%s . %S)%s"
655                           key
656                           strokes
657                           line-breaking))
658           (setq attributes (delq key attributes))
659           )
660         (setq key (intern (format "%s@%s" 'total-strokes domain)))
661         (when (and (memq key attributes)
662                    (setq value (get-char-attribute char key)))
663           (insert (format "(%s       . %S)%s"
664                           key
665                           value
666                           line-breaking))
667           (setq attributes (delq key attributes))
668           )
669         (dolist (feature '(ideographic-radical
670                            ideographic-strokes
671                            total-strokes))
672           (setq key (intern (format "%s@%s*sources" feature domain)))
673           (when (and (memq key attributes)
674                      (setq value (get-char-attribute char key)))
675             (insert (format "(%s%s" key line-breaking))
676             (dolist (cell value)
677               (insert (format " %s" cell)))
678             (insert ")")
679             (insert line-breaking)
680             (setq attributes (delq key attributes))
681             ))
682         ))
683     (when (and (memq 'ideographic-strokes attributes)
684                (setq value (get-char-attribute char 'ideographic-strokes)))
685       (setq strokes value)
686       (insert (format "(ideographic-strokes . %S)%s"
687                       strokes
688                       line-breaking))
689       (setq attributes (delq 'ideographic-strokes attributes))
690       )
691     (when (and (memq 'kangxi-radical attributes)
692                (setq value (get-char-attribute char 'kangxi-radical)))
693       (unless (eq value radical)
694         (insert (format "(kangxi-radical\t . %S)\t; %c%s"
695                         value
696                         (aref ideographic-radicals value)
697                         line-breaking))
698         (or radical
699             (setq radical value)))
700       (setq attributes (delq 'kangxi-radical attributes))
701       )
702     (when (and (memq 'kangxi-strokes attributes)
703                (setq value (get-char-attribute char 'kangxi-strokes)))
704       (unless (eq value strokes)
705         (insert (format "(kangxi-strokes\t . %S)%s"
706                         value
707                         line-breaking))
708         (or strokes
709             (setq strokes value)))
710       (setq attributes (delq 'kangxi-strokes attributes))
711       )
712     (when (and (memq 'japanese-radical attributes)
713                (setq value (get-char-attribute char 'japanese-radical)))
714       (unless (eq value radical)
715         (insert (format "(japanese-radical\t . %S)\t; %c%s"
716                         value
717                         (aref ideographic-radicals value)
718                         line-breaking))
719         (or radical
720             (setq radical value)))
721       (setq attributes (delq 'japanese-radical attributes))
722       )
723     (when (and (memq 'japanese-strokes attributes)
724                (setq value (get-char-attribute char 'japanese-strokes)))
725       (unless (eq value strokes)
726         (insert (format "(japanese-strokes\t . %S)%s"
727                         value
728                         line-breaking))
729         (or strokes
730             (setq strokes value)))
731       (setq attributes (delq 'japanese-strokes attributes))
732       )
733     (when (and (memq 'cns-radical attributes)
734                (setq value (get-char-attribute char 'cns-radical)))
735       (insert (format "(cns-radical\t . %S)\t; %c%s"
736                       value
737                       (aref ideographic-radicals value)
738                       line-breaking))
739       (setq attributes (delq 'cns-radical attributes))
740       )
741     (when (and (memq 'cns-strokes attributes)
742                (setq value (get-char-attribute char 'cns-strokes)))
743       (unless (eq value strokes)
744         (insert (format "(cns-strokes\t . %S)%s"
745                         value
746                         line-breaking))
747         (or strokes
748             (setq strokes value)))
749       (setq attributes (delq 'cns-strokes attributes))
750       )
751     (when (and (memq 'shinjigen-1-radical attributes)
752                (setq value (get-char-attribute char 'shinjigen-1-radical)))
753       (unless (eq value radical)
754         (insert (format "(shinjigen-1-radical . %S)\t; %c%s"
755                         value
756                         (aref ideographic-radicals value)
757                         line-breaking))
758         (or radical
759             (setq radical value)))
760       (setq attributes (delq 'shinjigen-1-radical attributes))
761       )
762     (when (and (memq 'ideographic- attributes)
763                (setq value (get-char-attribute char 'ideographic-)))
764       (insert "(ideographic-       ")
765       (setq lbs (concat "\n" (make-string (current-column) ?\ ))
766             separator nil)
767       (while (consp value)
768         (setq cell (car value))
769         (if (integerp cell)
770             (setq cell (decode-char '=ucs cell)))
771         (cond ((characterp cell)
772                (if separator
773                    (insert lbs))
774                (if readable
775                    (insert (format "%S" cell))
776                  (char-db-insert-char-spec cell readable))
777                (setq separator lbs))
778               ((consp cell)
779                (if separator
780                    (insert lbs))
781                (if (consp (car cell))
782                    (char-db-insert-char-spec cell readable)
783                  (char-db-insert-char-reference cell readable))
784                (setq separator lbs))
785               (t
786                (if separator
787                    (insert separator))
788                (insert (prin1-to-string cell))
789                (setq separator " ")))
790         (setq value (cdr value)))
791       (insert ")")
792       (insert line-breaking)
793       (setq attributes (delq 'ideographic- attributes)))
794     (when (and (memq 'total-strokes attributes)
795                (setq value (get-char-attribute char 'total-strokes)))
796       (insert (format "(total-strokes       . %S)%s"
797                       value
798                       line-breaking))
799       (setq attributes (delq 'total-strokes attributes))
800       )
801     (when (and (memq '->ideograph attributes)
802                (setq value (get-char-attribute char '->ideograph)))
803       (insert (format "(->ideograph\t%s)%s"
804                       (mapconcat (lambda (code)
805                                    (cond ((symbolp code)
806                                           (symbol-name code))
807                                          ((integerp code)
808                                           (format "#x%04X" code))
809                                          (t
810                                           (format "%s %S"
811                                                   line-breaking code))))
812                                  value " ")
813                       line-breaking))
814       (setq attributes (delq '->ideograph attributes))
815       )
816     (when (and (memq '->decomposition attributes)
817                (setq value (get-char-attribute char '->decomposition)))
818       (insert (format "(->decomposition\t%s)%s"
819                       (mapconcat (lambda (code)
820                                    (cond ((symbolp code)
821                                           (symbol-name code))
822                                          ((characterp code)
823                                           (if readable
824                                               (format "%S" code)
825                                             (format "#x%04X"
826                                                     (char-int code))
827                                             ))
828                                          ((integerp code)
829                                           (format "#x%04X" code))
830                                          (t
831                                           (format "%s%S" line-breaking code))))
832                                  value " ")
833                       line-breaking))
834       (setq attributes (delq '->decomposition attributes))
835       )
836     (if (equal (get-char-attribute char '->titlecase)
837                (get-char-attribute char '->uppercase))
838         (setq attributes (delq '->titlecase attributes)))
839     (when (and (memq '->mojikyo attributes)
840                (setq value (get-char-attribute char '->mojikyo)))
841       (insert (format "(->mojikyo\t\t. %06d)\t; %c%s"
842                       value (decode-char 'mojikyo value)
843                       line-breaking))
844       (setq attributes (delq '->mojikyo attributes))
845       )
846     (when (and (memq 'hanyu-dazidian-vol attributes)
847                (setq value (get-char-attribute char 'hanyu-dazidian-vol)))
848       (insert (format "(hanyu-dazidian-vol  . %d)%s"
849                       value line-breaking))
850       (setq attributes (delq 'hanyu-dazidian-vol attributes))
851       )
852     (when (and (memq 'hanyu-dazidian-page attributes)
853                (setq value (get-char-attribute char 'hanyu-dazidian-page)))
854       (insert (format "(hanyu-dazidian-page . %d)%s"
855                       value line-breaking))
856       (setq attributes (delq 'hanyu-dazidian-page attributes))
857       )
858     (when (and (memq 'hanyu-dazidian-char attributes)
859                (setq value (get-char-attribute char 'hanyu-dazidian-char)))
860       (insert (format "(hanyu-dazidian-char . %d)%s"
861                       value line-breaking))
862       (setq attributes (delq 'hanyu-dazidian-char attributes))
863       )
864     (unless readable
865       (dolist (ignored '(composition
866                          ->denotational <-subsumptive ->ucs-unified
867                          ->ideographic-component-forms))
868         (setq attributes (delq ignored attributes))))
869     (while attributes
870       (setq name (car attributes))
871       (if (setq value (get-char-attribute char name))
872           (cond ((setq ret (find-charset name))
873                  (setq name (charset-name ret))
874                  (if (and (not (memq name dest-ccss))
875                           (prog1
876                               (setq value (get-char-attribute char name))
877                             (setq dest-ccss (cons name dest-ccss))))
878                      (char-db-insert-ccs-feature name value line-breaking))
879                  )
880                 ((string-match "^=>ucs@" (symbol-name name))
881                  (insert (format "(%-18s . #x%04X)\t; %c%s"
882                                  name value (decode-char '=ucs value)
883                                  line-breaking))
884                  )
885                 ((eq name 'jisx0208-1978/4X)
886                  (insert (format "(%-18s . #x%04X)%s"
887                                  name value
888                                  line-breaking))
889                  )
890                 ((and (not readable)
891                       (null (get-char-attribute
892                              char
893                              (intern (format "%s*sources" name))))
894                       (not (string-match "\\*sources$" (symbol-name name)))
895                       (or (eq name '<-identical)
896                           (string-match "^->halfwidth" (symbol-name name))
897                           (and
898                            (string-match "^->fullwidth" (symbol-name name))
899                            (not
900                             (and (consp value)
901                                  (characterp (car value))
902                                  (encode-char
903                                   (car value) '=ucs 'defined-only))))
904                           (string-match "^->simplified" (symbol-name name))
905                           (string-match "^->vulgar" (symbol-name name))
906                           (string-match "^->wrong" (symbol-name name))
907                           (string-match "^->same" (symbol-name name))
908                           (string-match "^->original" (symbol-name name))
909                           (string-match "^->ancient" (symbol-name name))
910                           ))
911                  )
912                 ((or (eq name 'ideographic-structure)
913                      (eq name 'ideographic-)
914                      (string-match "^\\(->\\|<-\\)" (symbol-name name)))
915                  (insert (format "(%-18s%s " name line-breaking))
916                  (setq lbs (concat "\n" (make-string (current-column) ?\ ))
917                        separator nil)
918                  (while (consp value)
919                    (setq cell (car value))
920                    (if (integerp cell)
921                        (setq cell (decode-char '=ucs cell)))
922                    (cond ((eq name '->subsumptive)
923                           (when (or (not
924                                      (some (lambda (atr)
925                                              (get-char-attribute cell atr))
926                                            char-db-ignored-attributes))
927                                     (some (lambda (ccs)
928                                             (encode-char cell ccs
929                                                          'defined-only))
930                                           ccss))
931                             (if separator
932                                 (insert lbs))
933                             (let ((char-db-ignored-attributes
934                                    (cons '<-subsumptive
935                                          char-db-ignored-attributes)))
936                               (insert-char-attributes cell readable))
937                             (setq separator lbs))
938                           )
939                          ((characterp cell)
940                           (setq sources
941                                 (get-char-attribute
942                                  char
943                                  (intern (format "%s*sources" name))))
944                           (setq required-features nil)
945                           (dolist (source sources)
946                             (cond
947                              ((memq source '(JP JP/Jouyou
948                                                 shinjigen-1))
949                               (setq required-features
950                                     (union required-features
951                                            '(=jis-x0208
952                                              =jis-x0208@1990
953                                              =jis-x0213-1-2000
954                                              =jis-x0213-2-2000
955                                              =jis-x0212
956                                              =jis-x0208@1983
957                                              =jis-x0208@1978))))
958                              ((eq source 'CN)
959                               (setq required-features
960                                     (union required-features
961                                            '(=gb2312
962                                              =gb12345
963                                              =iso-ir165)))))
964                             (cond
965                              ((find-charset
966                                (setq ret (intern (format "=%s" source))))
967                               (setq required-features
968                                     (cons ret required-features)))
969                              (t (setq required-features
970                                       (cons source required-features)))))
971                           (cond ((string-match "@JP" (symbol-name name))
972                                  (setq required-features
973                                        (union required-features
974                                               '(=jis-x0208
975                                                 =jis-x0208@1990
976                                                 =jis-x0213-1-2000
977                                                 =jis-x0213-2-2000
978                                                 =jis-x0212
979                                                 =jis-x0208@1983
980                                                 =jis-x0208@1978))))
981                                 ((string-match "@CN" (symbol-name name))
982                                  (setq required-features
983                                        (union required-features
984                                               '(=gb2312
985                                                 =gb12345
986                                                 =iso-ir165)))))
987                           (if separator
988                               (insert lbs))
989                           (if readable
990                               (insert (format "%S" cell))
991                             (char-db-insert-char-spec cell readable
992                                                       nil
993                                                       required-features))
994                           (setq separator lbs))
995                          ((consp cell)
996                           (if separator
997                               (insert lbs))
998                           (if (consp (car cell))
999                               (char-db-insert-char-spec cell readable)
1000                             (char-db-insert-char-reference cell readable))
1001                           (setq separator lbs))
1002                          (t
1003                           (if separator
1004                               (insert separator))
1005                           (insert (prin1-to-string cell))
1006                           (setq separator " ")))
1007                    (setq value (cdr value)))
1008                  (insert ")")
1009                  (insert line-breaking))
1010                 ((memq name '(ideograph=
1011                               original-ideograph-of
1012                               ancient-ideograph-of
1013                               vulgar-ideograph-of
1014                               wrong-ideograph-of
1015                               ;; simplified-ideograph-of
1016                               ideographic-variants
1017                               ;; ideographic-different-form-of
1018                               ))
1019                  (insert (format "(%-18s%s " name line-breaking))
1020                  (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1021                        separator nil)
1022                  (while (consp value)
1023                    (setq cell (car value))
1024                    (if (and (consp cell)
1025                             (consp (car cell)))
1026                        (progn
1027                          (if separator
1028                              (insert lbs))
1029                          (char-db-insert-alist cell readable)
1030                          (setq separator lbs))
1031                      (if separator
1032                          (insert separator))
1033                      (insert (prin1-to-string cell))
1034                      (setq separator " "))
1035                    (setq value (cdr value)))
1036                  (insert ")")
1037                  (insert line-breaking))
1038                 ((consp value)
1039                  (insert (format "(%-18s " name))
1040                  (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1041                        separator nil)
1042                  (while (consp value)
1043                    (setq cell (car value))
1044                    (if (and (consp cell)
1045                             (consp (car cell))
1046                             (setq ret (condition-case nil
1047                                           (find-char cell)
1048                                         (error nil))))
1049                        (progn
1050                          (setq rest cell
1051                                al nil
1052                                cal nil)
1053                          (while rest
1054                            (setq key (car (car rest)))
1055                            (if (find-charset key)
1056                                (setq cal (cons key cal))
1057                              (setq al (cons key al)))
1058                            (setq rest (cdr rest)))
1059                          (if separator
1060                              (insert lbs))
1061                          (insert-char-attributes ret
1062                                                  readable
1063                                                  al cal)
1064                          (setq separator lbs))
1065                      (setq ret (prin1-to-string cell))
1066                      (if separator
1067                          (if (< (+ (current-column)
1068                                    (length ret)
1069                                    (length separator))
1070                                 76)
1071                              (insert separator)
1072                            (insert lbs)))
1073                      (insert ret)
1074                      (setq separator " "))
1075                    (setq value (cdr value)))
1076                  (insert ")")
1077                  (insert line-breaking))
1078                 (t
1079                  (insert (format "(%-18s . %S)%s"
1080                                  name value
1081                                  line-breaking)))
1082                 ))
1083       (setq attributes (cdr attributes)))
1084     (insert ")")))
1085
1086 (defun insert-char-data (char &optional readable
1087                               attributes)
1088   (save-restriction
1089     (narrow-to-region (point)(point))
1090     (insert "(define-char
1091   '")
1092     (insert-char-attributes char readable attributes)
1093     (insert ")\n")
1094     (goto-char (point-min))
1095     (while (re-search-forward "[ \t]+$" nil t)
1096       (replace-match ""))
1097     ;; from tabify.
1098     (goto-char (point-min))
1099     (while (re-search-forward "[ \t][ \t][ \t]*" nil t)
1100       (let ((column (current-column))
1101             (indent-tabs-mode t))
1102         (delete-region (match-beginning 0) (point))
1103         (indent-to column)))
1104     (goto-char (point-max))
1105     ;; (tabify (point-min)(point-max))
1106     ))
1107
1108 (defun insert-char-data-with-variant (char &optional printable
1109                                            no-ucs-unified
1110                                            script excluded-script)
1111   (insert-char-data char printable)
1112   (let ((variants (char-variants char))
1113         rest
1114         variant vs ret)
1115     (setq variants (sort variants #'<))
1116     (setq rest variants)
1117     (setq variants (cons char variants))
1118     (while rest
1119       (setq variant (car rest))
1120       (unless (get-char-attribute variant '<-subsumptive)
1121         (if (and (or (null script)
1122                      (null (setq vs (get-char-attribute variant 'script)))
1123                      (memq script vs))
1124                  (or (null excluded-script)
1125                      (null (setq vs (get-char-attribute variant 'script)))
1126                      (not (memq excluded-script vs))))
1127             (unless (and no-ucs-unified (get-char-attribute variant '=ucs))
1128               (insert-char-data variant printable)
1129               (if (setq ret (char-variants variant))
1130                   (while ret
1131                     (or (memq (car ret) variants)
1132                         ;; (get-char-attribute (car ret) '<-subsumptive)
1133                         (setq rest (nconc rest (list (car ret)))))
1134                     (setq ret (cdr ret)))))))
1135       (setq rest (cdr rest)))))
1136
1137 (defun insert-char-range-data (min max &optional script excluded-script)
1138   (let ((code min)
1139         char)
1140     (while (<= code max)
1141       (setq char (decode-char '=ucs code))
1142       (if (encode-char char '=ucs 'defined-only)
1143           (insert-char-data-with-variant char nil 'no-ucs-unified
1144                                          script excluded-script))
1145       (setq code (1+ code)))))
1146
1147 (defun write-char-range-data-to-file (min max file
1148                                           &optional script excluded-script)
1149   (let ((coding-system-for-write char-db-file-coding-system))
1150     (with-temp-buffer
1151       (insert (format ";; -*- coding: %s -*-\n"
1152                       char-db-file-coding-system))
1153       (insert-char-range-data min max script excluded-script)
1154       (write-region (point-min)(point-max) file))))
1155
1156 (defvar what-character-original-window-configuration)
1157
1158 ;;;###autoload
1159 (defun what-char-definition (char)
1160   (interactive (list (char-after)))
1161   (let ((buf (get-buffer-create "*Character Description*"))
1162         (the-buf (current-buffer))
1163         (win-conf (current-window-configuration)))
1164     (pop-to-buffer buf)
1165     (make-local-variable 'what-character-original-window-configuration)
1166     (setq what-character-original-window-configuration win-conf)
1167     (setq buffer-read-only nil)
1168     (erase-buffer)
1169     (condition-case err
1170         (progn
1171           (insert-char-data-with-variant char 'printable)
1172           (unless (char-attribute-alist char)
1173             (insert (format ";; = %c\n"
1174                             (let* ((rest (split-char char))
1175                                    (ccs (pop rest))
1176                                    (code (pop rest)))
1177                               (while rest
1178                                 (setq code (logior (lsh code 8)
1179                                                    (pop rest))))
1180                               (decode-char ccs code)))))
1181           ;; (char-db-update-comment)
1182           (set-buffer-modified-p nil)
1183           (view-mode the-buf (lambda (buf)
1184                                (set-window-configuration
1185                                 what-character-original-window-configuration)
1186                                ))
1187           (goto-char (point-min)))
1188       (error (progn
1189                (set-window-configuration
1190                 what-character-original-window-configuration)
1191                (signal (car err) (cdr err)))))))
1192
1193 (provide 'char-db-util)
1194
1195 ;;; char-db-util.el ends here