Sync up with r21-4-14-chise-0_21-25-si-same.
[chise/xemacs-chise.git.1] / lisp / utf-2000 / char-db-util.el
1 ;;; char-db-util.el --- Character Database utility
2
3 ;; Copyright (C) 1998,1999,2000,2001,2002,2003,2004 MORIOKA Tomohiko.
4
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: CHISE, Character Database, ISO/IEC 10646, Unicode, UCS-4, MULE.
7
8 ;; This file is part of XEmacs CHISE.
9
10 ;; XEmacs CHISE is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
14
15 ;; XEmacs CHISE is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 ;; General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with XEmacs CHISE; see the file COPYING.  If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
24
25 ;;; Code:
26
27 (require 'alist)
28
29 (defconst unidata-normative-category-alist
30   '(("Lu" letter        uppercase)
31     ("Ll" letter        lowercase)
32     ("Lt" letter        titlecase)
33     ("Mn" mark          non-spacing)
34     ("Mc" mark          spacing-combining)
35     ("Me" mark          enclosing)
36     ("Nd" number        decimal-digit)
37     ("Nl" number        letter)
38     ("No" number        other)
39     ("Zs" separator     space)
40     ("Zl" separator     line)
41     ("Zp" separator     paragraph)
42     ("Cc" other         control)
43     ("Cf" other         format)
44     ("Cs" other         surrogate)
45     ("Co" other         private-use)
46     ("Cn" other         not-assigned)))
47
48 (defconst unidata-informative-category-alist
49   '(("Lm" letter        modifier)
50     ("Lo" letter        other)
51     ("Pc" punctuation   connector)
52     ("Pd" punctuation   dash)
53     ("Ps" punctuation   open)
54     ("Pe" punctuation   close)
55     ("Pi" punctuation   initial-quote)
56     ("Pf" punctuation   final-quote)
57     ("Po" punctuation   other)
58     ("Sm" symbol        math)
59     ("Sc" symbol        currency)
60     ("Sk" symbol        modifier)
61     ("So" symbol        other)
62     ))
63
64 (defconst ideographic-radicals
65   (let ((v (make-vector 215 nil))
66         (i 1))
67     (while (< i 215)
68       (aset v i (decode-char '=ucs (+ #x2EFF i)))
69       (setq i (1+ i)))
70     v))
71
72 (defvar char-db-feature-domains
73   '(ucs daikanwa cns gt jis jis/alt jis/a jis/b
74         jis-x0213 misc unknown))
75
76 (defvar char-db-ignored-attributes nil)
77
78 (defun char-attribute-name< (ka kb)
79   (cond
80    ((eq '->denotational kb)
81     t)
82    ((eq '->subsumptive kb)
83     (not (eq '->denotational ka)))
84    ((eq '->denotational ka)
85     nil)
86    ((eq '->subsumptive ka)
87     nil)
88    ((find-charset ka)
89     (if (find-charset kb)
90         (if (<= (charset-id ka) 0)
91             (if (<= (charset-id kb) 0)
92                 (cond
93                  ((= (charset-dimension ka)
94                      (charset-dimension kb))
95                   (> (charset-id ka)(charset-id kb)))
96                  (t
97                   (> (charset-dimension ka)
98                      (charset-dimension kb))
99                   ))
100               t)
101           (if (<= (charset-id kb) 0)
102               nil
103             (< (charset-id ka)(charset-id kb))))
104       nil))
105    ((find-charset kb)
106     t)
107    ((symbolp ka)
108     (cond ((symbolp kb)
109            (string< (symbol-name ka)
110                     (symbol-name kb)))
111           (t)))
112    ((symbolp kb)
113     nil)))
114
115 (defvar char-db-coded-charset-priority-list
116   '(ascii
117     control-1
118     latin-iso8859-1
119     latin-iso8859-2
120     latin-iso8859-3
121     latin-iso8859-4
122     latin-iso8859-9
123     latin-jisx0201
124     cyrillic-iso8859-5
125     greek-iso8859-7
126     thai-tis620
127     =jis-x0208
128     japanese-jisx0208
129     japanese-jisx0212
130     japanese-jisx0208-1978
131     chinese-gb2312
132     chinese-cns11643-1
133     chinese-cns11643-2
134     chinese-cns11643-3
135     chinese-cns11643-4
136     chinese-cns11643-5
137     chinese-cns11643-6
138     chinese-cns11643-7
139     =jis-x0208-1990
140     =jis-x0213-1-2000
141     =jis-x0213-2-2000
142     korean-ksc5601
143     chinese-isoir165
144     katakana-jisx0201
145     hebrew-iso8859-8
146     chinese-gb12345
147     latin-viscii
148     ethiopic-ucs
149     =big5-cdp
150     =gt
151     ideograph-daikanwa-2
152     ideograph-daikanwa
153     =cbeta
154     ideograph-hanziku-1
155     ideograph-hanziku-2
156     ideograph-hanziku-3
157     ideograph-hanziku-4
158     ideograph-hanziku-5
159     ideograph-hanziku-6
160     ideograph-hanziku-7
161     ideograph-hanziku-8
162     ideograph-hanziku-9
163     ideograph-hanziku-10
164     ideograph-hanziku-11
165     ideograph-hanziku-12
166     =big5
167     =big5-eten
168     =gt-k
169     =jef-china3))
170
171 (defun char-db-make-char-spec (char)
172   (let (ret char-spec)
173     (cond ((characterp char)
174            (cond ((and (setq ret (encode-char char '=ucs 'defined-only))
175                        (not (and (<= #xE000 ret)(<= ret #xF8FF))))
176                   (setq char-spec (list (cons '=ucs ret)))
177                   (cond ((setq ret (get-char-attribute char 'name))
178                          (setq char-spec (cons (cons 'name ret) char-spec))
179                          )
180                         ((setq ret (get-char-attribute char 'name*))
181                          (setq char-spec (cons (cons 'name* ret) char-spec))
182                          ))
183                   )
184                  ((setq ret
185                         (catch 'tag
186                           (let ((rest char-db-coded-charset-priority-list)
187                                 ccs)
188                             (while rest
189                               (setq ccs (charset-name
190                                          (find-charset (car rest))))
191                               (if (setq ret
192                                         (encode-char char ccs
193                                                      'defined-only))
194                                   (throw 'tag (cons ccs ret)))
195                               (setq rest (cdr rest))))))
196                   (setq char-spec (list ret))
197                   (dolist (ccs (delq (car ret) (charset-list)))
198                     (if (and (or (charset-iso-final-char ccs)
199                                  (memq ccs
200                                        '(=daikanwa
201                                          =daikanwa-rev2
202                                          ;; =gt-k
203                                          )))
204                              (setq ret (encode-char char ccs 'defined-only)))
205                         (setq char-spec (cons (cons ccs ret) char-spec))))
206                   (if (null char-spec)
207                       (setq char-spec (split-char char)))
208                   (cond ((setq ret (get-char-attribute char 'name))
209                          (setq char-spec (cons (cons 'name ret) char-spec))
210                          )
211                         ((setq ret (get-char-attribute char 'name*))
212                          (setq char-spec (cons (cons 'name* ret) char-spec))
213                          ))
214                   ))
215            char-spec)
216           ((consp char)
217            char))))
218     
219 (defun char-db-insert-char-spec (char &optional readable column
220                                       required-features)
221   (unless column
222     (setq column (current-column)))
223   (let (char-spec temp-char)
224     (setq char-spec (char-db-make-char-spec char))
225     (unless (or (characterp char) ; char
226                 (condition-case nil
227                     (setq char (find-char char-spec))
228                   (error nil)))
229       ;; define temporary character
230       ;;   Current implementation is dirty.
231       (setq temp-char (define-char (cons '(ideograph-daikanwa . 0)
232                                          char-spec)))
233       (remove-char-attribute temp-char 'ideograph-daikanwa)
234       (setq char temp-char))
235     (insert-char-attributes char
236                             readable
237                             (union (mapcar #'car char-spec)
238                                    required-features))
239     (when temp-char
240       ;; undefine temporary character
241       ;;   Current implementation is dirty.
242       (setq char-spec (char-attribute-alist temp-char))
243       (while char-spec
244         (remove-char-attribute temp-char (car (car char-spec)))
245         (setq char-spec (cdr char-spec))))))
246
247 (defun char-db-insert-alist (alist &optional readable column)
248   (unless column
249     (setq column (current-column)))
250   (let ((line-breaking
251          (concat "\n" (make-string (1+ column) ?\ )))
252         name value
253         ret al ; cal
254         key
255         lbs cell rest separator)
256     (insert "(")
257     (while alist
258       (setq name (car (car alist))
259             value (cdr (car alist)))
260       (cond ((eq name 'char)
261              (insert "(char . ")
262              (if (setq ret (condition-case nil
263                                (find-char value)
264                              (error nil)))
265                  (progn
266                    (setq al nil
267                          ;; cal nil
268                          )
269                    (while value
270                      (setq key (car (car value)))
271                      ;; (if (find-charset key)
272                      ;;     (setq cal (cons key cal))
273                      (setq al (cons key al))
274                      ;; )
275                      (setq value (cdr value)))
276                    (insert-char-attributes ret
277                                            readable
278                                            (or al 'none) ; cal
279                                            ))
280                (insert (prin1-to-string value)))
281              (insert ")")
282              (insert line-breaking))
283             ((consp value)
284              (insert (format "(%-18s " name))
285              (setq lbs (concat "\n" (make-string (current-column) ?\ )))
286              (while (consp value)
287                (setq cell (car value))
288                (if (and (consp cell)
289                         (consp (car cell))
290                         (setq ret (condition-case nil
291                                       (find-char cell)
292                                     (error nil)))
293                         )
294                    (progn
295                      (setq rest cell
296                            al nil
297                            ;; cal nil
298                            )
299                      (while rest
300                        (setq key (car (car rest)))
301                        ;; (if (find-charset key)
302                        ;;     (setq cal (cons key cal))
303                        (setq al (cons key al))
304                        ;; )
305                        (setq rest (cdr rest)))
306                      (if separator
307                          (insert lbs))
308                      (insert-char-attributes ret
309                                              readable
310                                              al ; cal
311                                              )
312                      (setq separator lbs))
313                  (if separator
314                      (insert separator))
315                  (insert (prin1-to-string cell))
316                  (setq separator " "))
317                (setq value (cdr value)))
318              (insert ")")
319              (insert line-breaking))
320             (t
321              (insert (format "(%-18s . %S)%s"
322                              name value
323                              line-breaking))))
324       (setq alist (cdr alist))))
325   (insert ")"))
326
327 (defun char-db-insert-char-reference (plist &optional readable column)
328   (unless column
329     (setq column (current-column)))
330   (let ((line-breaking
331          (concat "\n" (make-string (1+ column) ?\ )))
332         (separator "")
333         name value)
334     (insert "(")
335     (while plist
336       (setq name (pop plist))
337       (setq value (pop plist))
338       (cond ((eq name :char)
339              (insert separator)
340              (insert ":char\t")
341              (cond ((numberp value)
342                     (setq value (decode-char '=ucs value)))
343                    ;; ((consp value)
344                    ;;  (setq value (or (find-char value)
345                    ;;                  value)))
346                    )
347              (char-db-insert-char-spec value readable)
348              (insert line-breaking)
349              (setq separator ""))
350             ((eq name :radical)
351              (insert (format "%s%s\t%d ; %c%s"
352                              separator
353                              name value
354                              (aref ideographic-radicals value)
355                              line-breaking))
356              (setq separator ""))
357             (t
358              (insert (format "%s%s\t%S" separator name value))
359              (setq separator line-breaking)))
360       ))
361   (insert ")"))
362
363 (defun char-db-decode-isolated-char (ccs code-point)
364   (let (ret)
365     (setq ret
366           (cond ((eq ccs 'arabic-iso8859-6)
367                  (decode-char ccs code-point))
368                 ((and (memq ccs '(=gt-pj-1
369                                   =gt-pj-2
370                                   =gt-pj-3
371                                   =gt-pj-4
372                                   =gt-pj-5
373                                   =gt-pj-6
374                                   =gt-pj-7
375                                   =gt-pj-8
376                                   =gt-pj-9
377                                   =gt-pj-10
378                                   =gt-pj-11))
379                       (setq ret (decode-char ccs code-point))
380                       (setq ret (encode-char ret '=gt 'defined-only)))
381                  (decode-builtin-char '=gt ret))
382                 (t
383                  (decode-builtin-char ccs code-point))))
384     (cond ((and (<= 0 (char-int ret))
385                 (<= (char-int ret) #x1F))
386            (decode-char '=ucs (+ #x2400 (char-int ret))))
387           ((= (char-int ret) #x7F)
388            ?\u2421)
389           (t ret))))
390
391 (defvar char-db-convert-obsolete-format t)
392
393 (defun insert-char-attributes (char &optional readable attributes column)
394   (unless column
395     (setq column (current-column)))
396   (let (name value has-long-ccs-name rest
397         radical strokes
398         (line-breaking
399          (concat "\n" (make-string (1+ column) ?\ )))
400         lbs cell separator ret
401         key al cal
402         dest-ccss
403         sources required-features
404         ccss)
405     (let (atr-d)
406       (setq attributes
407             (sort (if attributes
408                       (if (consp attributes)
409                           (progn
410                             (dolist (name attributes)
411                               (unless (memq name char-db-ignored-attributes)
412                                 (if (find-charset name)
413                                     (push name ccss))
414                                 (push name atr-d)))
415                             atr-d))
416                     (dolist (name (char-attribute-list))
417                       (unless (memq name char-db-ignored-attributes)
418                         (if (find-charset name)
419                             (push name ccss))
420                         (push name atr-d)))
421                     atr-d)
422                   #'char-attribute-name<)))
423     (insert "(")
424     (when (and (memq 'name attributes)
425                (setq value (get-char-attribute char 'name)))
426       (insert (format
427                (if (> (+ (current-column) (length value)) 48)
428                    "(name . %S)%s"
429                  "(name               . %S)%s")
430                value line-breaking))
431       (setq attributes (delq 'name attributes))
432       )
433     (when (and (memq 'name* attributes)
434                (setq value (get-char-attribute char 'name*)))
435       (insert (format
436                (if (> (+ (current-column) (length value)) 48)
437                    "(name* . %S)%s"
438                  "(name*              . %S)%s")
439                value line-breaking))
440       (setq attributes (delq 'name* attributes))
441       )
442     (when (and (memq 'script attributes)
443                (setq value (get-char-attribute char 'script)))
444       (insert (format "(script\t\t%s)%s"
445                       (mapconcat (function prin1-to-string)
446                                  value " ")
447                       line-breaking))
448       (setq attributes (delq 'script attributes))
449       )
450     ;; (when (and (memq '<-denotational attributes)
451     ;;            (setq value (get-char-attribute char '<-denotational))
452     ;;            (null (cdr value))
453     ;;            (setq value (encode-char (car value) 'ucs 'defined-only)))
454     ;;   (insert (format "(%-18s . #x%04X)\t; %c%s"
455     ;;                   '=>ucs value (decode-char 'ucs value)
456     ;;                   line-breaking))
457     ;;   (setq attributes (delq '<-denotational attributes)))
458     (dolist (name '(=>ucs =>ucs*))
459       (when (and (memq name attributes)
460                  (setq value (get-char-attribute char name)))
461         (insert (format "(%-18s . #x%04X)\t; %c%s"
462                         name value (decode-char '=ucs value)
463                         line-breaking))
464         (setq attributes (delq name attributes))))
465     (dolist (name '(=>ucs@gb =>ucs@cns =>ucs@jis =>ucs@ks =>ucs@big5))
466       (when (and (memq name attributes)
467                  (setq value (get-char-attribute char name)))
468         (insert (format "(%-18s . #x%04X)\t; %c%s"
469                         name value
470                         (decode-char (intern
471                                       (concat "="
472                                               (substring
473                                                (symbol-name name) 2)))
474                                      value)
475                         line-breaking))
476         (setq attributes (delq name attributes))
477         ))
478     ;; (dolist (name '(=>ucs-gb =>ucs-cns =>ucs-jis =>ucs-ks =>ucs-big5))
479     ;;   (when (and (memq name attributes)
480     ;;              (setq value (get-char-attribute char name)))
481     ;;     (insert (format "(%-18s . #x%04X)\t; %c%s"
482     ;;                     (intern
483     ;;                      (concat "=>ucs@"
484     ;;                              (substring (symbol-name name) 6)))
485     ;;                     value
486     ;;                     (decode-char (intern
487     ;;                                   (concat "=ucs@"
488     ;;                                           (substring
489     ;;                                            (symbol-name name) 6)))
490     ;;                                  value)
491     ;;                     line-breaking))
492     ;;     (setq attributes (delq name attributes))))
493     ;; (when (and (memq '->ucs attributes)
494     ;;            (setq value (get-char-attribute char '->ucs)))
495     ;;   (insert (format (if char-db-convert-obsolete-format
496     ;;                       "(=>ucs\t\t. #x%04X)\t; %c%s"
497     ;;                     "(->ucs\t\t. #x%04X)\t; %c%s")
498     ;;                   value (decode-char '=ucs value)
499     ;;                   line-breaking))
500     ;;   (setq attributes (delq '->ucs attributes))
501     ;;   )
502     (dolist (name '(=>daikanwa))
503       (when (and (memq name attributes)
504                  (setq value (get-char-attribute char name)))
505         (insert
506          (if (integerp value)
507              (format "(%-18s . %05d)\t; %c%s"
508                      name value (decode-char '=daikanwa value)
509                      line-breaking)
510            (format "(%-18s %s)\t; %c%s"
511                    name
512                    (mapconcat (function prin1-to-string)
513                               value " ")
514                    (char-representative-of-daikanwa char)
515                    line-breaking)))
516         (setq attributes (delq name attributes))))
517     (when (and (memq 'general-category attributes)
518                (setq value (get-char-attribute char 'general-category)))
519       (insert (format
520                "(general-category\t%s) ; %s%s"
521                (mapconcat (lambda (cell)
522                             (format "%S" cell))
523                           value " ")
524                (cond ((rassoc value unidata-normative-category-alist)
525                       "Normative Category")
526                      ((rassoc value unidata-informative-category-alist)
527                       "Informative Category")
528                      (t
529                       "Unknown Category"))
530                line-breaking))
531       (setq attributes (delq 'general-category attributes))
532       )
533     (when (and (memq 'bidi-category attributes)
534                (setq value (get-char-attribute char 'bidi-category)))
535       (insert (format "(bidi-category\t. %S)%s"
536                       value
537                       line-breaking))
538       (setq attributes (delq 'bidi-category attributes))
539       )
540     (unless (or (not (memq 'mirrored attributes))
541                 (eq (setq value (get-char-attribute char 'mirrored 'empty))
542                     'empty))
543       (insert (format "(mirrored\t\t. %S)%s"
544                       value
545                       line-breaking))
546       (setq attributes (delq 'mirrored attributes))
547       )
548     (cond
549      ((and (memq 'decimal-digit-value attributes)
550            (setq value (get-char-attribute char 'decimal-digit-value)))
551       (insert (format "(decimal-digit-value . %S)%s"
552                       value
553                       line-breaking))
554       (setq attributes (delq 'decimal-digit-value attributes))
555       (when (and (memq 'digit-value attributes)
556                  (setq value (get-char-attribute char 'digit-value)))
557         (insert (format "(digit-value\t . %S)%s"
558                         value
559                         line-breaking))
560         (setq attributes (delq 'digit-value attributes))
561         )
562       (when (and (memq 'numeric-value attributes)
563                  (setq value (get-char-attribute char 'numeric-value)))
564         (insert (format "(numeric-value\t . %S)%s"
565                         value
566                         line-breaking))
567         (setq attributes (delq 'numeric-value attributes))
568         )
569       )
570      (t
571       (when (and (memq 'digit-value attributes)
572                  (setq value (get-char-attribute char 'digit-value)))
573         (insert (format "(digit-value\t. %S)%s"
574                         value
575                         line-breaking))
576         (setq attributes (delq 'digit-value attributes))
577         )
578       (when (and (memq 'numeric-value attributes)
579                  (setq value (get-char-attribute char 'numeric-value)))
580         (insert (format "(numeric-value\t. %S)%s"
581                         value
582                         line-breaking))
583         (setq attributes (delq 'numeric-value attributes))
584         )))
585     (when (and (memq 'iso-10646-comment attributes)
586                (setq value (get-char-attribute char 'iso-10646-comment)))
587       (insert (format "(iso-10646-comment\t. %S)%s"
588                       value
589                       line-breaking))
590       (setq attributes (delq 'iso-10646-comment attributes))
591       )
592     (when (and (memq 'morohashi-daikanwa attributes)
593                (setq value (get-char-attribute char 'morohashi-daikanwa)))
594       (insert (format "(morohashi-daikanwa\t%s)%s"
595                       (mapconcat (function prin1-to-string) value " ")
596                       line-breaking))
597       (setq attributes (delq 'morohashi-daikanwa attributes))
598       )
599     (setq radical nil
600           strokes nil)
601     (when (and (memq 'ideographic-radical attributes)
602                (setq value (get-char-attribute char 'ideographic-radical)))
603       (setq radical value)
604       (insert (format "(ideographic-radical . %S)\t; %c%s"
605                       radical
606                       (aref ideographic-radicals radical)
607                       line-breaking))
608       (setq attributes (delq 'ideographic-radical attributes))
609       )
610     (let (key)
611       (dolist (domain char-db-feature-domains)
612         (setq key (intern (format "%s@%s" 'ideographic-radical domain)))
613         (when (and (memq key attributes)
614                    (setq value (get-char-attribute char key)))
615           (setq radical value)
616           (insert (format "(%s . %S)\t; %c%s"
617                           key
618                           radical
619                           (aref ideographic-radicals radical)
620                           line-breaking))
621           (setq attributes (delq key attributes))
622           )
623         (setq key (intern (format "%s@%s" 'ideographic-strokes domain)))
624         (when (and (memq key attributes)
625                    (setq value (get-char-attribute char key)))
626           (setq strokes value)
627           (insert (format "(%s . %S)%s"
628                           key
629                           strokes
630                           line-breaking))
631           (setq attributes (delq key attributes))
632           )
633         (setq key (intern (format "%s@%s" 'total-strokes domain)))
634         (when (and (memq key attributes)
635                    (setq value (get-char-attribute char key)))
636           (insert (format "(%s       . %S)%s"
637                           key
638                           value
639                           line-breaking))
640           (setq attributes (delq key attributes))
641           )
642         (dolist (feature '(ideographic-radical
643                            ideographic-strokes
644                            total-strokes))
645           (setq key (intern (format "%s@%s*sources" feature domain)))
646           (when (and (memq key attributes)
647                      (setq value (get-char-attribute char key)))
648             (insert (format "(%s%s" key line-breaking))
649             (dolist (cell value)
650               (insert (format " %s" cell)))
651             (insert ")")
652             (insert line-breaking)
653             (setq attributes (delq key attributes))
654             ))
655         ))
656     (when (and (memq 'ideographic-strokes attributes)
657                (setq value (get-char-attribute char 'ideographic-strokes)))
658       (setq strokes value)
659       (insert (format "(ideographic-strokes . %S)%s"
660                       strokes
661                       line-breaking))
662       (setq attributes (delq 'ideographic-strokes attributes))
663       )
664     (when (and (memq 'kangxi-radical attributes)
665                (setq value (get-char-attribute char 'kangxi-radical)))
666       (unless (eq value radical)
667         (insert (format "(kangxi-radical\t . %S)\t; %c%s"
668                         value
669                         (aref ideographic-radicals value)
670                         line-breaking))
671         (or radical
672             (setq radical value)))
673       (setq attributes (delq 'kangxi-radical attributes))
674       )
675     (when (and (memq 'kangxi-strokes attributes)
676                (setq value (get-char-attribute char 'kangxi-strokes)))
677       (unless (eq value strokes)
678         (insert (format "(kangxi-strokes\t . %S)%s"
679                         value
680                         line-breaking))
681         (or strokes
682             (setq strokes value)))
683       (setq attributes (delq 'kangxi-strokes attributes))
684       )
685     (when (and (memq 'japanese-radical attributes)
686                (setq value (get-char-attribute char 'japanese-radical)))
687       (unless (eq value radical)
688         (insert (format "(japanese-radical\t . %S)\t; %c%s"
689                         value
690                         (aref ideographic-radicals value)
691                         line-breaking))
692         (or radical
693             (setq radical value)))
694       (setq attributes (delq 'japanese-radical attributes))
695       )
696     (when (and (memq 'japanese-strokes attributes)
697                (setq value (get-char-attribute char 'japanese-strokes)))
698       (unless (eq value strokes)
699         (insert (format "(japanese-strokes\t . %S)%s"
700                         value
701                         line-breaking))
702         (or strokes
703             (setq strokes value)))
704       (setq attributes (delq 'japanese-strokes attributes))
705       )
706     (when (and (memq 'cns-radical attributes)
707                (setq value (get-char-attribute char 'cns-radical)))
708       (insert (format "(cns-radical\t . %S)\t; %c%s"
709                       value
710                       (aref ideographic-radicals value)
711                       line-breaking))
712       (setq attributes (delq 'cns-radical attributes))
713       )
714     (when (and (memq 'cns-strokes attributes)
715                (setq value (get-char-attribute char 'cns-strokes)))
716       (unless (eq value strokes)
717         (insert (format "(cns-strokes\t . %S)%s"
718                         value
719                         line-breaking))
720         (or strokes
721             (setq strokes value)))
722       (setq attributes (delq 'cns-strokes attributes))
723       )
724     (when (and (memq 'shinjigen-1-radical attributes)
725                (setq value (get-char-attribute char 'shinjigen-1-radical)))
726       (unless (eq value radical)
727         (insert (format "(shinjigen-1-radical . %S)\t; %c%s"
728                         value
729                         (aref ideographic-radicals value)
730                         line-breaking))
731         (or radical
732             (setq radical value)))
733       (setq attributes (delq 'shinjigen-1-radical attributes))
734       )
735     (when (and (memq 'ideographic- attributes)
736                (setq value (get-char-attribute char 'ideographic-)))
737       (insert "(ideographic-       ")
738       (setq lbs (concat "\n" (make-string (current-column) ?\ ))
739             separator nil)
740       (while (consp value)
741         (setq cell (car value))
742         (if (integerp cell)
743             (setq cell (decode-char '=ucs cell)))
744         (cond ((characterp cell)
745                (if separator
746                    (insert lbs))
747                (if readable
748                    (insert (format "%S" cell))
749                  (char-db-insert-char-spec cell readable))
750                (setq separator lbs))
751               ((consp cell)
752                (if separator
753                    (insert lbs))
754                (if (consp (car cell))
755                    (char-db-insert-char-spec cell readable)
756                  (char-db-insert-char-reference cell readable))
757                (setq separator lbs))
758               (t
759                (if separator
760                    (insert separator))
761                (insert (prin1-to-string cell))
762                (setq separator " ")))
763         (setq value (cdr value)))
764       (insert ")")
765       (insert line-breaking)
766       (setq attributes (delq 'ideographic- attributes)))
767     (when (and (memq 'total-strokes attributes)
768                (setq value (get-char-attribute char 'total-strokes)))
769       (insert (format "(total-strokes       . %S)%s"
770                       value
771                       line-breaking))
772       (setq attributes (delq 'total-strokes attributes))
773       )
774     (when (and (memq '->ideograph attributes)
775                (setq value (get-char-attribute char '->ideograph)))
776       (insert (format "(->ideograph\t%s)%s"
777                       (mapconcat (lambda (code)
778                                    (cond ((symbolp code)
779                                           (symbol-name code))
780                                          ((integerp code)
781                                           (format "#x%04X" code))
782                                          (t
783                                           (format "%s %S"
784                                                   line-breaking code))))
785                                  value " ")
786                       line-breaking))
787       (setq attributes (delq '->ideograph attributes))
788       )
789     (when (and (memq '->decomposition attributes)
790                (setq value (get-char-attribute char '->decomposition)))
791       (insert (format "(->decomposition\t%s)%s"
792                       (mapconcat (lambda (code)
793                                    (cond ((symbolp code)
794                                           (symbol-name code))
795                                          ((characterp code)
796                                           (if readable
797                                               (format "%S" code)
798                                             (format "#x%04X"
799                                                     (char-int code))
800                                             ))
801                                          ((integerp code)
802                                           (format "#x%04X" code))
803                                          (t
804                                           (format "%s%S" line-breaking code))))
805                                  value " ")
806                       line-breaking))
807       (setq attributes (delq '->decomposition attributes))
808       )
809     (if (equal (get-char-attribute char '->titlecase)
810                (get-char-attribute char '->uppercase))
811         (setq attributes (delq '->titlecase attributes)))
812     (when (and (memq '->mojikyo attributes)
813                (setq value (get-char-attribute char '->mojikyo)))
814       (insert (format "(->mojikyo\t\t. %06d)\t; %c%s"
815                       value (decode-char 'mojikyo value)
816                       line-breaking))
817       (setq attributes (delq '->mojikyo attributes))
818       )
819     (when (and (memq 'hanyu-dazidian-vol attributes)
820                (setq value (get-char-attribute char 'hanyu-dazidian-vol)))
821       (insert (format "(hanyu-dazidian-vol  . %d)%s"
822                       value line-breaking))
823       (setq attributes (delq 'hanyu-dazidian-vol attributes))
824       )
825     (when (and (memq 'hanyu-dazidian-page attributes)
826                (setq value (get-char-attribute char 'hanyu-dazidian-page)))
827       (insert (format "(hanyu-dazidian-page . %d)%s"
828                       value line-breaking))
829       (setq attributes (delq 'hanyu-dazidian-page attributes))
830       )
831     (when (and (memq 'hanyu-dazidian-char attributes)
832                (setq value (get-char-attribute char 'hanyu-dazidian-char)))
833       (insert (format "(hanyu-dazidian-char . %d)%s"
834                       value line-breaking))
835       (setq attributes (delq 'hanyu-dazidian-char attributes))
836       )
837     (unless readable
838       (dolist (ignored '(composition
839                          ->denotational <-subsumptive ->ucs-unified
840                          ->ideographic-component-forms
841                          <-same))
842         (setq attributes (delq ignored attributes))))
843     ;; (setq rest ccs-attributes)
844     ;; (while (and rest
845     ;;             (progn
846     ;;               (setq value (get-char-attribute char (car rest)))
847     ;;               (if value
848     ;;                   (if (>= (length (symbol-name (car rest))) 19)
849     ;;                       (progn
850     ;;                         (setq has-long-ccs-name t)
851     ;;                         nil)
852     ;;                     t)
853     ;;                 t)))
854     ;;   (setq rest (cdr rest)))
855     (while attributes
856       (setq name (car attributes))
857       (if (setq value (get-char-attribute char name))
858           (cond ((setq ret (find-charset name))
859                  (setq name (charset-name ret))
860                  (if (and (not (memq name dest-ccss))
861                           (prog1
862                               (setq value (get-char-attribute char name))
863                             (setq dest-ccss (cons name dest-ccss))))
864                      (insert
865                       (format
866                        (cond ((memq name '(=daikanwa
867                                            =daikanwa-rev1 =daikanwa-rev2
868                                            =gt =gt-k =cbeta))
869                               (if has-long-ccs-name
870                                   "(%-26s . %05d)\t; %c%s"
871                                 "(%-18s . %05d)\t; %c%s"))
872                              ((eq name 'mojikyo)
873                               (if has-long-ccs-name
874                                   "(%-26s . %06d)\t; %c%s"
875                                 "(%-18s . %06d)\t; %c%s"))
876                              ((>= (charset-dimension name) 2)
877                               (if has-long-ccs-name
878                                   "(%-26s . #x%04X)\t; %c%s"
879                                 "(%-18s . #x%04X)\t; %c%s"))
880                              (t
881                               (if has-long-ccs-name
882                                   "(%-26s . #x%02X)\t; %c%s"
883                                 "(%-18s . #x%02X)\t; %c%s")))
884                        name
885                        (if (= (charset-iso-graphic-plane name) 1)
886                            (logior value
887                                    (cond ((= (charset-dimension name) 1)
888                                           #x80)
889                                          ((= (charset-dimension name) 2)
890                                           #x8080)
891                                          ((= (charset-dimension name) 3)
892                                           #x808080)
893                                          (t 0)))
894                          value)
895                        (char-db-decode-isolated-char name value)
896                        line-breaking)))
897                  )
898                 ((string-match "^=>ucs@" (symbol-name name))
899                  (insert (format "(%-18s . #x%04X)\t; %c%s"
900                                  name value (decode-char '=ucs value)
901                                  line-breaking))
902                  )
903                 ((eq name 'jisx0208-1978/4X)
904                  (insert (format "(%-18s . #x%04X)%s"
905                                  name value
906                                  line-breaking))
907                  )
908                 ((and (not readable)
909                       (or (eq name '<-identical)
910                           (string-match "^->simplified" (symbol-name name))))
911                  )
912                 ((or (eq name 'ideographic-structure)
913                      (eq name 'ideographic-)
914                      (string-match "^\\(->\\|<-\\)" (symbol-name name)))
915                  (insert (format "(%-18s%s " name line-breaking))
916                  (setq lbs (concat "\n" (make-string (current-column) ?\ ))
917                        separator nil)
918                  (while (consp value)
919                    (setq cell (car value))
920                    (if (integerp cell)
921                        (setq cell (decode-char '=ucs cell)))
922                    (cond ((eq name '->subsumptive)
923                           (when (or (not
924                                      (some (lambda (atr)
925                                              (get-char-attribute cell atr))
926                                            char-db-ignored-attributes))
927                                     (some (lambda (ccs)
928                                             (encode-char cell ccs
929                                                          'defined-only))
930                                           ccss))
931                             (if separator
932                                 (insert lbs))
933                             (let ((char-db-ignored-attributes
934                                    (cons '<-subsumptive
935                                          char-db-ignored-attributes)))
936                               (insert-char-attributes cell readable))
937                             (setq separator lbs))
938                           )
939                          ((characterp cell)
940                           (setq sources
941                                 (get-char-attribute
942                                  char
943                                  (intern (format "%s*sources" name))))
944                           (setq required-features nil)
945                           (dolist (source sources)
946                             (setq required-features
947                                   (cons
948                                    (if (find-charset
949                                         (setq ret (intern
950                                                    (format "=%s" source))))
951                                        ret
952                                      source)
953                                    required-features)))
954                           (when (string-match "@JP" (symbol-name name))
955                             (setq required-features
956                                   (union required-features
957                                          '(=jis-x0208
958                                            =jis-x0208-1990
959                                            =jis-x0213-1-2000
960                                            =jis-x0213-2-2000
961                                            =jis-x0212
962                                            =jis-x0208-1983
963                                            =jis-x0208-1978))))
964                           (if separator
965                               (insert lbs))
966                           (if readable
967                               (insert (format "%S" cell))
968                             (char-db-insert-char-spec cell readable
969                                                       nil
970                                                       required-features))
971                           (setq separator lbs))
972                          ((consp cell)
973                           (if separator
974                               (insert lbs))
975                           (if (consp (car cell))
976                               (char-db-insert-char-spec cell readable)
977                             (char-db-insert-char-reference cell readable))
978                           (setq separator lbs))
979                          (t
980                           (if separator
981                               (insert separator))
982                           (insert (prin1-to-string cell))
983                           (setq separator " ")))
984                    (setq value (cdr value)))
985                  (insert ")")
986                  (insert line-breaking))
987                 ((memq name '(ideograph=
988                               original-ideograph-of
989                               ancient-ideograph-of
990                               vulgar-ideograph-of
991                               wrong-ideograph-of
992                               ;; simplified-ideograph-of
993                               ideographic-variants
994                               ;; ideographic-different-form-of
995                               ))
996                  (insert (format "(%-18s%s " name line-breaking))
997                  (setq lbs (concat "\n" (make-string (current-column) ?\ ))
998                        separator nil)
999                  (while (consp value)
1000                    (setq cell (car value))
1001                    (if (and (consp cell)
1002                             (consp (car cell)))
1003                        (progn
1004                          (if separator
1005                              (insert lbs))
1006                          (char-db-insert-alist cell readable)
1007                          (setq separator lbs))
1008                      (if separator
1009                          (insert separator))
1010                      (insert (prin1-to-string cell))
1011                      (setq separator " "))
1012                    (setq value (cdr value)))
1013                  (insert ")")
1014                  (insert line-breaking))
1015                 ((consp value)
1016                  (insert (format "(%-18s " name))
1017                  (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1018                        separator nil)
1019                  (while (consp value)
1020                    (setq cell (car value))
1021                    (if (and (consp cell)
1022                             (consp (car cell))
1023                             (setq ret (condition-case nil
1024                                           (find-char cell)
1025                                         (error nil))))
1026                        (progn
1027                          (setq rest cell
1028                                al nil
1029                                cal nil)
1030                          (while rest
1031                            (setq key (car (car rest)))
1032                            (if (find-charset key)
1033                                (setq cal (cons key cal))
1034                              (setq al (cons key al)))
1035                            (setq rest (cdr rest)))
1036                          (if separator
1037                              (insert lbs))
1038                          (insert-char-attributes ret
1039                                                  readable
1040                                                  al cal)
1041                          (setq separator lbs))
1042                      (setq ret (prin1-to-string cell))
1043                      (if separator
1044                          (if (< (+ (current-column)
1045                                    (length ret)
1046                                    (length separator))
1047                                 76)
1048                              (insert separator)
1049                            (insert lbs)))
1050                      (insert ret)
1051                      (setq separator " "))
1052                    (setq value (cdr value)))
1053                  (insert ")")
1054                  (insert line-breaking))
1055                 (t
1056                  (insert (format "(%-18s . %S)%s"
1057                                  name value
1058                                  line-breaking)))
1059                 ))
1060       (setq attributes (cdr attributes)))
1061     ;; (while ccs-attributes
1062     ;;   (setq name (charset-name (car ccs-attributes)))
1063     ;;   (if (and (not (memq name dest-ccss))
1064     ;;            (prog1
1065     ;;                (setq value (get-char-attribute char name))
1066     ;;              (setq dest-ccss (cons name dest-ccss))))
1067     ;;       (insert
1068     ;;        (format
1069     ;;         (cond ((memq name '(=daikanwa
1070     ;;                             =daikanwa-rev1 =daikanwa-rev2
1071     ;;                             =gt =gt-k =cbeta))
1072     ;;                (if has-long-ccs-name
1073     ;;                    "(%-26s . %05d)\t; %c%s"
1074     ;;                  "(%-18s . %05d)\t; %c%s"))
1075     ;;               ((eq name 'mojikyo)
1076     ;;                (if has-long-ccs-name
1077     ;;                    "(%-26s . %06d)\t; %c%s"
1078     ;;                  "(%-18s . %06d)\t; %c%s"))
1079     ;;               ((>= (charset-dimension name) 2)
1080     ;;                (if has-long-ccs-name
1081     ;;                    "(%-26s . #x%04X)\t; %c%s"
1082     ;;                  "(%-18s . #x%04X)\t; %c%s"))
1083     ;;               (t
1084     ;;                (if has-long-ccs-name
1085     ;;                    "(%-26s . #x%02X)\t; %c%s"
1086     ;;                  "(%-18s . #x%02X)\t; %c%s")))
1087     ;;         name
1088     ;;         (if (= (charset-iso-graphic-plane name) 1)
1089     ;;             (logior value
1090     ;;                     (cond ((= (charset-dimension name) 1)
1091     ;;                            #x80)
1092     ;;                           ((= (charset-dimension name) 2)
1093     ;;                            #x8080)
1094     ;;                           ((= (charset-dimension name) 3)
1095     ;;                            #x808080)
1096     ;;                           (t 0)))
1097     ;;           value)
1098     ;;         (char-db-decode-isolated-char name value)
1099     ;;         line-breaking)))
1100     ;;   (setq ccs-attributes (cdr ccs-attributes)))
1101     (insert ")")))
1102
1103 (defun insert-char-data (char &optional readable
1104                               attributes)
1105   (save-restriction
1106     (narrow-to-region (point)(point))
1107     (insert "(define-char
1108   '")
1109     (insert-char-attributes char readable attributes)
1110     (insert ")\n")
1111     (goto-char (point-min))
1112     (while (re-search-forward "[ \t]+$" nil t)
1113       (replace-match ""))
1114     ;; from tabify.
1115     (goto-char (point-min))
1116     (while (re-search-forward "[ \t][ \t][ \t]*" nil t)
1117       (let ((column (current-column))
1118             (indent-tabs-mode t))
1119         (delete-region (match-beginning 0) (point))
1120         (indent-to column)))
1121     (goto-char (point-max))
1122     ;; (tabify (point-min)(point-max))
1123     ))
1124
1125 (defun insert-char-data-with-variant (char &optional printable
1126                                            no-ucs-unified
1127                                            script excluded-script)
1128   (insert-char-data char printable)
1129   (let ((variants (or (char-variants char)
1130                       (let ((ucs (get-char-attribute char '->ucs)))
1131                         (if ucs
1132                             (delete char (char-variants (int-char ucs)))))))
1133         variant vs)
1134     (setq variants (sort variants #'<))
1135     (while variants
1136       (setq variant (car variants))
1137       (if (and (or (null script)
1138                    (null (setq vs (get-char-attribute variant 'script)))
1139                    (memq script vs))
1140                (or (null excluded-script)
1141                    (null (setq vs (get-char-attribute variant 'script)))
1142                    (not (memq excluded-script vs))))
1143           (or (and no-ucs-unified (get-char-attribute variant '=ucs))
1144               (insert-char-data variant printable)))
1145       (setq variants (cdr variants))
1146       )))
1147
1148 (defun insert-char-range-data (min max &optional script excluded-script)
1149   (let ((code min)
1150         char)
1151     (while (<= code max)
1152       (setq char (decode-char '=ucs code))
1153       (if (encode-char char '=ucs 'defined-only)
1154           (insert-char-data-with-variant char nil 'no-ucs-unified
1155                                          script excluded-script))
1156       (setq code (1+ code)))))
1157
1158 (defun write-char-range-data-to-file (min max file
1159                                           &optional script excluded-script)
1160   (let ((coding-system-for-write 'utf-8-mcs))
1161     (with-temp-buffer
1162       (insert ";; -*- coding: utf-8-mcs -*-\n")
1163       (insert-char-range-data min max script excluded-script)
1164       (write-region (point-min)(point-max) file))))
1165
1166 (defvar what-character-original-window-configuration)
1167
1168 ;;;###autoload
1169 (defun what-char-definition (char)
1170   (interactive (list (char-after)))
1171   (let ((buf (get-buffer-create "*Character Description*"))
1172         (the-buf (current-buffer))
1173         (win-conf (current-window-configuration)))
1174     (pop-to-buffer buf)
1175     (make-local-variable 'what-character-original-window-configuration)
1176     (setq what-character-original-window-configuration win-conf)
1177     (setq buffer-read-only nil)
1178     (erase-buffer)
1179     (condition-case err
1180         (progn
1181           (insert-char-data-with-variant char 'printable)
1182           (unless (char-attribute-alist char)
1183             (insert (format ";; = %c\n"
1184                             (let* ((rest (split-char char))
1185                                    (ccs (pop rest))
1186                                    (code (pop rest)))
1187                               (while rest
1188                                 (setq code (logior (lsh code 8)
1189                                                    (pop rest))))
1190                               (decode-char ccs code)))))
1191           ;; (char-db-update-comment)
1192           (set-buffer-modified-p nil)
1193           (view-mode the-buf (lambda (buf)
1194                                (set-window-configuration
1195                                 what-character-original-window-configuration)
1196                                ))
1197           (goto-char (point-min)))
1198       (error (progn
1199                (set-window-configuration
1200                 what-character-original-window-configuration)
1201                (signal (car err) (cdr err)))))))
1202
1203 (provide 'char-db-util)
1204
1205 ;;; char-db-util.el ends here