17049aa0e7004b2a6b43e825877eb51aa3709905
[chise/xemacs-chise.git.1] / lisp / utf-2000 / char-db-util.el
1 ;;; char-db-util.el --- Character Database utility -*- coding: utf-8-er; -*-
2
3 ;; Copyright (C) 1998,1999,2000,2001,2002,2003,2004,2005 MORIOKA Tomohiko.
4
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: CHISE, Character Database, ISO/IEC 10646, UCS, Unicode, MULE.
7
8 ;; This file is part of XEmacs CHISE.
9
10 ;; XEmacs CHISE is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
14
15 ;; XEmacs CHISE is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 ;; General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with XEmacs CHISE; see the file COPYING.  If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
24
25 ;;; Code:
26
27 (require 'alist)
28
29 (defconst unidata-normative-category-alist
30   '(("Lu" letter        uppercase)
31     ("Ll" letter        lowercase)
32     ("Lt" letter        titlecase)
33     ("Mn" mark          non-spacing)
34     ("Mc" mark          spacing-combining)
35     ("Me" mark          enclosing)
36     ("Nd" number        decimal-digit)
37     ("Nl" number        letter)
38     ("No" number        other)
39     ("Zs" separator     space)
40     ("Zl" separator     line)
41     ("Zp" separator     paragraph)
42     ("Cc" other         control)
43     ("Cf" other         format)
44     ("Cs" other         surrogate)
45     ("Co" other         private-use)
46     ("Cn" other         not-assigned)))
47
48 (defconst unidata-informative-category-alist
49   '(("Lm" letter        modifier)
50     ("Lo" letter        other)
51     ("Pc" punctuation   connector)
52     ("Pd" punctuation   dash)
53     ("Ps" punctuation   open)
54     ("Pe" punctuation   close)
55     ("Pi" punctuation   initial-quote)
56     ("Pf" punctuation   final-quote)
57     ("Po" punctuation   other)
58     ("Sm" symbol        math)
59     ("Sc" symbol        currency)
60     ("Sk" symbol        modifier)
61     ("So" symbol        other)
62     ))
63
64 (defconst ideographic-radicals
65   (let ((v (make-vector 215 nil))
66         (i 1))
67     (while (< i 215)
68       (aset v i (decode-char '=ucs (+ #x2EFF i)))
69       (setq i (1+ i)))
70     v))
71
72 (defun ideographic-radical (number)
73   (aref ideographic-radicals number))
74
75 (defconst shuowen-radicals
76   [?一 ?上 ?示 ?三 ?王 ?玉 ?玨 ?气 ?士 ?丨 ?屮 ?艸 ?茻])
77
78 (defun shuowen-radical (number)
79   (aref shuowen-radicals (1- number)))
80
81 (defvar char-db-file-coding-system 'utf-8-mcs-er)
82
83 (defvar char-db-feature-domains
84   '(ucs daikanwa cns gt jis jis/alt jis/a jis/b
85         jis-x0212 jis-x0213 cdp shinjigen misc unknown))
86
87 (defvar char-db-ignored-attributes '(ideographic-products))
88
89 (defun char-attribute-name< (ka kb)
90   (cond
91    ((eq '->denotational kb)
92     t)
93    ((eq '->subsumptive kb)
94     (not (eq '->denotational ka)))
95    ((eq '->denotational ka)
96     nil)
97    ((eq '->subsumptive ka)
98     nil)
99    ((and (symbolp ka)
100          (string-match "^->" (symbol-name ka)))
101     (cond ((and (symbolp kb)
102                 (string-match "^->" (symbol-name kb)))
103            (string< (symbol-name ka)
104                     (symbol-name kb))
105            ))
106     )
107    ((and (symbolp kb)
108          (string-match "^->" (symbol-name kb)))
109     t)
110    ((find-charset ka)
111     (if (find-charset kb)
112         (if (<= (charset-id ka) 1)
113             (if (<= (charset-id kb) 1)
114                 (cond
115                  ((= (charset-dimension ka)
116                      (charset-dimension kb))
117                   (> (charset-id ka)(charset-id kb)))
118                  (t
119                   (> (charset-dimension ka)
120                      (charset-dimension kb))
121                   ))
122               t)
123           (if (<= (charset-id kb) 1)
124               nil
125             (< (charset-id ka)(charset-id kb))))
126       nil))
127    ((find-charset kb)
128     t)
129    ((symbolp ka)
130     (cond ((symbolp kb)
131            (string< (symbol-name ka)
132                     (symbol-name kb)))
133           (t)))
134    ((symbolp kb)
135     nil)))
136
137 (defvar char-db-coded-charset-priority-list
138   '(ascii
139     control-1
140     latin-iso8859-1
141     latin-iso8859-2
142     latin-iso8859-3
143     latin-iso8859-4
144     latin-iso8859-9
145     latin-jisx0201
146     cyrillic-iso8859-5
147     greek-iso8859-7
148     thai-tis620
149     =jis-x0208
150     =jis-x0208@1978
151     =jis-x0208@1983
152     japanese-jisx0212
153     chinese-gb2312
154     chinese-cns11643-1
155     chinese-cns11643-2
156     chinese-cns11643-3
157     chinese-cns11643-4
158     chinese-cns11643-5
159     chinese-cns11643-6
160     chinese-cns11643-7
161     =jis-x0208@1990
162     =jis-x0213-1-2000
163     =jis-x0213-2-2000
164     korean-ksc5601
165     chinese-isoir165
166     katakana-jisx0201
167     hebrew-iso8859-8
168     chinese-gb12345
169     latin-viscii
170     ethiopic-ucs
171     =big5-cdp
172     =gt
173     ideograph-daikanwa-2
174     ideograph-daikanwa
175     =cbeta
176     ideograph-hanziku-1
177     ideograph-hanziku-2
178     ideograph-hanziku-3
179     ideograph-hanziku-4
180     ideograph-hanziku-5
181     ideograph-hanziku-6
182     ideograph-hanziku-7
183     ideograph-hanziku-8
184     ideograph-hanziku-9
185     ideograph-hanziku-10
186     ideograph-hanziku-11
187     ideograph-hanziku-12
188     =gt-k
189     =ucs@iso
190     =ucs@unicode
191     =big5
192     =big5-eten
193     =jis-x0208@1997
194     =zinbun-oracle
195     =jef-china3))
196
197 (defun char-db-make-char-spec (char)
198   (let (ret char-spec)
199     (cond ((characterp char)
200            (cond ((and (setq ret (encode-char char '=ucs 'defined-only))
201                        (not (and (<= #xE000 ret)(<= ret #xF8FF))))
202                   (setq char-spec (list (cons '=ucs ret)))
203                   (cond ((setq ret (get-char-attribute char 'name))
204                          (setq char-spec (cons (cons 'name ret) char-spec))
205                          )
206                         ((setq ret (get-char-attribute char 'name*))
207                          (setq char-spec (cons (cons 'name* ret) char-spec))
208                          ))
209                   )
210                  ((setq ret
211                         (catch 'tag
212                           (let ((rest char-db-coded-charset-priority-list)
213                                 ccs)
214                             (while rest
215                               (setq ccs (charset-name
216                                          (find-charset (car rest))))
217                               (if (setq ret
218                                         (encode-char char ccs
219                                                      'defined-only))
220                                   (throw 'tag (cons ccs ret)))
221                               (setq rest (cdr rest))))))
222                   (setq char-spec (list ret))
223                   (dolist (ccs (delq (car ret) (charset-list)))
224                     (if (and (or (charset-iso-final-char ccs)
225                                  (memq ccs
226                                        '(=daikanwa
227                                          =daikanwa@rev2
228                                          ;; =gt-k
229                                          )))
230                              (setq ccs (charset-name ccs))
231                              (null (assq ccs char-spec))
232                              (setq ret (encode-char char ccs 'defined-only)))
233                         (setq char-spec (cons (cons ccs ret) char-spec))))
234                   (if (null char-spec)
235                       (setq char-spec (split-char char)))
236                   (cond ((setq ret (get-char-attribute char 'name))
237                          (setq char-spec (cons (cons 'name ret) char-spec))
238                          )
239                         ((setq ret (get-char-attribute char 'name*))
240                          (setq char-spec (cons (cons 'name* ret) char-spec))
241                          ))
242                   ))
243            char-spec)
244           ((consp char)
245            char))))
246     
247 (defun char-db-insert-char-spec (char &optional readable column
248                                       required-features)
249   (unless column
250     (setq column (current-column)))
251   (let (char-spec temp-char)
252     (setq char-spec (char-db-make-char-spec char))
253     (unless (or (characterp char) ; char
254                 (condition-case nil
255                     (setq char (find-char char-spec))
256                   (error nil)))
257       ;; define temporary character
258       ;;   Current implementation is dirty.
259       (setq temp-char (define-char (cons '(ideograph-daikanwa . 0)
260                                          char-spec)))
261       (remove-char-attribute temp-char 'ideograph-daikanwa)
262       (setq char temp-char))
263     (insert-char-attributes char
264                             readable
265                             (union (mapcar #'car char-spec)
266                                    required-features))
267     (when temp-char
268       ;; undefine temporary character
269       ;;   Current implementation is dirty.
270       (setq char-spec (char-attribute-alist temp-char))
271       (while char-spec
272         (remove-char-attribute temp-char (car (car char-spec)))
273         (setq char-spec (cdr char-spec))))))
274
275 (defun char-db-insert-alist (alist &optional readable column)
276   (unless column
277     (setq column (current-column)))
278   (let ((line-breaking
279          (concat "\n" (make-string (1+ column) ?\ )))
280         name value
281         ret al ; cal
282         key
283         lbs cell rest separator)
284     (insert "(")
285     (while alist
286       (setq name (car (car alist))
287             value (cdr (car alist)))
288       (cond ((eq name 'char)
289              (insert "(char . ")
290              (if (setq ret (condition-case nil
291                                (find-char value)
292                              (error nil)))
293                  (progn
294                    (setq al nil
295                          ;; cal nil
296                          )
297                    (while value
298                      (setq key (car (car value)))
299                      ;; (if (find-charset key)
300                      ;;     (setq cal (cons key cal))
301                      (setq al (cons key al))
302                      ;; )
303                      (setq value (cdr value)))
304                    (insert-char-attributes ret
305                                            readable
306                                            (or al 'none) ; cal
307                                            ))
308                (insert (prin1-to-string value)))
309              (insert ")")
310              (insert line-breaking))
311             ((consp value)
312              (insert (format "(%-18s " name))
313              (setq lbs (concat "\n" (make-string (current-column) ?\ )))
314              (while (consp value)
315                (setq cell (car value))
316                (if (and (consp cell)
317                         (consp (car cell))
318                         (setq ret (condition-case nil
319                                       (find-char cell)
320                                     (error nil)))
321                         )
322                    (progn
323                      (setq rest cell
324                            al nil
325                            ;; cal nil
326                            )
327                      (while rest
328                        (setq key (car (car rest)))
329                        ;; (if (find-charset key)
330                        ;;     (setq cal (cons key cal))
331                        (setq al (cons key al))
332                        ;; )
333                        (setq rest (cdr rest)))
334                      (if separator
335                          (insert lbs))
336                      (insert-char-attributes ret
337                                              readable
338                                              al ; cal
339                                              )
340                      (setq separator lbs))
341                  (if separator
342                      (insert separator))
343                  (insert (prin1-to-string cell))
344                  (setq separator " "))
345                (setq value (cdr value)))
346              (insert ")")
347              (insert line-breaking))
348             (t
349              (insert (format "(%-18s . %S)%s"
350                              name value
351                              line-breaking))))
352       (setq alist (cdr alist))))
353   (insert ")"))
354
355 (defun char-db-insert-char-reference (plist &optional readable column)
356   (unless column
357     (setq column (current-column)))
358   (let ((line-breaking
359          (concat "\n" (make-string (1+ column) ?\ )))
360         (separator "")
361         name value)
362     (insert "(")
363     (while plist
364       (setq name (pop plist))
365       (setq value (pop plist))
366       (cond ((eq name :char)
367              (insert separator)
368              (insert ":char\t")
369              (cond ((numberp value)
370                     (setq value (decode-char '=ucs value)))
371                    ;; ((consp value)
372                    ;;  (setq value (or (find-char value)
373                    ;;                  value)))
374                    )
375              (char-db-insert-char-spec value readable)
376              (insert line-breaking)
377              (setq separator ""))
378             ((eq name :radical)
379              (insert (format "%s%s\t%d ; %c%s"
380                              separator
381                              name value
382                              (ideographic-radical value)
383                              line-breaking))
384              (setq separator ""))
385             (t
386              (insert (format "%s%s\t%S" separator name value))
387              (setq separator line-breaking)))
388       ))
389   (insert ")"))
390
391 (defun char-db-decode-isolated-char (ccs code-point)
392   (let (ret)
393     (setq ret
394           (cond ((eq ccs 'arabic-iso8859-6)
395                  (decode-char ccs code-point))
396                 ((and (memq ccs '(=gt-pj-1
397                                   =gt-pj-2
398                                   =gt-pj-3
399                                   =gt-pj-4
400                                   =gt-pj-5
401                                   =gt-pj-6
402                                   =gt-pj-7
403                                   =gt-pj-8
404                                   =gt-pj-9
405                                   =gt-pj-10
406                                   =gt-pj-11))
407                       (setq ret (decode-char ccs code-point))
408                       (setq ret (encode-char ret '=gt 'defined-only)))
409                  (decode-builtin-char '=gt ret))
410                 (t
411                  (decode-builtin-char ccs code-point))))
412     (cond ((and (<= 0 (char-int ret))
413                 (<= (char-int ret) #x1F))
414            (decode-char '=ucs (+ #x2400 (char-int ret))))
415           ((= (char-int ret) #x7F)
416            ?\u2421)
417           (t ret))))
418
419 (defvar char-db-convert-obsolete-format t)
420
421 (defun char-db-insert-ccs-feature (name value line-breaking)
422   (insert
423    (format
424     (cond ((memq name '(=daikanwa
425                         =daikanwa@rev1 =daikanwa@rev2
426                         =gt =gt-k =cbeta =zinbun-oracle))
427            "(%-18s . %05d)\t; %c")
428           ((eq name 'mojikyo)
429            "(%-18s . %06d)\t; %c")
430           ((>= (charset-dimension name) 2)
431            "(%-18s . #x%04X)\t; %c")
432           (t
433            "(%-18s . #x%02X)\t; %c"))
434     name
435     (if (= (charset-iso-graphic-plane name) 1)
436         (logior value
437                 (cond ((= (charset-dimension name) 1)
438                        #x80)
439                       ((= (charset-dimension name) 2)
440                        #x8080)
441                       ((= (charset-dimension name) 3)
442                        #x808080)
443                       (t 0)))
444       value)
445     (char-db-decode-isolated-char name value)))
446   (if (and (= (charset-chars name) 94)
447            (= (charset-dimension name) 2))
448       (insert (format " [%02d-%02d]"
449                       (- (lsh value -8) 32)
450                       (- (logand value 255) 32))))
451   (insert line-breaking))
452
453 (defun char-db-insert-relation-feature (char name value line-breaking
454                                              ccss readable)
455   (insert (format "(%-18s%s " name line-breaking))
456   (let ((lbs (concat "\n" (make-string (current-column) ?\ )))
457         separator cell sources required-features
458         ret)
459     (while (consp value)
460       (setq cell (car value))
461       (if (integerp cell)
462           (setq cell (decode-char '=ucs cell)))
463       (cond
464        ((eq name '->subsumptive)
465         (when (or (not (some (lambda (atr)
466                                (get-char-attribute cell atr))
467                              char-db-ignored-attributes))
468                   (some (lambda (ccs)
469                           (encode-char cell ccs 'defined-only))
470                         ccss))
471           (if separator
472               (insert lbs))
473           (let ((char-db-ignored-attributes
474                  (cons '<-subsumptive
475                        char-db-ignored-attributes)))
476             (insert-char-attributes cell readable))
477           (setq separator lbs))
478         )
479        ((characterp cell)
480         (setq sources
481               (get-char-attribute
482                char (intern (format "%s*sources" name))))
483         (setq required-features nil)
484         (dolist (source sources)
485           (cond
486            ((memq source '(JP JP/Jouyou shinjigen-1))
487             (setq required-features
488                   (union required-features
489                          '(=jis-x0208
490                            =jis-x0208@1990
491                            =jis-x0213-1-2000
492                            =jis-x0213-2-2000
493                            =jis-x0212
494                            =jis-x0208@1983
495                            =jis-x0208@1978))))
496            ((eq source 'CN)
497             (setq required-features
498                   (union required-features
499                          '(=gb2312
500                            =gb12345
501                            =iso-ir165)))))
502           (cond
503            ((find-charset (setq ret (intern (format "=%s" source))))
504             (setq required-features
505                   (cons ret required-features)))
506            (t (setq required-features
507                     (cons source required-features)))))
508         (cond ((string-match "@JP" (symbol-name name))
509                (setq required-features
510                      (union required-features
511                             '(=jis-x0208
512                               =jis-x0208@1990
513                               =jis-x0213-1-2000
514                               =jis-x0213-2-2000
515                               =jis-x0212
516                               =jis-x0208@1983
517                               =jis-x0208@1978))))
518               ((string-match "@CN" (symbol-name name))
519                (setq required-features
520                      (union required-features
521                             '(=gb2312
522                               =gb12345
523                               =iso-ir165)))))
524         (if separator
525             (insert lbs))
526         (if readable
527             (insert (format "%S" cell))
528           (char-db-insert-char-spec cell readable
529                                     nil
530                                     required-features))
531         (setq separator lbs))
532        ((consp cell)
533         (if separator
534             (insert lbs))
535         (if (consp (car cell))
536             (char-db-insert-char-spec cell readable)
537           (char-db-insert-char-reference cell readable))
538         (setq separator lbs))
539        (t
540         (if separator
541             (insert separator))
542         (insert (prin1-to-string cell))
543         (setq separator " ")))
544       (setq value (cdr value)))
545     (insert ")")
546     (insert line-breaking)))
547
548 (defun insert-char-attributes (char &optional readable attributes column)
549   (unless column
550     (setq column (current-column)))
551   (let (name value ; has-long-ccs-name
552         rest
553         radical strokes
554         (line-breaking
555          (concat "\n" (make-string (1+ column) ?\ )))
556         lbs cell separator ret
557         key al cal
558         dest-ccss ; sources required-features
559         ccss)
560     (let (atr-d)
561       (setq attributes
562             (sort (if attributes
563                       (if (consp attributes)
564                           (progn
565                             (dolist (name attributes)
566                               (unless (memq name char-db-ignored-attributes)
567                                 (if (find-charset name)
568                                     (push name ccss))
569                                 (push name atr-d)))
570                             atr-d))
571                     (dolist (name (char-attribute-list))
572                       (unless (memq name char-db-ignored-attributes)
573                         (if (find-charset name)
574                             (push name ccss))
575                         (push name atr-d)))
576                     atr-d)
577                   #'char-attribute-name<)))
578     (insert "(")
579     (when (memq '<-subsumptive attributes)
580       (when readable
581         (when (setq value (get-char-attribute char '<-subsumptive))
582           (char-db-insert-relation-feature char '<-subsumptive value
583                                            line-breaking
584                                            ccss readable)))
585       (setq attributes (delq '<-subsumptive attributes)))
586     (when (and (memq '<-denotational attributes)
587                (setq value (get-char-attribute char '<-denotational)))
588       (char-db-insert-relation-feature char '<-denotational value
589                                        line-breaking
590                                        ccss readable)
591       (setq attributes (delq '<-denotational attributes)))
592     (when (and (memq 'name attributes)
593                (setq value (get-char-attribute char 'name)))
594       (insert (format
595                (if (> (+ (current-column) (length value)) 48)
596                    "(name . %S)%s"
597                  "(name               . %S)%s")
598                value line-breaking))
599       (setq attributes (delq 'name attributes))
600       )
601     (when (and (memq 'name* attributes)
602                (setq value (get-char-attribute char 'name*)))
603       (insert (format
604                (if (> (+ (current-column) (length value)) 48)
605                    "(name* . %S)%s"
606                  "(name*              . %S)%s")
607                value line-breaking))
608       (setq attributes (delq 'name* attributes))
609       )
610     (when (and (memq 'script attributes)
611                (setq value (get-char-attribute char 'script)))
612       (insert (format "(script\t\t%s)%s"
613                       (mapconcat (function prin1-to-string)
614                                  value " ")
615                       line-breaking))
616       (setq attributes (delq 'script attributes))
617       )
618     (dolist (name '(=>ucs =>ucs*))
619       (when (and (memq name attributes)
620                  (setq value (get-char-attribute char name)))
621         (insert (format "(%-18s . #x%04X)\t; %c%s"
622                         name value (decode-char '=ucs value)
623                         line-breaking))
624         (setq attributes (delq name attributes))))
625     (dolist (name '(=>ucs@gb =>ucs@cns =>ucs@jis =>ucs@ks =>ucs@big5))
626       (when (and (memq name attributes)
627                  (setq value (get-char-attribute char name)))
628         (insert (format "(%-18s . #x%04X)\t; %c%s"
629                         name value
630                         (decode-char (intern
631                                       (concat "="
632                                               (substring
633                                                (symbol-name name) 2)))
634                                      value)
635                         line-breaking))
636         (setq attributes (delq name attributes))
637         ))
638     (dolist (name '(=>daikanwa))
639       (when (and (memq name attributes)
640                  (setq value (get-char-attribute char name)))
641         (insert
642          (if (integerp value)
643              (format "(%-18s . %05d)\t; %c%s"
644                      name value (decode-char '=daikanwa value)
645                      line-breaking)
646            (format "(%-18s %s)\t; %c%s"
647                    name
648                    (mapconcat (function prin1-to-string)
649                               value " ")
650                    (char-representative-of-daikanwa char)
651                    line-breaking)))
652         (setq attributes (delq name attributes))))
653     (when (and (memq 'general-category attributes)
654                (setq value (get-char-attribute char 'general-category)))
655       (insert (format
656                "(general-category\t%s) ; %s%s"
657                (mapconcat (lambda (cell)
658                             (format "%S" cell))
659                           value " ")
660                (cond ((rassoc value unidata-normative-category-alist)
661                       "Normative Category")
662                      ((rassoc value unidata-informative-category-alist)
663                       "Informative Category")
664                      (t
665                       "Unknown Category"))
666                line-breaking))
667       (setq attributes (delq 'general-category attributes))
668       )
669     (when (and (memq 'bidi-category attributes)
670                (setq value (get-char-attribute char 'bidi-category)))
671       (insert (format "(bidi-category\t. %S)%s"
672                       value
673                       line-breaking))
674       (setq attributes (delq 'bidi-category attributes))
675       )
676     (unless (or (not (memq 'mirrored attributes))
677                 (eq (setq value (get-char-attribute char 'mirrored 'empty))
678                     'empty))
679       (insert (format "(mirrored\t\t. %S)%s"
680                       value
681                       line-breaking))
682       (setq attributes (delq 'mirrored attributes))
683       )
684     (cond
685      ((and (memq 'decimal-digit-value attributes)
686            (setq value (get-char-attribute char 'decimal-digit-value)))
687       (insert (format "(decimal-digit-value . %S)%s"
688                       value
689                       line-breaking))
690       (setq attributes (delq 'decimal-digit-value attributes))
691       (when (and (memq 'digit-value attributes)
692                  (setq value (get-char-attribute char 'digit-value)))
693         (insert (format "(digit-value\t . %S)%s"
694                         value
695                         line-breaking))
696         (setq attributes (delq 'digit-value attributes))
697         )
698       (when (and (memq 'numeric-value attributes)
699                  (setq value (get-char-attribute char 'numeric-value)))
700         (insert (format "(numeric-value\t . %S)%s"
701                         value
702                         line-breaking))
703         (setq attributes (delq 'numeric-value attributes))
704         )
705       )
706      (t
707       (when (and (memq 'digit-value attributes)
708                  (setq value (get-char-attribute char 'digit-value)))
709         (insert (format "(digit-value\t. %S)%s"
710                         value
711                         line-breaking))
712         (setq attributes (delq 'digit-value attributes))
713         )
714       (when (and (memq 'numeric-value attributes)
715                  (setq value (get-char-attribute char 'numeric-value)))
716         (insert (format "(numeric-value\t. %S)%s"
717                         value
718                         line-breaking))
719         (setq attributes (delq 'numeric-value attributes))
720         )))
721     (when (and (memq 'iso-10646-comment attributes)
722                (setq value (get-char-attribute char 'iso-10646-comment)))
723       (insert (format "(iso-10646-comment\t. %S)%s"
724                       value
725                       line-breaking))
726       (setq attributes (delq 'iso-10646-comment attributes))
727       )
728     (when (and (memq 'morohashi-daikanwa attributes)
729                (setq value (get-char-attribute char 'morohashi-daikanwa)))
730       (insert (format "(morohashi-daikanwa\t%s)%s"
731                       (mapconcat (function prin1-to-string) value " ")
732                       line-breaking))
733       (setq attributes (delq 'morohashi-daikanwa attributes))
734       )
735     (setq radical nil
736           strokes nil)
737     (when (and (memq 'ideographic-radical attributes)
738                (setq value (get-char-attribute char 'ideographic-radical)))
739       (setq radical value)
740       (insert (format "(ideographic-radical . %S)\t; %c%s"
741                       radical
742                       (ideographic-radical radical)
743                       line-breaking))
744       (setq attributes (delq 'ideographic-radical attributes))
745       )
746     (when (and (memq 'shuowen-radical attributes)
747                (setq value (get-char-attribute char 'shuowen-radical)))
748       (insert (format "(shuowen-radical\t. %S)\t; %c%s"
749                       value
750                       (shuowen-radical value)
751                       line-breaking))
752       (setq attributes (delq 'shuowen-radical attributes))
753       )
754     (let (key)
755       (dolist (domain
756                (append
757                 char-db-feature-domains
758                 (let (dest domain)
759                   (dolist (feature (char-attribute-list))
760                     (setq feature (symbol-name feature))
761                     (when (string-match
762                            "\\(radical\\|strokes\\)@\\([^@*]+\\)\\(\\*\\|$\\)"
763                            feature)
764                       (setq domain (intern (match-string 2 feature)))
765                      (unless (memq domain dest)
766                        (setq dest (cons domain dest)))))
767                   (sort dest #'string<))))
768         (setq key (intern (format "%s@%s" 'ideographic-radical domain)))
769         (when (and (memq key attributes)
770                    (setq value (get-char-attribute char key)))
771           (setq radical value)
772           (insert (format "(%s . %S)\t; %c%s"
773                           key
774                           radical
775                           (ideographic-radical radical)
776                           line-breaking))
777           (setq attributes (delq key attributes))
778           )
779         (setq key (intern (format "%s@%s" 'ideographic-strokes domain)))
780         (when (and (memq key attributes)
781                    (setq value (get-char-attribute char key)))
782           (setq strokes value)
783           (insert (format "(%s . %S)%s"
784                           key
785                           strokes
786                           line-breaking))
787           (setq attributes (delq key attributes))
788           )
789         (setq key (intern (format "%s@%s" 'total-strokes domain)))
790         (when (and (memq key attributes)
791                    (setq value (get-char-attribute char key)))
792           (insert (format "(%s       . %S)%s"
793                           key
794                           value
795                           line-breaking))
796           (setq attributes (delq key attributes))
797           )
798         (dolist (feature '(ideographic-radical
799                            ideographic-strokes
800                            total-strokes))
801           (setq key (intern (format "%s@%s*sources" feature domain)))
802           (when (and (memq key attributes)
803                      (setq value (get-char-attribute char key)))
804             (insert (format "(%s%s" key line-breaking))
805             (dolist (cell value)
806               (insert (format " %s" cell)))
807             (insert ")")
808             (insert line-breaking)
809             (setq attributes (delq key attributes))
810             ))
811         ))
812     (when (and (memq 'ideographic-strokes attributes)
813                (setq value (get-char-attribute char 'ideographic-strokes)))
814       (setq strokes value)
815       (insert (format "(ideographic-strokes . %S)%s"
816                       strokes
817                       line-breaking))
818       (setq attributes (delq 'ideographic-strokes attributes))
819       )
820     (when (and (memq 'kangxi-radical attributes)
821                (setq value (get-char-attribute char 'kangxi-radical)))
822       (unless (eq value radical)
823         (insert (format "(kangxi-radical\t . %S)\t; %c%s"
824                         value
825                         (ideographic-radical value)
826                         line-breaking))
827         (or radical
828             (setq radical value)))
829       (setq attributes (delq 'kangxi-radical attributes))
830       )
831     (when (and (memq 'kangxi-strokes attributes)
832                (setq value (get-char-attribute char 'kangxi-strokes)))
833       (unless (eq value strokes)
834         (insert (format "(kangxi-strokes\t . %S)%s"
835                         value
836                         line-breaking))
837         (or strokes
838             (setq strokes value)))
839       (setq attributes (delq 'kangxi-strokes attributes))
840       )
841     (when (and (memq 'japanese-radical attributes)
842                (setq value (get-char-attribute char 'japanese-radical)))
843       (unless (eq value radical)
844         (insert (format "(japanese-radical\t . %S)\t; %c%s"
845                         value
846                         (ideographic-radical value)
847                         line-breaking))
848         (or radical
849             (setq radical value)))
850       (setq attributes (delq 'japanese-radical attributes))
851       )
852     (when (and (memq 'japanese-strokes attributes)
853                (setq value (get-char-attribute char 'japanese-strokes)))
854       (unless (eq value strokes)
855         (insert (format "(japanese-strokes\t . %S)%s"
856                         value
857                         line-breaking))
858         (or strokes
859             (setq strokes value)))
860       (setq attributes (delq 'japanese-strokes attributes))
861       )
862     (when (and (memq 'cns-radical attributes)
863                (setq value (get-char-attribute char 'cns-radical)))
864       (insert (format "(cns-radical\t . %S)\t; %c%s"
865                       value
866                       (ideographic-radical value)
867                       line-breaking))
868       (setq attributes (delq 'cns-radical attributes))
869       )
870     (when (and (memq 'cns-strokes attributes)
871                (setq value (get-char-attribute char 'cns-strokes)))
872       (unless (eq value strokes)
873         (insert (format "(cns-strokes\t . %S)%s"
874                         value
875                         line-breaking))
876         (or strokes
877             (setq strokes value)))
878       (setq attributes (delq 'cns-strokes attributes))
879       )
880     (when (and (memq 'shinjigen-1-radical attributes)
881                (setq value (get-char-attribute char 'shinjigen-1-radical)))
882       (unless (eq value radical)
883         (insert (format "(shinjigen-1-radical . %S)\t; %c%s"
884                         value
885                         (ideographic-radical value)
886                         line-breaking))
887         (or radical
888             (setq radical value)))
889       (setq attributes (delq 'shinjigen-1-radical attributes))
890       )
891     (when (and (memq 'ideographic- attributes)
892                (setq value (get-char-attribute char 'ideographic-)))
893       (insert "(ideographic-       ")
894       (setq lbs (concat "\n" (make-string (current-column) ?\ ))
895             separator nil)
896       (while (consp value)
897         (setq cell (car value))
898         (if (integerp cell)
899             (setq cell (decode-char '=ucs cell)))
900         (cond ((characterp cell)
901                (if separator
902                    (insert lbs))
903                (if readable
904                    (insert (format "%S" cell))
905                  (char-db-insert-char-spec cell readable))
906                (setq separator lbs))
907               ((consp cell)
908                (if separator
909                    (insert lbs))
910                (if (consp (car cell))
911                    (char-db-insert-char-spec cell readable)
912                  (char-db-insert-char-reference cell readable))
913                (setq separator lbs))
914               (t
915                (if separator
916                    (insert separator))
917                (insert (prin1-to-string cell))
918                (setq separator " ")))
919         (setq value (cdr value)))
920       (insert ")")
921       (insert line-breaking)
922       (setq attributes (delq 'ideographic- attributes)))
923     (when (and (memq 'total-strokes attributes)
924                (setq value (get-char-attribute char 'total-strokes)))
925       (insert (format "(total-strokes       . %S)%s"
926                       value
927                       line-breaking))
928       (setq attributes (delq 'total-strokes attributes))
929       )
930     (when (and (memq '->ideograph attributes)
931                (setq value (get-char-attribute char '->ideograph)))
932       (insert (format "(->ideograph\t%s)%s"
933                       (mapconcat (lambda (code)
934                                    (cond ((symbolp code)
935                                           (symbol-name code))
936                                          ((integerp code)
937                                           (format "#x%04X" code))
938                                          (t
939                                           (format "%s %S"
940                                                   line-breaking code))))
941                                  value " ")
942                       line-breaking))
943       (setq attributes (delq '->ideograph attributes))
944       )
945     ;; (when (and (memq '->decomposition attributes)
946     ;;            (setq value (get-char-attribute char '->decomposition)))
947     ;;   (insert (format "(->decomposition\t%s)%s"
948     ;;                   (mapconcat (lambda (code)
949     ;;                                (cond ((symbolp code)
950     ;;                                       (symbol-name code))
951     ;;                                      ((characterp code)
952     ;;                                       (if readable
953     ;;                                           (format "%S" code)
954     ;;                                         (format "#x%04X"
955     ;;                                                 (char-int code))
956     ;;                                         ))
957     ;;                                      ((integerp code)
958     ;;                                       (format "#x%04X" code))
959     ;;                                      (t
960     ;;                                       (format "%s%S" line-breaking code))))
961     ;;                              value " ")
962     ;;                   line-breaking))
963     ;;   (setq attributes (delq '->decomposition attributes))
964     ;;   )
965     (if (equal (get-char-attribute char '->titlecase)
966                (get-char-attribute char '->uppercase))
967         (setq attributes (delq '->titlecase attributes)))
968     (when (and (memq '->mojikyo attributes)
969                (setq value (get-char-attribute char '->mojikyo)))
970       (insert (format "(->mojikyo\t\t. %06d)\t; %c%s"
971                       value (decode-char 'mojikyo value)
972                       line-breaking))
973       (setq attributes (delq '->mojikyo attributes))
974       )
975     (when (and (memq 'hanyu-dazidian-vol attributes)
976                (setq value (get-char-attribute char 'hanyu-dazidian-vol)))
977       (insert (format "(hanyu-dazidian-vol  . %d)%s"
978                       value line-breaking))
979       (setq attributes (delq 'hanyu-dazidian-vol attributes))
980       )
981     (when (and (memq 'hanyu-dazidian-page attributes)
982                (setq value (get-char-attribute char 'hanyu-dazidian-page)))
983       (insert (format "(hanyu-dazidian-page . %d)%s"
984                       value line-breaking))
985       (setq attributes (delq 'hanyu-dazidian-page attributes))
986       )
987     (when (and (memq 'hanyu-dazidian-char attributes)
988                (setq value (get-char-attribute char 'hanyu-dazidian-char)))
989       (insert (format "(hanyu-dazidian-char . %d)%s"
990                       value line-breaking))
991       (setq attributes (delq 'hanyu-dazidian-char attributes))
992       )
993     (unless readable
994       (dolist (ignored '(composition
995                          ->denotational <-subsumptive ->ucs-unified
996                          ->ideographic-component-forms))
997         (setq attributes (delq ignored attributes))))
998     (while attributes
999       (setq name (car attributes))
1000       (if (setq value (get-char-attribute char name))
1001           (cond ((setq ret (find-charset name))
1002                  (setq name (charset-name ret))
1003                  (if (and (not (memq name dest-ccss))
1004                           (prog1
1005                               (setq value (get-char-attribute char name))
1006                             (setq dest-ccss (cons name dest-ccss))))
1007                      (char-db-insert-ccs-feature name value line-breaking))
1008                  )
1009                 ((string-match "^=>ucs@" (symbol-name name))
1010                  (insert (format "(%-18s . #x%04X)\t; %c%s"
1011                                  name value (decode-char '=ucs value)
1012                                  line-breaking))
1013                  )
1014                 ((eq name 'jisx0208-1978/4X)
1015                  (insert (format "(%-18s . #x%04X)%s"
1016                                  name value
1017                                  line-breaking))
1018                  )
1019                 ((and (not readable)
1020                       (null (get-char-attribute
1021                              char
1022                              (intern (format "%s*sources" name))))
1023                       (not (string-match "\\*sources$" (symbol-name name)))
1024                       (or (eq name '<-identical)
1025                           (eq name '<-canonical)
1026                           (eq name '->superscript)
1027                           (eq name '->subscript)
1028                           (eq name '->circled)
1029                           (string-match "^->font" (symbol-name name))
1030                           (string-match "^->compat" (symbol-name name))
1031                           (string-match "^->halfwidth" (symbol-name name))
1032                           (and
1033                            (string-match "^->fullwidth" (symbol-name name))
1034                            (not
1035                             (and (consp value)
1036                                  (characterp (car value))
1037                                  (encode-char
1038                                   (car value) '=ucs 'defined-only))))
1039                           (string-match "^->simplified" (symbol-name name))
1040                           (string-match "^->vulgar" (symbol-name name))
1041                           (string-match "^->wrong" (symbol-name name))
1042                           (string-match "^->same" (symbol-name name))
1043                           (string-match "^->formed" (symbol-name name))
1044                           (string-match "^->original" (symbol-name name))
1045                           (string-match "^->ancient" (symbol-name name))
1046                           (string-match "^->Oracle-Bones" (symbol-name name))
1047                           ))
1048                  )
1049                 ((or (eq name 'ideographic-structure)
1050                      (eq name 'ideographic-combination)
1051                      (eq name 'ideographic-)
1052                      (eq name '=decomposition)
1053                      (string-match "^=>decomposition" (symbol-name name))
1054                      (string-match "^\\(->\\|<-\\)" (symbol-name name)))
1055                  (char-db-insert-relation-feature char name value
1056                                                   line-breaking
1057                                                   ccss readable))
1058                 ((memq name '(ideograph=
1059                               original-ideograph-of
1060                               ancient-ideograph-of
1061                               vulgar-ideograph-of
1062                               wrong-ideograph-of
1063                               ;; simplified-ideograph-of
1064                               ideographic-variants
1065                               ;; ideographic-different-form-of
1066                               ))
1067                  (insert (format "(%-18s%s " name line-breaking))
1068                  (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1069                        separator nil)
1070                  (while (consp value)
1071                    (setq cell (car value))
1072                    (if (and (consp cell)
1073                             (consp (car cell)))
1074                        (progn
1075                          (if separator
1076                              (insert lbs))
1077                          (char-db-insert-alist cell readable)
1078                          (setq separator lbs))
1079                      (if separator
1080                          (insert separator))
1081                      (insert (prin1-to-string cell))
1082                      (setq separator " "))
1083                    (setq value (cdr value)))
1084                  (insert ")")
1085                  (insert line-breaking))
1086                 ((consp value)
1087                  (insert (format "(%-18s " name))
1088                  (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1089                        separator nil)
1090                  (while (consp value)
1091                    (setq cell (car value))
1092                    (if (and (consp cell)
1093                             (consp (car cell))
1094                             (setq ret (condition-case nil
1095                                           (find-char cell)
1096                                         (error nil))))
1097                        (progn
1098                          (setq rest cell
1099                                al nil
1100                                cal nil)
1101                          (while rest
1102                            (setq key (car (car rest)))
1103                            (if (find-charset key)
1104                                (setq cal (cons key cal))
1105                              (setq al (cons key al)))
1106                            (setq rest (cdr rest)))
1107                          (if separator
1108                              (insert lbs))
1109                          (insert-char-attributes ret
1110                                                  readable
1111                                                  al cal)
1112                          (setq separator lbs))
1113                      (setq ret (prin1-to-string cell))
1114                      (if separator
1115                          (if (< (+ (current-column)
1116                                    (length ret)
1117                                    (length separator))
1118                                 76)
1119                              (insert separator)
1120                            (insert lbs)))
1121                      (insert ret)
1122                      (setq separator " "))
1123                    (setq value (cdr value)))
1124                  (insert ")")
1125                  (insert line-breaking))
1126                 (t
1127                  (insert (format "(%-18s . %S)%s"
1128                                  name value
1129                                  line-breaking)))
1130                 ))
1131       (setq attributes (cdr attributes)))
1132     (insert ")")))
1133
1134 (defun insert-char-data (char &optional readable
1135                               attributes)
1136   (save-restriction
1137     (narrow-to-region (point)(point))
1138     (insert "(define-char
1139   '")
1140     (insert-char-attributes char readable attributes)
1141     (insert ")\n")
1142     (goto-char (point-min))
1143     (while (re-search-forward "[ \t]+$" nil t)
1144       (replace-match ""))
1145     ;; from tabify.
1146     (goto-char (point-min))
1147     (while (re-search-forward "[ \t][ \t][ \t]*" nil t)
1148       (let ((column (current-column))
1149             (indent-tabs-mode t))
1150         (delete-region (match-beginning 0) (point))
1151         (indent-to column)))
1152     (goto-char (point-max))
1153     ;; (tabify (point-min)(point-max))
1154     ))
1155
1156 (defun insert-char-data-with-variant (char &optional printable
1157                                            no-ucs-unified
1158                                            script excluded-script)
1159   (insert-char-data char printable)
1160   (let ((variants (char-variants char))
1161         rest
1162         variant vs ret)
1163     (setq variants (sort variants #'<))
1164     (setq rest variants)
1165     (setq variants (cons char variants))
1166     (while rest
1167       (setq variant (car rest))
1168       (unless (get-char-attribute variant '<-subsumptive)
1169         (if (and (or (null script)
1170                      (null (setq vs (get-char-attribute variant 'script)))
1171                      (memq script vs))
1172                  (or (null excluded-script)
1173                      (null (setq vs (get-char-attribute variant 'script)))
1174                      (not (memq excluded-script vs))))
1175             (unless (and no-ucs-unified (get-char-attribute variant '=ucs))
1176               (insert-char-data variant printable)
1177               (if (setq ret (char-variants variant))
1178                   (while ret
1179                     (or (memq (car ret) variants)
1180                         ;; (get-char-attribute (car ret) '<-subsumptive)
1181                         (setq rest (nconc rest (list (car ret)))))
1182                     (setq ret (cdr ret)))))))
1183       (setq rest (cdr rest)))))
1184
1185 (defun insert-char-range-data (min max &optional script excluded-script)
1186   (let ((code min)
1187         char)
1188     (while (<= code max)
1189       (setq char (decode-char '=ucs code))
1190       (if (encode-char char '=ucs 'defined-only)
1191           (insert-char-data-with-variant char nil 'no-ucs-unified
1192                                          script excluded-script))
1193       (setq code (1+ code)))))
1194
1195 (defun write-char-range-data-to-file (min max file
1196                                           &optional script excluded-script)
1197   (let ((coding-system-for-write char-db-file-coding-system))
1198     (with-temp-buffer
1199       (insert (format ";; -*- coding: %s -*-\n"
1200                       char-db-file-coding-system))
1201       (insert-char-range-data min max script excluded-script)
1202       (write-region (point-min)(point-max) file))))
1203
1204 (defvar what-character-original-window-configuration)
1205
1206 ;;;###autoload
1207 (defun what-char-definition (char)
1208   (interactive (list (char-after)))
1209   (let ((buf (get-buffer-create "*Character Description*"))
1210         (the-buf (current-buffer))
1211         (win-conf (current-window-configuration)))
1212     (pop-to-buffer buf)
1213     (make-local-variable 'what-character-original-window-configuration)
1214     (setq what-character-original-window-configuration win-conf)
1215     (setq buffer-read-only nil)
1216     (erase-buffer)
1217     (condition-case err
1218         (progn
1219           (insert-char-data-with-variant char 'printable)
1220           (unless (char-attribute-alist char)
1221             (insert (format ";; = %c\n"
1222                             (let* ((rest (split-char char))
1223                                    (ccs (pop rest))
1224                                    (code (pop rest)))
1225                               (while rest
1226                                 (setq code (logior (lsh code 8)
1227                                                    (pop rest))))
1228                               (decode-char ccs code)))))
1229           ;; (char-db-update-comment)
1230           (set-buffer-modified-p nil)
1231           (view-mode the-buf (lambda (buf)
1232                                (set-window-configuration
1233                                 what-character-original-window-configuration)
1234                                ))
1235           (goto-char (point-min)))
1236       (error (progn
1237                (set-window-configuration
1238                 what-character-original-window-configuration)
1239                (signal (car err) (cdr err)))))))
1240
1241 (provide 'char-db-util)
1242
1243 ;;; char-db-util.el ends here