7e9a87defcaa8805554890568680b47c94100b5d
[chise/xemacs-chise.git] / lisp / utf-2000 / char-db-util.el
1 ;;; char-db-util.el --- Character Database utility -*- coding: utf-8-er; -*-
2
3 ;; Copyright (C) 1998,1999,2000,2001,2002,2003,2004,2005 MORIOKA Tomohiko.
4
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: CHISE, Character Database, ISO/IEC 10646, UCS, Unicode, MULE.
7
8 ;; This file is part of XEmacs CHISE.
9
10 ;; XEmacs CHISE is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
14
15 ;; XEmacs CHISE is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 ;; General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with XEmacs CHISE; see the file COPYING.  If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
24
25 ;;; Code:
26
27 (require 'alist)
28
29 (defconst unidata-normative-category-alist
30   '(("Lu" letter        uppercase)
31     ("Ll" letter        lowercase)
32     ("Lt" letter        titlecase)
33     ("Mn" mark          non-spacing)
34     ("Mc" mark          spacing-combining)
35     ("Me" mark          enclosing)
36     ("Nd" number        decimal-digit)
37     ("Nl" number        letter)
38     ("No" number        other)
39     ("Zs" separator     space)
40     ("Zl" separator     line)
41     ("Zp" separator     paragraph)
42     ("Cc" other         control)
43     ("Cf" other         format)
44     ("Cs" other         surrogate)
45     ("Co" other         private-use)
46     ("Cn" other         not-assigned)))
47
48 (defconst unidata-informative-category-alist
49   '(("Lm" letter        modifier)
50     ("Lo" letter        other)
51     ("Pc" punctuation   connector)
52     ("Pd" punctuation   dash)
53     ("Ps" punctuation   open)
54     ("Pe" punctuation   close)
55     ("Pi" punctuation   initial-quote)
56     ("Pf" punctuation   final-quote)
57     ("Po" punctuation   other)
58     ("Sm" symbol        math)
59     ("Sc" symbol        currency)
60     ("Sk" symbol        modifier)
61     ("So" symbol        other)
62     ))
63
64 (defconst ideographic-radicals
65   (let ((v (make-vector 215 nil))
66         (i 1))
67     (while (< i 215)
68       (aset v i (decode-char '=ucs (+ #x2EFF i)))
69       (setq i (1+ i)))
70     v))
71
72 (defun ideographic-radical (number)
73   (aref ideographic-radicals number))
74
75 (defconst shuowen-radicals
76   [?一 ?上 ?示 ?三 ?王 ?玉 ?玨 ?气 ?士 ?丨 ?屮 ?艸 ?茻])
77
78 (defun shuowen-radical (number)
79   (aref shuowen-radicals (1- number)))
80
81 (defvar char-db-file-coding-system 'utf-8-mcs-er)
82
83 (defvar char-db-feature-domains
84   '(ucs daikanwa cns gt jis jis/alt jis/a jis/b
85         jis-x0212 jis-x0213 cdp shinjigen misc unknown))
86
87 (defvar char-db-ignored-attributes '(ideographic-products))
88
89 (defun char-attribute-name< (ka kb)
90   (cond
91    ((eq '->denotational kb)
92     t)
93    ((eq '->subsumptive kb)
94     (not (eq '->denotational ka)))
95    ((eq '->denotational ka)
96     nil)
97    ((eq '->subsumptive ka)
98     nil)
99    ((find-charset ka)
100     (if (find-charset kb)
101         (if (<= (charset-id ka) 1)
102             (if (<= (charset-id kb) 1)
103                 (cond
104                  ((= (charset-dimension ka)
105                      (charset-dimension kb))
106                   (> (charset-id ka)(charset-id kb)))
107                  (t
108                   (> (charset-dimension ka)
109                      (charset-dimension kb))
110                   ))
111               t)
112           (if (<= (charset-id kb) 1)
113               nil
114             (< (charset-id ka)(charset-id kb))))
115       nil))
116    ((find-charset kb)
117     t)
118    ((symbolp ka)
119     (cond ((symbolp kb)
120            (string< (symbol-name ka)
121                     (symbol-name kb)))
122           (t)))
123    ((symbolp kb)
124     nil)))
125
126 (defvar char-db-coded-charset-priority-list
127   '(ascii
128     control-1
129     latin-iso8859-1
130     latin-iso8859-2
131     latin-iso8859-3
132     latin-iso8859-4
133     latin-iso8859-9
134     latin-jisx0201
135     cyrillic-iso8859-5
136     greek-iso8859-7
137     thai-tis620
138     =jis-x0208
139     =jis-x0208@1978
140     =jis-x0208@1983
141     japanese-jisx0212
142     chinese-gb2312
143     chinese-cns11643-1
144     chinese-cns11643-2
145     chinese-cns11643-3
146     chinese-cns11643-4
147     chinese-cns11643-5
148     chinese-cns11643-6
149     chinese-cns11643-7
150     =jis-x0208@1990
151     =jis-x0213-1-2000
152     =jis-x0213-2-2000
153     korean-ksc5601
154     chinese-isoir165
155     katakana-jisx0201
156     hebrew-iso8859-8
157     chinese-gb12345
158     latin-viscii
159     ethiopic-ucs
160     =big5-cdp
161     =gt
162     ideograph-daikanwa-2
163     ideograph-daikanwa
164     =cbeta
165     ideograph-hanziku-1
166     ideograph-hanziku-2
167     ideograph-hanziku-3
168     ideograph-hanziku-4
169     ideograph-hanziku-5
170     ideograph-hanziku-6
171     ideograph-hanziku-7
172     ideograph-hanziku-8
173     ideograph-hanziku-9
174     ideograph-hanziku-10
175     ideograph-hanziku-11
176     ideograph-hanziku-12
177     =gt-k
178     =ucs@iso
179     =ucs@unicode
180     =big5
181     =big5-eten
182     =jis-x0208@1997
183     =zinbun-oracle
184     =jef-china3))
185
186 (defun char-db-make-char-spec (char)
187   (let (ret char-spec)
188     (cond ((characterp char)
189            (cond ((and (setq ret (encode-char char '=ucs 'defined-only))
190                        (not (and (<= #xE000 ret)(<= ret #xF8FF))))
191                   (setq char-spec (list (cons '=ucs ret)))
192                   (cond ((setq ret (get-char-attribute char 'name))
193                          (setq char-spec (cons (cons 'name ret) char-spec))
194                          )
195                         ((setq ret (get-char-attribute char 'name*))
196                          (setq char-spec (cons (cons 'name* ret) char-spec))
197                          ))
198                   )
199                  ((setq ret
200                         (catch 'tag
201                           (let ((rest char-db-coded-charset-priority-list)
202                                 ccs)
203                             (while rest
204                               (setq ccs (charset-name
205                                          (find-charset (car rest))))
206                               (if (setq ret
207                                         (encode-char char ccs
208                                                      'defined-only))
209                                   (throw 'tag (cons ccs ret)))
210                               (setq rest (cdr rest))))))
211                   (setq char-spec (list ret))
212                   (dolist (ccs (delq (car ret) (charset-list)))
213                     (if (and (or (charset-iso-final-char ccs)
214                                  (memq ccs
215                                        '(=daikanwa
216                                          =daikanwa@rev2
217                                          ;; =gt-k
218                                          )))
219                              (setq ccs (charset-name ccs))
220                              (null (assq ccs char-spec))
221                              (setq ret (encode-char char ccs 'defined-only)))
222                         (setq char-spec (cons (cons ccs ret) char-spec))))
223                   (if (null char-spec)
224                       (setq char-spec (split-char char)))
225                   (cond ((setq ret (get-char-attribute char 'name))
226                          (setq char-spec (cons (cons 'name ret) char-spec))
227                          )
228                         ((setq ret (get-char-attribute char 'name*))
229                          (setq char-spec (cons (cons 'name* ret) char-spec))
230                          ))
231                   ))
232            char-spec)
233           ((consp char)
234            char))))
235     
236 (defun char-db-insert-char-spec (char &optional readable column
237                                       required-features)
238   (unless column
239     (setq column (current-column)))
240   (let (char-spec temp-char)
241     (setq char-spec (char-db-make-char-spec char))
242     (unless (or (characterp char) ; char
243                 (condition-case nil
244                     (setq char (find-char char-spec))
245                   (error nil)))
246       ;; define temporary character
247       ;;   Current implementation is dirty.
248       (setq temp-char (define-char (cons '(ideograph-daikanwa . 0)
249                                          char-spec)))
250       (remove-char-attribute temp-char 'ideograph-daikanwa)
251       (setq char temp-char))
252     (insert-char-attributes char
253                             readable
254                             (union (mapcar #'car char-spec)
255                                    required-features))
256     (when temp-char
257       ;; undefine temporary character
258       ;;   Current implementation is dirty.
259       (setq char-spec (char-attribute-alist temp-char))
260       (while char-spec
261         (remove-char-attribute temp-char (car (car char-spec)))
262         (setq char-spec (cdr char-spec))))))
263
264 (defun char-db-insert-alist (alist &optional readable column)
265   (unless column
266     (setq column (current-column)))
267   (let ((line-breaking
268          (concat "\n" (make-string (1+ column) ?\ )))
269         name value
270         ret al ; cal
271         key
272         lbs cell rest separator)
273     (insert "(")
274     (while alist
275       (setq name (car (car alist))
276             value (cdr (car alist)))
277       (cond ((eq name 'char)
278              (insert "(char . ")
279              (if (setq ret (condition-case nil
280                                (find-char value)
281                              (error nil)))
282                  (progn
283                    (setq al nil
284                          ;; cal nil
285                          )
286                    (while value
287                      (setq key (car (car value)))
288                      ;; (if (find-charset key)
289                      ;;     (setq cal (cons key cal))
290                      (setq al (cons key al))
291                      ;; )
292                      (setq value (cdr value)))
293                    (insert-char-attributes ret
294                                            readable
295                                            (or al 'none) ; cal
296                                            ))
297                (insert (prin1-to-string value)))
298              (insert ")")
299              (insert line-breaking))
300             ((consp value)
301              (insert (format "(%-18s " name))
302              (setq lbs (concat "\n" (make-string (current-column) ?\ )))
303              (while (consp value)
304                (setq cell (car value))
305                (if (and (consp cell)
306                         (consp (car cell))
307                         (setq ret (condition-case nil
308                                       (find-char cell)
309                                     (error nil)))
310                         )
311                    (progn
312                      (setq rest cell
313                            al nil
314                            ;; cal nil
315                            )
316                      (while rest
317                        (setq key (car (car rest)))
318                        ;; (if (find-charset key)
319                        ;;     (setq cal (cons key cal))
320                        (setq al (cons key al))
321                        ;; )
322                        (setq rest (cdr rest)))
323                      (if separator
324                          (insert lbs))
325                      (insert-char-attributes ret
326                                              readable
327                                              al ; cal
328                                              )
329                      (setq separator lbs))
330                  (if separator
331                      (insert separator))
332                  (insert (prin1-to-string cell))
333                  (setq separator " "))
334                (setq value (cdr value)))
335              (insert ")")
336              (insert line-breaking))
337             (t
338              (insert (format "(%-18s . %S)%s"
339                              name value
340                              line-breaking))))
341       (setq alist (cdr alist))))
342   (insert ")"))
343
344 (defun char-db-insert-char-reference (plist &optional readable column)
345   (unless column
346     (setq column (current-column)))
347   (let ((line-breaking
348          (concat "\n" (make-string (1+ column) ?\ )))
349         (separator "")
350         name value)
351     (insert "(")
352     (while plist
353       (setq name (pop plist))
354       (setq value (pop plist))
355       (cond ((eq name :char)
356              (insert separator)
357              (insert ":char\t")
358              (cond ((numberp value)
359                     (setq value (decode-char '=ucs value)))
360                    ;; ((consp value)
361                    ;;  (setq value (or (find-char value)
362                    ;;                  value)))
363                    )
364              (char-db-insert-char-spec value readable)
365              (insert line-breaking)
366              (setq separator ""))
367             ((eq name :radical)
368              (insert (format "%s%s\t%d ; %c%s"
369                              separator
370                              name value
371                              (ideographic-radical value)
372                              line-breaking))
373              (setq separator ""))
374             (t
375              (insert (format "%s%s\t%S" separator name value))
376              (setq separator line-breaking)))
377       ))
378   (insert ")"))
379
380 (defun char-db-decode-isolated-char (ccs code-point)
381   (let (ret)
382     (setq ret
383           (cond ((eq ccs 'arabic-iso8859-6)
384                  (decode-char ccs code-point))
385                 ((and (memq ccs '(=gt-pj-1
386                                   =gt-pj-2
387                                   =gt-pj-3
388                                   =gt-pj-4
389                                   =gt-pj-5
390                                   =gt-pj-6
391                                   =gt-pj-7
392                                   =gt-pj-8
393                                   =gt-pj-9
394                                   =gt-pj-10
395                                   =gt-pj-11))
396                       (setq ret (decode-char ccs code-point))
397                       (setq ret (encode-char ret '=gt 'defined-only)))
398                  (decode-builtin-char '=gt ret))
399                 (t
400                  (decode-builtin-char ccs code-point))))
401     (cond ((and (<= 0 (char-int ret))
402                 (<= (char-int ret) #x1F))
403            (decode-char '=ucs (+ #x2400 (char-int ret))))
404           ((= (char-int ret) #x7F)
405            ?\u2421)
406           (t ret))))
407
408 (defvar char-db-convert-obsolete-format t)
409
410 (defun char-db-insert-ccs-feature (name value line-breaking)
411   (insert
412    (format
413     (cond ((memq name '(=daikanwa
414                         =daikanwa@rev1 =daikanwa@rev2
415                         =gt =gt-k =cbeta =zinbun-oracle))
416            "(%-18s . %05d)\t; %c")
417           ((eq name 'mojikyo)
418            "(%-18s . %06d)\t; %c")
419           ((>= (charset-dimension name) 2)
420            "(%-18s . #x%04X)\t; %c")
421           (t
422            "(%-18s . #x%02X)\t; %c"))
423     name
424     (if (= (charset-iso-graphic-plane name) 1)
425         (logior value
426                 (cond ((= (charset-dimension name) 1)
427                        #x80)
428                       ((= (charset-dimension name) 2)
429                        #x8080)
430                       ((= (charset-dimension name) 3)
431                        #x808080)
432                       (t 0)))
433       value)
434     (char-db-decode-isolated-char name value)))
435   (if (and (= (charset-chars name) 94)
436            (= (charset-dimension name) 2))
437       (insert (format " [%02d-%02d]"
438                       (- (lsh value -8) 32)
439                       (- (logand value 255) 32))))
440   (insert line-breaking))
441
442 (defun insert-char-attributes (char &optional readable attributes column)
443   (unless column
444     (setq column (current-column)))
445   (let (name value ; has-long-ccs-name
446         rest
447         radical strokes
448         (line-breaking
449          (concat "\n" (make-string (1+ column) ?\ )))
450         lbs cell separator ret
451         key al cal
452         dest-ccss
453         sources required-features
454         ccss)
455     (let (atr-d)
456       (setq attributes
457             (sort (if attributes
458                       (if (consp attributes)
459                           (progn
460                             (dolist (name attributes)
461                               (unless (memq name char-db-ignored-attributes)
462                                 (if (find-charset name)
463                                     (push name ccss))
464                                 (push name atr-d)))
465                             atr-d))
466                     (dolist (name (char-attribute-list))
467                       (unless (memq name char-db-ignored-attributes)
468                         (if (find-charset name)
469                             (push name ccss))
470                         (push name atr-d)))
471                     atr-d)
472                   #'char-attribute-name<)))
473     (insert "(")
474     (when (and (memq 'name attributes)
475                (setq value (get-char-attribute char 'name)))
476       (insert (format
477                (if (> (+ (current-column) (length value)) 48)
478                    "(name . %S)%s"
479                  "(name               . %S)%s")
480                value line-breaking))
481       (setq attributes (delq 'name attributes))
482       )
483     (when (and (memq 'name* attributes)
484                (setq value (get-char-attribute char 'name*)))
485       (insert (format
486                (if (> (+ (current-column) (length value)) 48)
487                    "(name* . %S)%s"
488                  "(name*              . %S)%s")
489                value line-breaking))
490       (setq attributes (delq 'name* attributes))
491       )
492     (when (and (memq 'script attributes)
493                (setq value (get-char-attribute char 'script)))
494       (insert (format "(script\t\t%s)%s"
495                       (mapconcat (function prin1-to-string)
496                                  value " ")
497                       line-breaking))
498       (setq attributes (delq 'script attributes))
499       )
500     ;; (when (and (memq '<-denotational attributes)
501     ;;            (setq value (get-char-attribute char '<-denotational))
502     ;;            (null (cdr value))
503     ;;            (setq value (encode-char (car value) 'ucs 'defined-only)))
504     ;;   (insert (format "(%-18s . #x%04X)\t; %c%s"
505     ;;                   '=>ucs value (decode-char 'ucs value)
506     ;;                   line-breaking))
507     ;;   (setq attributes (delq '<-denotational attributes)))
508     (dolist (name '(=>ucs =>ucs*))
509       (when (and (memq name attributes)
510                  (setq value (get-char-attribute char name)))
511         (insert (format "(%-18s . #x%04X)\t; %c%s"
512                         name value (decode-char '=ucs value)
513                         line-breaking))
514         (setq attributes (delq name attributes))))
515     (dolist (name '(=>ucs@gb =>ucs@cns =>ucs@jis =>ucs@ks =>ucs@big5))
516       (when (and (memq name attributes)
517                  (setq value (get-char-attribute char name)))
518         (insert (format "(%-18s . #x%04X)\t; %c%s"
519                         name value
520                         (decode-char (intern
521                                       (concat "="
522                                               (substring
523                                                (symbol-name name) 2)))
524                                      value)
525                         line-breaking))
526         (setq attributes (delq name attributes))
527         ))
528     (dolist (name '(=>daikanwa))
529       (when (and (memq name attributes)
530                  (setq value (get-char-attribute char name)))
531         (insert
532          (if (integerp value)
533              (format "(%-18s . %05d)\t; %c%s"
534                      name value (decode-char '=daikanwa value)
535                      line-breaking)
536            (format "(%-18s %s)\t; %c%s"
537                    name
538                    (mapconcat (function prin1-to-string)
539                               value " ")
540                    (char-representative-of-daikanwa char)
541                    line-breaking)))
542         (setq attributes (delq name attributes))))
543     (when (and (memq 'general-category attributes)
544                (setq value (get-char-attribute char 'general-category)))
545       (insert (format
546                "(general-category\t%s) ; %s%s"
547                (mapconcat (lambda (cell)
548                             (format "%S" cell))
549                           value " ")
550                (cond ((rassoc value unidata-normative-category-alist)
551                       "Normative Category")
552                      ((rassoc value unidata-informative-category-alist)
553                       "Informative Category")
554                      (t
555                       "Unknown Category"))
556                line-breaking))
557       (setq attributes (delq 'general-category attributes))
558       )
559     (when (and (memq 'bidi-category attributes)
560                (setq value (get-char-attribute char 'bidi-category)))
561       (insert (format "(bidi-category\t. %S)%s"
562                       value
563                       line-breaking))
564       (setq attributes (delq 'bidi-category attributes))
565       )
566     (unless (or (not (memq 'mirrored attributes))
567                 (eq (setq value (get-char-attribute char 'mirrored 'empty))
568                     'empty))
569       (insert (format "(mirrored\t\t. %S)%s"
570                       value
571                       line-breaking))
572       (setq attributes (delq 'mirrored attributes))
573       )
574     (cond
575      ((and (memq 'decimal-digit-value attributes)
576            (setq value (get-char-attribute char 'decimal-digit-value)))
577       (insert (format "(decimal-digit-value . %S)%s"
578                       value
579                       line-breaking))
580       (setq attributes (delq 'decimal-digit-value attributes))
581       (when (and (memq 'digit-value attributes)
582                  (setq value (get-char-attribute char 'digit-value)))
583         (insert (format "(digit-value\t . %S)%s"
584                         value
585                         line-breaking))
586         (setq attributes (delq 'digit-value attributes))
587         )
588       (when (and (memq 'numeric-value attributes)
589                  (setq value (get-char-attribute char 'numeric-value)))
590         (insert (format "(numeric-value\t . %S)%s"
591                         value
592                         line-breaking))
593         (setq attributes (delq 'numeric-value attributes))
594         )
595       )
596      (t
597       (when (and (memq 'digit-value attributes)
598                  (setq value (get-char-attribute char 'digit-value)))
599         (insert (format "(digit-value\t. %S)%s"
600                         value
601                         line-breaking))
602         (setq attributes (delq 'digit-value attributes))
603         )
604       (when (and (memq 'numeric-value attributes)
605                  (setq value (get-char-attribute char 'numeric-value)))
606         (insert (format "(numeric-value\t. %S)%s"
607                         value
608                         line-breaking))
609         (setq attributes (delq 'numeric-value attributes))
610         )))
611     (when (and (memq 'iso-10646-comment attributes)
612                (setq value (get-char-attribute char 'iso-10646-comment)))
613       (insert (format "(iso-10646-comment\t. %S)%s"
614                       value
615                       line-breaking))
616       (setq attributes (delq 'iso-10646-comment attributes))
617       )
618     (when (and (memq 'morohashi-daikanwa attributes)
619                (setq value (get-char-attribute char 'morohashi-daikanwa)))
620       (insert (format "(morohashi-daikanwa\t%s)%s"
621                       (mapconcat (function prin1-to-string) value " ")
622                       line-breaking))
623       (setq attributes (delq 'morohashi-daikanwa attributes))
624       )
625     (setq radical nil
626           strokes nil)
627     (when (and (memq 'ideographic-radical attributes)
628                (setq value (get-char-attribute char 'ideographic-radical)))
629       (setq radical value)
630       (insert (format "(ideographic-radical . %S)\t; %c%s"
631                       radical
632                       (ideographic-radical radical)
633                       line-breaking))
634       (setq attributes (delq 'ideographic-radical attributes))
635       )
636     (when (and (memq 'shuowen-radical attributes)
637                (setq value (get-char-attribute char 'shuowen-radical)))
638       (insert (format "(shuowen-radical\t. %S)\t; %c%s"
639                       value
640                       (shuowen-radical value)
641                       line-breaking))
642       (setq attributes (delq 'shuowen-radical attributes))
643       )
644     (let (key)
645       (dolist (domain
646                (append
647                 char-db-feature-domains
648                 (let (dest domain)
649                   (dolist (feature (char-attribute-list))
650                     (setq feature (symbol-name feature))
651                     (when (string-match
652                            "\\(radical\\|strokes\\)@\\([^@*]+\\)\\(\\*\\|$\\)"
653                            feature)
654                       (setq domain (intern (match-string 2 feature)))
655                      (unless (memq domain dest)
656                        (setq dest (cons domain dest)))))
657                   (sort dest #'string<))))
658         (setq key (intern (format "%s@%s" 'ideographic-radical domain)))
659         (when (and (memq key attributes)
660                    (setq value (get-char-attribute char key)))
661           (setq radical value)
662           (insert (format "(%s . %S)\t; %c%s"
663                           key
664                           radical
665                           (ideographic-radical radical)
666                           line-breaking))
667           (setq attributes (delq key attributes))
668           )
669         (setq key (intern (format "%s@%s" 'ideographic-strokes domain)))
670         (when (and (memq key attributes)
671                    (setq value (get-char-attribute char key)))
672           (setq strokes value)
673           (insert (format "(%s . %S)%s"
674                           key
675                           strokes
676                           line-breaking))
677           (setq attributes (delq key attributes))
678           )
679         (setq key (intern (format "%s@%s" 'total-strokes domain)))
680         (when (and (memq key attributes)
681                    (setq value (get-char-attribute char key)))
682           (insert (format "(%s       . %S)%s"
683                           key
684                           value
685                           line-breaking))
686           (setq attributes (delq key attributes))
687           )
688         (dolist (feature '(ideographic-radical
689                            ideographic-strokes
690                            total-strokes))
691           (setq key (intern (format "%s@%s*sources" feature domain)))
692           (when (and (memq key attributes)
693                      (setq value (get-char-attribute char key)))
694             (insert (format "(%s%s" key line-breaking))
695             (dolist (cell value)
696               (insert (format " %s" cell)))
697             (insert ")")
698             (insert line-breaking)
699             (setq attributes (delq key attributes))
700             ))
701         ))
702     (when (and (memq 'ideographic-strokes attributes)
703                (setq value (get-char-attribute char 'ideographic-strokes)))
704       (setq strokes value)
705       (insert (format "(ideographic-strokes . %S)%s"
706                       strokes
707                       line-breaking))
708       (setq attributes (delq 'ideographic-strokes attributes))
709       )
710     (when (and (memq 'kangxi-radical attributes)
711                (setq value (get-char-attribute char 'kangxi-radical)))
712       (unless (eq value radical)
713         (insert (format "(kangxi-radical\t . %S)\t; %c%s"
714                         value
715                         (ideographic-radical value)
716                         line-breaking))
717         (or radical
718             (setq radical value)))
719       (setq attributes (delq 'kangxi-radical attributes))
720       )
721     (when (and (memq 'kangxi-strokes attributes)
722                (setq value (get-char-attribute char 'kangxi-strokes)))
723       (unless (eq value strokes)
724         (insert (format "(kangxi-strokes\t . %S)%s"
725                         value
726                         line-breaking))
727         (or strokes
728             (setq strokes value)))
729       (setq attributes (delq 'kangxi-strokes attributes))
730       )
731     (when (and (memq 'japanese-radical attributes)
732                (setq value (get-char-attribute char 'japanese-radical)))
733       (unless (eq value radical)
734         (insert (format "(japanese-radical\t . %S)\t; %c%s"
735                         value
736                         (ideographic-radical value)
737                         line-breaking))
738         (or radical
739             (setq radical value)))
740       (setq attributes (delq 'japanese-radical attributes))
741       )
742     (when (and (memq 'japanese-strokes attributes)
743                (setq value (get-char-attribute char 'japanese-strokes)))
744       (unless (eq value strokes)
745         (insert (format "(japanese-strokes\t . %S)%s"
746                         value
747                         line-breaking))
748         (or strokes
749             (setq strokes value)))
750       (setq attributes (delq 'japanese-strokes attributes))
751       )
752     (when (and (memq 'cns-radical attributes)
753                (setq value (get-char-attribute char 'cns-radical)))
754       (insert (format "(cns-radical\t . %S)\t; %c%s"
755                       value
756                       (ideographic-radical value)
757                       line-breaking))
758       (setq attributes (delq 'cns-radical attributes))
759       )
760     (when (and (memq 'cns-strokes attributes)
761                (setq value (get-char-attribute char 'cns-strokes)))
762       (unless (eq value strokes)
763         (insert (format "(cns-strokes\t . %S)%s"
764                         value
765                         line-breaking))
766         (or strokes
767             (setq strokes value)))
768       (setq attributes (delq 'cns-strokes attributes))
769       )
770     (when (and (memq 'shinjigen-1-radical attributes)
771                (setq value (get-char-attribute char 'shinjigen-1-radical)))
772       (unless (eq value radical)
773         (insert (format "(shinjigen-1-radical . %S)\t; %c%s"
774                         value
775                         (ideographic-radical value)
776                         line-breaking))
777         (or radical
778             (setq radical value)))
779       (setq attributes (delq 'shinjigen-1-radical attributes))
780       )
781     (when (and (memq 'ideographic- attributes)
782                (setq value (get-char-attribute char 'ideographic-)))
783       (insert "(ideographic-       ")
784       (setq lbs (concat "\n" (make-string (current-column) ?\ ))
785             separator nil)
786       (while (consp value)
787         (setq cell (car value))
788         (if (integerp cell)
789             (setq cell (decode-char '=ucs cell)))
790         (cond ((characterp cell)
791                (if separator
792                    (insert lbs))
793                (if readable
794                    (insert (format "%S" cell))
795                  (char-db-insert-char-spec cell readable))
796                (setq separator lbs))
797               ((consp cell)
798                (if separator
799                    (insert lbs))
800                (if (consp (car cell))
801                    (char-db-insert-char-spec cell readable)
802                  (char-db-insert-char-reference cell readable))
803                (setq separator lbs))
804               (t
805                (if separator
806                    (insert separator))
807                (insert (prin1-to-string cell))
808                (setq separator " ")))
809         (setq value (cdr value)))
810       (insert ")")
811       (insert line-breaking)
812       (setq attributes (delq 'ideographic- attributes)))
813     (when (and (memq 'total-strokes attributes)
814                (setq value (get-char-attribute char 'total-strokes)))
815       (insert (format "(total-strokes       . %S)%s"
816                       value
817                       line-breaking))
818       (setq attributes (delq 'total-strokes attributes))
819       )
820     (when (and (memq '->ideograph attributes)
821                (setq value (get-char-attribute char '->ideograph)))
822       (insert (format "(->ideograph\t%s)%s"
823                       (mapconcat (lambda (code)
824                                    (cond ((symbolp code)
825                                           (symbol-name code))
826                                          ((integerp code)
827                                           (format "#x%04X" code))
828                                          (t
829                                           (format "%s %S"
830                                                   line-breaking code))))
831                                  value " ")
832                       line-breaking))
833       (setq attributes (delq '->ideograph attributes))
834       )
835     (when (and (memq '->decomposition attributes)
836                (setq value (get-char-attribute char '->decomposition)))
837       (insert (format "(->decomposition\t%s)%s"
838                       (mapconcat (lambda (code)
839                                    (cond ((symbolp code)
840                                           (symbol-name code))
841                                          ((characterp code)
842                                           (if readable
843                                               (format "%S" code)
844                                             (format "#x%04X"
845                                                     (char-int code))
846                                             ))
847                                          ((integerp code)
848                                           (format "#x%04X" code))
849                                          (t
850                                           (format "%s%S" line-breaking code))))
851                                  value " ")
852                       line-breaking))
853       (setq attributes (delq '->decomposition attributes))
854       )
855     (if (equal (get-char-attribute char '->titlecase)
856                (get-char-attribute char '->uppercase))
857         (setq attributes (delq '->titlecase attributes)))
858     (when (and (memq '->mojikyo attributes)
859                (setq value (get-char-attribute char '->mojikyo)))
860       (insert (format "(->mojikyo\t\t. %06d)\t; %c%s"
861                       value (decode-char 'mojikyo value)
862                       line-breaking))
863       (setq attributes (delq '->mojikyo attributes))
864       )
865     (when (and (memq 'hanyu-dazidian-vol attributes)
866                (setq value (get-char-attribute char 'hanyu-dazidian-vol)))
867       (insert (format "(hanyu-dazidian-vol  . %d)%s"
868                       value line-breaking))
869       (setq attributes (delq 'hanyu-dazidian-vol attributes))
870       )
871     (when (and (memq 'hanyu-dazidian-page attributes)
872                (setq value (get-char-attribute char 'hanyu-dazidian-page)))
873       (insert (format "(hanyu-dazidian-page . %d)%s"
874                       value line-breaking))
875       (setq attributes (delq 'hanyu-dazidian-page attributes))
876       )
877     (when (and (memq 'hanyu-dazidian-char attributes)
878                (setq value (get-char-attribute char 'hanyu-dazidian-char)))
879       (insert (format "(hanyu-dazidian-char . %d)%s"
880                       value line-breaking))
881       (setq attributes (delq 'hanyu-dazidian-char attributes))
882       )
883     (unless readable
884       (dolist (ignored '(composition
885                          ->denotational <-subsumptive ->ucs-unified
886                          ->ideographic-component-forms))
887         (setq attributes (delq ignored attributes))))
888     (while attributes
889       (setq name (car attributes))
890       (if (setq value (get-char-attribute char name))
891           (cond ((setq ret (find-charset name))
892                  (setq name (charset-name ret))
893                  (if (and (not (memq name dest-ccss))
894                           (prog1
895                               (setq value (get-char-attribute char name))
896                             (setq dest-ccss (cons name dest-ccss))))
897                      (char-db-insert-ccs-feature name value line-breaking))
898                  )
899                 ((string-match "^=>ucs@" (symbol-name name))
900                  (insert (format "(%-18s . #x%04X)\t; %c%s"
901                                  name value (decode-char '=ucs value)
902                                  line-breaking))
903                  )
904                 ((eq name 'jisx0208-1978/4X)
905                  (insert (format "(%-18s . #x%04X)%s"
906                                  name value
907                                  line-breaking))
908                  )
909                 ((and (not readable)
910                       (null (get-char-attribute
911                              char
912                              (intern (format "%s*sources" name))))
913                       (not (string-match "\\*sources$" (symbol-name name)))
914                       (or (eq name '<-identical)
915                           (string-match "^->halfwidth" (symbol-name name))
916                           (and
917                            (string-match "^->fullwidth" (symbol-name name))
918                            (not
919                             (and (consp value)
920                                  (characterp (car value))
921                                  (encode-char
922                                   (car value) '=ucs 'defined-only))))
923                           (string-match "^->simplified" (symbol-name name))
924                           (string-match "^->vulgar" (symbol-name name))
925                           (string-match "^->wrong" (symbol-name name))
926                           (string-match "^->same" (symbol-name name))
927                           (string-match "^->formed" (symbol-name name))
928                           (string-match "^->original" (symbol-name name))
929                           (string-match "^->ancient" (symbol-name name))
930                           (string-match "^->Oracle-Bones" (symbol-name name))
931                           ))
932                  )
933                 ((or (eq name 'ideographic-structure)
934                      (eq name 'ideographic-combination)
935                      (eq name 'ideographic-)
936                      (string-match "^\\(->\\|<-\\)" (symbol-name name)))
937                  (insert (format "(%-18s%s " name line-breaking))
938                  (setq lbs (concat "\n" (make-string (current-column) ?\ ))
939                        separator nil)
940                  (while (consp value)
941                    (setq cell (car value))
942                    (if (integerp cell)
943                        (setq cell (decode-char '=ucs cell)))
944                    (cond ((eq name '->subsumptive)
945                           (when (or (not
946                                      (some (lambda (atr)
947                                              (get-char-attribute cell atr))
948                                            char-db-ignored-attributes))
949                                     (some (lambda (ccs)
950                                             (encode-char cell ccs
951                                                          'defined-only))
952                                           ccss))
953                             (if separator
954                                 (insert lbs))
955                             (let ((char-db-ignored-attributes
956                                    (cons '<-subsumptive
957                                          char-db-ignored-attributes)))
958                               (insert-char-attributes cell readable))
959                             (setq separator lbs))
960                           )
961                          ((characterp cell)
962                           (setq sources
963                                 (get-char-attribute
964                                  char
965                                  (intern (format "%s*sources" name))))
966                           (setq required-features nil)
967                           (dolist (source sources)
968                             (cond
969                              ((memq source '(JP JP/Jouyou
970                                                 shinjigen-1))
971                               (setq required-features
972                                     (union required-features
973                                            '(=jis-x0208
974                                              =jis-x0208@1990
975                                              =jis-x0213-1-2000
976                                              =jis-x0213-2-2000
977                                              =jis-x0212
978                                              =jis-x0208@1983
979                                              =jis-x0208@1978))))
980                              ((eq source 'CN)
981                               (setq required-features
982                                     (union required-features
983                                            '(=gb2312
984                                              =gb12345
985                                              =iso-ir165)))))
986                             (cond
987                              ((find-charset
988                                (setq ret (intern (format "=%s" source))))
989                               (setq required-features
990                                     (cons ret required-features)))
991                              (t (setq required-features
992                                       (cons source required-features)))))
993                           (cond ((string-match "@JP" (symbol-name name))
994                                  (setq required-features
995                                        (union required-features
996                                               '(=jis-x0208
997                                                 =jis-x0208@1990
998                                                 =jis-x0213-1-2000
999                                                 =jis-x0213-2-2000
1000                                                 =jis-x0212
1001                                                 =jis-x0208@1983
1002                                                 =jis-x0208@1978))))
1003                                 ((string-match "@CN" (symbol-name name))
1004                                  (setq required-features
1005                                        (union required-features
1006                                               '(=gb2312
1007                                                 =gb12345
1008                                                 =iso-ir165)))))
1009                           (if separator
1010                               (insert lbs))
1011                           (if readable
1012                               (insert (format "%S" cell))
1013                             (char-db-insert-char-spec cell readable
1014                                                       nil
1015                                                       required-features))
1016                           (setq separator lbs))
1017                          ((consp cell)
1018                           (if separator
1019                               (insert lbs))
1020                           (if (consp (car cell))
1021                               (char-db-insert-char-spec cell readable)
1022                             (char-db-insert-char-reference cell readable))
1023                           (setq separator lbs))
1024                          (t
1025                           (if separator
1026                               (insert separator))
1027                           (insert (prin1-to-string cell))
1028                           (setq separator " ")))
1029                    (setq value (cdr value)))
1030                  (insert ")")
1031                  (insert line-breaking))
1032                 ((memq name '(ideograph=
1033                               original-ideograph-of
1034                               ancient-ideograph-of
1035                               vulgar-ideograph-of
1036                               wrong-ideograph-of
1037                               ;; simplified-ideograph-of
1038                               ideographic-variants
1039                               ;; ideographic-different-form-of
1040                               ))
1041                  (insert (format "(%-18s%s " name line-breaking))
1042                  (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1043                        separator nil)
1044                  (while (consp value)
1045                    (setq cell (car value))
1046                    (if (and (consp cell)
1047                             (consp (car cell)))
1048                        (progn
1049                          (if separator
1050                              (insert lbs))
1051                          (char-db-insert-alist cell readable)
1052                          (setq separator lbs))
1053                      (if separator
1054                          (insert separator))
1055                      (insert (prin1-to-string cell))
1056                      (setq separator " "))
1057                    (setq value (cdr value)))
1058                  (insert ")")
1059                  (insert line-breaking))
1060                 ((consp value)
1061                  (insert (format "(%-18s " name))
1062                  (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1063                        separator nil)
1064                  (while (consp value)
1065                    (setq cell (car value))
1066                    (if (and (consp cell)
1067                             (consp (car cell))
1068                             (setq ret (condition-case nil
1069                                           (find-char cell)
1070                                         (error nil))))
1071                        (progn
1072                          (setq rest cell
1073                                al nil
1074                                cal nil)
1075                          (while rest
1076                            (setq key (car (car rest)))
1077                            (if (find-charset key)
1078                                (setq cal (cons key cal))
1079                              (setq al (cons key al)))
1080                            (setq rest (cdr rest)))
1081                          (if separator
1082                              (insert lbs))
1083                          (insert-char-attributes ret
1084                                                  readable
1085                                                  al cal)
1086                          (setq separator lbs))
1087                      (setq ret (prin1-to-string cell))
1088                      (if separator
1089                          (if (< (+ (current-column)
1090                                    (length ret)
1091                                    (length separator))
1092                                 76)
1093                              (insert separator)
1094                            (insert lbs)))
1095                      (insert ret)
1096                      (setq separator " "))
1097                    (setq value (cdr value)))
1098                  (insert ")")
1099                  (insert line-breaking))
1100                 (t
1101                  (insert (format "(%-18s . %S)%s"
1102                                  name value
1103                                  line-breaking)))
1104                 ))
1105       (setq attributes (cdr attributes)))
1106     (insert ")")))
1107
1108 (defun insert-char-data (char &optional readable
1109                               attributes)
1110   (save-restriction
1111     (narrow-to-region (point)(point))
1112     (insert "(define-char
1113   '")
1114     (insert-char-attributes char readable attributes)
1115     (insert ")\n")
1116     (goto-char (point-min))
1117     (while (re-search-forward "[ \t]+$" nil t)
1118       (replace-match ""))
1119     ;; from tabify.
1120     (goto-char (point-min))
1121     (while (re-search-forward "[ \t][ \t][ \t]*" nil t)
1122       (let ((column (current-column))
1123             (indent-tabs-mode t))
1124         (delete-region (match-beginning 0) (point))
1125         (indent-to column)))
1126     (goto-char (point-max))
1127     ;; (tabify (point-min)(point-max))
1128     ))
1129
1130 (defun insert-char-data-with-variant (char &optional printable
1131                                            no-ucs-unified
1132                                            script excluded-script)
1133   (insert-char-data char printable)
1134   (let ((variants (char-variants char))
1135         rest
1136         variant vs ret)
1137     (setq variants (sort variants #'<))
1138     (setq rest variants)
1139     (setq variants (cons char variants))
1140     (while rest
1141       (setq variant (car rest))
1142       (unless (get-char-attribute variant '<-subsumptive)
1143         (if (and (or (null script)
1144                      (null (setq vs (get-char-attribute variant 'script)))
1145                      (memq script vs))
1146                  (or (null excluded-script)
1147                      (null (setq vs (get-char-attribute variant 'script)))
1148                      (not (memq excluded-script vs))))
1149             (unless (and no-ucs-unified (get-char-attribute variant '=ucs))
1150               (insert-char-data variant printable)
1151               (if (setq ret (char-variants variant))
1152                   (while ret
1153                     (or (memq (car ret) variants)
1154                         ;; (get-char-attribute (car ret) '<-subsumptive)
1155                         (setq rest (nconc rest (list (car ret)))))
1156                     (setq ret (cdr ret)))))))
1157       (setq rest (cdr rest)))))
1158
1159 (defun insert-char-range-data (min max &optional script excluded-script)
1160   (let ((code min)
1161         char)
1162     (while (<= code max)
1163       (setq char (decode-char '=ucs code))
1164       (if (encode-char char '=ucs 'defined-only)
1165           (insert-char-data-with-variant char nil 'no-ucs-unified
1166                                          script excluded-script))
1167       (setq code (1+ code)))))
1168
1169 (defun write-char-range-data-to-file (min max file
1170                                           &optional script excluded-script)
1171   (let ((coding-system-for-write char-db-file-coding-system))
1172     (with-temp-buffer
1173       (insert (format ";; -*- coding: %s -*-\n"
1174                       char-db-file-coding-system))
1175       (insert-char-range-data min max script excluded-script)
1176       (write-region (point-min)(point-max) file))))
1177
1178 (defvar what-character-original-window-configuration)
1179
1180 ;;;###autoload
1181 (defun what-char-definition (char)
1182   (interactive (list (char-after)))
1183   (let ((buf (get-buffer-create "*Character Description*"))
1184         (the-buf (current-buffer))
1185         (win-conf (current-window-configuration)))
1186     (pop-to-buffer buf)
1187     (make-local-variable 'what-character-original-window-configuration)
1188     (setq what-character-original-window-configuration win-conf)
1189     (setq buffer-read-only nil)
1190     (erase-buffer)
1191     (condition-case err
1192         (progn
1193           (insert-char-data-with-variant char 'printable)
1194           (unless (char-attribute-alist char)
1195             (insert (format ";; = %c\n"
1196                             (let* ((rest (split-char char))
1197                                    (ccs (pop rest))
1198                                    (code (pop rest)))
1199                               (while rest
1200                                 (setq code (logior (lsh code 8)
1201                                                    (pop rest))))
1202                               (decode-char ccs code)))))
1203           ;; (char-db-update-comment)
1204           (set-buffer-modified-p nil)
1205           (view-mode the-buf (lambda (buf)
1206                                (set-window-configuration
1207                                 what-character-original-window-configuration)
1208                                ))
1209           (goto-char (point-min)))
1210       (error (progn
1211                (set-window-configuration
1212                 what-character-original-window-configuration)
1213                (signal (car err) (cdr err)))))))
1214
1215 (provide 'char-db-util)
1216
1217 ;;; char-db-util.el ends here