Reformatted.
[chise/xemacs-chise.git] / lisp / utf-2000 / char-db-util.el
1 ;;; char-db-util.el --- Character Database utility -*- coding: utf-8-er; -*-
2
3 ;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
4 ;;   2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 MORIOKA Tomohiko.
5
6 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
7 ;; Keywords: CHISE, Character Database, ISO/IEC 10646, UCS, Unicode, MULE.
8
9 ;; This file is part of XEmacs CHISE.
10
11 ;; XEmacs CHISE is free software; you can redistribute it and/or
12 ;; modify it under the terms of the GNU General Public License as
13 ;; published by the Free Software Foundation; either version 2, or (at
14 ;; your option) any later version.
15
16 ;; XEmacs CHISE is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 ;; General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with XEmacs CHISE; see the file COPYING.  If not, write to
23 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Code:
27
28 (require 'chise-subr)
29 (require 'ideograph-subr)
30
31 (defconst unidata-normative-category-alist
32   '(("Lu" letter        uppercase)
33     ("Ll" letter        lowercase)
34     ("Lt" letter        titlecase)
35     ("Mn" mark          non-spacing)
36     ("Mc" mark          spacing-combining)
37     ("Me" mark          enclosing)
38     ("Nd" number        decimal-digit)
39     ("Nl" number        letter)
40     ("No" number        other)
41     ("Zs" separator     space)
42     ("Zl" separator     line)
43     ("Zp" separator     paragraph)
44     ("Cc" other         control)
45     ("Cf" other         format)
46     ("Cs" other         surrogate)
47     ("Co" other         private-use)
48     ("Cn" other         not-assigned)))
49
50 (defconst unidata-informative-category-alist
51   '(("Lm" letter        modifier)
52     ("Lo" letter        other)
53     ("Pc" punctuation   connector)
54     ("Pd" punctuation   dash)
55     ("Ps" punctuation   open)
56     ("Pe" punctuation   close)
57     ("Pi" punctuation   initial-quote)
58     ("Pf" punctuation   final-quote)
59     ("Po" punctuation   other)
60     ("Sm" symbol        math)
61     ("Sc" symbol        currency)
62     ("Sk" symbol        modifier)
63     ("So" symbol        other)
64     ))
65
66 (defconst shuowen-radicals
67   [?一 ?上 ?示 ?三 ?王 ?玉 ?玨 ?气 ?士 ?丨 ; 010
68    ?屮 ?艸 ?蓐 ?茻 ?小 ?八 ?釆 ?半 ?牛 ?犛 ; 020
69    ?告 ?口 ?凵 ?吅 ?哭 ?走 ?止 ?癶 ?步 ?此 ; 030
70    ?正 ?是 ?辵 ?彳 ?廴 ?㢟 ?行 ?齒 ?牙 ?足 ; 040
71    ?疋 ?品 ?龠 ?冊 ?㗊 ?舌 ?干 ?谷 ?只 ?㕯 ; 050
72    ?句 ?丩 ?古 ?十 ?卅 ?言 ?誩 ?音 ?䇂 ?丵 ; 060
73    ?菐 ?𠬞 ?𠬜 ?共 ?異 ?舁 ?𦥑 ?䢅 ?爨 ?革 ; 070
74    ?鬲 ?䰜 ?爪 ?𠃨 ?鬥 ?又 ?𠂇 ?㕜 ?支 ?𦘒 ; 080
75    ?聿 ?畫 ?隶 ?臤 ?臣 ?殳 ?殺 ?𠘧 ?寸 ?皮 ; 090
76    ?㼱 ?攴 ?敎 ?卜 ?用 ?爻 ?㸚 ?𥄎 ?目 ?䀠 ; 100
77    ?眉 ?盾 ?自 ?白 ?鼻 ?皕 ?習 ?羽 ?隹 ?奞 ; 110
78    ?萑 ?𦫳 ?苜 ?羊 ?羴 ?瞿 ?雔 ?雥 ?鳥 ?烏 ; 120
79    ?𠦒 ?冓 ?幺 ?𢆶 ?叀 ?玄 ?予 ?放 ?𠬪 ?𣦼 ; 130
80    ?歺 ?死 ?冎 ?骨 ?肉 ?筋 ?刀 ?刃 ?㓞 ?丰 ; 140
81    ?耒 ?𧢲 ?竹 ?箕 ?丌 ?左 ?工 ?㠭 ?巫 ?甘 ; 150
82    ?曰 ?乃 ?丂 ?可 ?兮 ?号 ?亏 ?旨 ?喜 ?壴 ; 160
83 ;  ?旨 ?曰 ?乃 ?丂 ?可 ?兮 ?号 ?亏 ?喜 ?壴 ; 160
84    ?鼓 ?豈 ?豆 ?豊 ?豐 ?䖒 ?虍 ?虎 ?虤 ?皿 ; 170
85    ?𠙴 ?去 ?血 ?丶 ?丹 ?青 ?井 ?皀 ?鬯 ?食 ; 180
86    ?亼 ?會 ?倉 ?入 ?缶 ?矢 ?高 ?冂 ?𩫏 ?京 ; 190
87    ?亯 ?𣆪 ?畗 ?㐭 ?嗇 ?來 ?麥 ?夊 ?舛 ?䑞 ; 200
88    ?韋 ?弟 ?夂 ?久 ?桀 ?木 ?東 ?林 ?才 ?叒 ; 210
89    ?之 ?帀 ?出 ?𣎵 ?生 ?乇 ?𠂹 ?𠌶 ?華 ?𥝌 ; 220
90    ?稽 ?巢 ?桼 ?束 ?㯻 ?囗 ?員 ?貝 ?邑 ?𨛜 ; 230
91    ?日 ?旦 ?倝 ?㫃 ?冥 ?晶 ?月 ?有 ?明 ?囧 ; 240
92    ?夕 ?多 ?毌 ?𢎘 ?𣐺 ?卣 ?齊 ?朿 ?片 ?鼎 ; 250
93    ?克 ?彔 ?禾 ?秝 ?黍 ?香 ?米 ?毇 ?臼 ?凶 ; 260
94    ?𣎳 ?林 ?麻 ?尗 ?耑 ?韭 ?瓜 ?瓠 ?宀 ?宮 ; 270
95    ?呂 ?穴 ?㝱 ?𤕫 ?冖 ?𠔼 ?冃 ?㒳 ?网 ?襾 ; 280
96    ?巾 ?巿 ?帛 ?白 ?㡀 ?黹 ?人 ?𠤎 ?匕 ?从 ; 290
97    ?比 ?北 ?丘 ?㐺 ?𡈼 ?重 ?臥 ?身 ?㐆 ?衣 ; 300
98    ])
99
100 (defun shuowen-radical (number)
101   (aref shuowen-radicals (1- number)))
102
103 (defvar char-db-file-coding-system 'utf-8-mcs-er)
104
105 (defvar char-db-ignored-attributes '(ideographic-products))
106
107 (defvar char-db-coded-charset-priority-list
108   '(ascii
109     control-1
110     latin-iso8859-1
111     latin-iso8859-2
112     latin-iso8859-3
113     latin-iso8859-4
114     latin-iso8859-9
115     latin-jisx0201
116     cyrillic-iso8859-5
117     greek-iso8859-7
118     thai-tis620
119     =adobe-japan1-0
120     =adobe-japan1-1
121     =adobe-japan1-2
122     =adobe-japan1-3
123     =adobe-japan1-4
124     =adobe-japan1-5
125     =adobe-japan1-6
126     =jis-x0208
127     =jis-x0208@1978
128     =jis-x0208@1983
129     japanese-jisx0212
130     chinese-gb2312
131     =jis-x0208@1990
132     chinese-cns11643-1
133     chinese-cns11643-2
134     chinese-cns11643-3
135     chinese-cns11643-4
136     chinese-cns11643-5
137     chinese-cns11643-6
138     chinese-cns11643-7
139     =jis-x0213-1
140     =jis-x0213-1@2000
141     =jis-x0213-1@2004
142     =jis-x0213-2
143     korean-ksc5601
144     chinese-isoir165
145     katakana-jisx0201
146     hebrew-iso8859-8
147     chinese-gb12345
148     latin-viscii
149     ethiopic-ucs
150     =big5-cdp
151     =hanyo-denshi/ja
152     =hanyo-denshi/jb
153     =hanyo-denshi/jc
154     =hanyo-denshi/jd
155     =hanyo-denshi/ft
156     =hanyo-denshi/ia
157     =hanyo-denshi/ib
158     =hanyo-denshi/hg
159     =hanyo-denshi/jt
160     =hanyo-denshi/ks
161     ==adobe-japan1-0
162     ==adobe-japan1-1
163     ==adobe-japan1-2
164     ==adobe-japan1-3
165     ==adobe-japan1-4
166     ==adobe-japan1-5
167     ==adobe-japan1-6
168     ==jis-x0208
169     ==jis-x0213-1
170     ==jis-x0213-2
171     ==hanyo-denshi/ja
172     ==hanyo-denshi/jb
173     ==hanyo-denshi/jc
174     ==hanyo-denshi/ft
175     ==hanyo-denshi/ib
176     ==hanyo-denshi/hg
177     ==hanyo-denshi/jt
178     ==hanyo-denshi/ks
179     =gt
180     =gt-k
181     =daikanwa
182     =daikanwa@rev2
183     =daikanwa@rev1
184     ==daikanwa
185     =cbeta
186     ideograph-hanziku-1
187     ideograph-hanziku-2
188     ideograph-hanziku-3
189     ideograph-hanziku-4
190     ideograph-hanziku-5
191     ideograph-hanziku-6
192     ideograph-hanziku-7
193     ideograph-hanziku-8
194     ideograph-hanziku-9
195     ideograph-hanziku-10
196     ideograph-hanziku-11
197     ideograph-hanziku-12
198     ;; =>>>adobe-japan1-0
199     ;; =>>>adobe-japan1-1
200     ;; =>>>adobe-japan1-2
201     ;; =>>>adobe-japan1-3
202     ;; =>>>adobe-japan1-4
203     ;; =>>>adobe-japan1-5
204     ;; =>>>adobe-japan1-6
205     ;; =>>>jis-x0208
206     ;; =>>>jis-x0213-1
207     ;; =>>>jis-x0213-2
208     ;; =>>>hanyo-denshi/ja
209     ;; =>>>hanyo-denshi/jb
210     ;; =>>>hanyo-denshi/jc
211     ;; =>>>hanyo-denshi/ft
212     ;; =>>>hanyo-denshi/ib
213     ;; =>>>hanyo-denshi/hg
214     ;; =>>>hanyo-denshi/jt
215     ;; =>>>hanyo-denshi/ks
216     ;; =>>>gt
217     =>>adobe-japan1-0
218     =>>adobe-japan1-1
219     =>>adobe-japan1-2
220     =>>adobe-japan1-3
221     =>>adobe-japan1-4
222     =>>adobe-japan1-5
223     =>>adobe-japan1-6
224     =>>jis-x0208
225     =>>jis-x0213-1
226     =>>jis-x0213-1@2000
227     =>>jis-x0213-1@2004
228     =>>jis-x0213-2
229     =>>jis-x0208@1978
230     =>>hanyo-denshi/ft
231     =>>hanyo-denshi/jt
232     =>>hanyo-denshi/ks
233     =>>gt
234     =>>daikanwa
235     =>>cbeta
236     =+>jis-x0208
237     =+>jis-x0213-1
238     =+>jis-x0213-2
239     =+>adobe-japan1-0
240     =+>adobe-japan1-1
241     =+>adobe-japan1-2
242     =+>adobe-japan1-3
243     =+>adobe-japan1-4
244     =+>adobe-japan1-5
245     =+>adobe-japan1-6
246     =+>jis-x0208@1978
247     =>jis-x0208
248     =>jis-x0208@1997
249     =>jis-x0213-1
250     =>jis-x0213-1@2000
251     =>jis-x0213-1@2004
252     =>jis-x0213-2
253     ==>ucs@bucs
254     =>ucs@hanyo-denshi
255     =>ucs@iso
256     =>ucs@unicode
257     =>ucs@jis
258     =>ucs@cns
259     =>ucs@ks
260     =+>ucs@iso
261     =+>ucs@unicode
262     =+>ucs@jis
263     =+>ucs@jis/1990
264     =+>ucs@cns
265     =+>ucs@ks
266     =>>ucs@iso
267     =>>ucs@unicode
268     =>>ucs@jis
269     =>>ucs@cns
270     =>>>ucs@iso
271     =>>>ucs@unicode
272     ==ucs@iso
273     ==ucs@unicode
274     ==gb2312
275     ==ks-x1001
276     ==cns11643-1
277     ==cns11643-2
278     ==cns11643-3
279     ==gt
280     ==jis-x0208@1990
281     ==gt-k
282     =ucs@iso
283     =ucs@unicode
284     =ucs@cns
285     =>>big5-cdp
286     =>>gt-k
287     =+>gt
288     =>gt
289     =>big5-cdp
290     =>daikanwa
291     =>daikanwa/ho
292     =>cns11643-7
293     =big5
294     =big5-eten
295     =>gt-k
296     =zinbun-oracle
297     =>zinbun-oracle
298     =ruimoku-v6
299     =>>ruimoku-v6
300     =jef-china3
301     =shinjigen
302     =big5-cdp-var-3
303     =big5-cdp-var-5))
304
305
306 ;;; @ char-db formatters
307 ;;;
308
309 (defun char-db-make-char-spec (char)
310   (let (ret char-spec)
311     (cond ((characterp char)
312            (cond ((and (setq ret (encode-char char '=ucs 'defined-only))
313                        (not (and (<= #xE000 ret)(<= ret #xF8FF))))
314                   (setq char-spec (list (cons '=ucs ret)))
315                   (cond ((setq ret (get-char-attribute char 'name))
316                          (setq char-spec (cons (cons 'name ret) char-spec))
317                          )
318                         ((setq ret (get-char-attribute char 'name*))
319                          (setq char-spec (cons (cons 'name* ret) char-spec))
320                          ))
321                   )
322                  ((encode-char char '=adobe-japan1 'defined-only)
323                   (setq char-spec nil)
324                   (dolist (ccs (charset-list))
325                     (if (and (or (memq ccs
326                                        '(=adobe-japan1-0
327                                          =adobe-japan1-1
328                                          =adobe-japan1-2
329                                          =adobe-japan1-3
330                                          =adobe-japan1-4
331                                          =adobe-japan1-5
332                                          =adobe-japan1-6
333                                          ))
334                                  ;; (eq (charset-property ccs 'iso-ir) 177)
335                                  (string-match "=ucs@" (symbol-name ccs))
336                                  )
337                              (setq ccs (charset-name ccs))
338                              (null (assq ccs char-spec))
339                              (setq ret (encode-char char ccs 'defined-only)))
340                         (setq char-spec (cons (cons ccs ret) char-spec))))
341                   )
342                  ((encode-char char '==adobe-japan1 'defined-only)
343                   (setq char-spec nil)
344                   (dolist (ccs (charset-list))
345                     (if (and (or (memq ccs
346                                        '(==adobe-japan1-0
347                                          ==adobe-japan1-1
348                                          ==adobe-japan1-2
349                                          ==adobe-japan1-3
350                                          ==adobe-japan1-4
351                                          ==adobe-japan1-5
352                                          ==adobe-japan1-6
353                                          ))
354                                  ;; (eq (charset-property ccs 'iso-ir) 177)
355                                  (string-match "=ucs@" (symbol-name ccs))
356                                  )
357                              (setq ccs (charset-name ccs))
358                              (null (assq ccs char-spec))
359                              (setq ret (encode-char char ccs 'defined-only)))
360                         (setq char-spec (cons (cons ccs ret) char-spec))))
361                   )
362                  ((setq ret
363                         (catch 'tag
364                           (let ((rest char-db-coded-charset-priority-list)
365                                 ccs)
366                             (while rest
367                               (setq ccs (charset-name
368                                          (find-charset (car rest))))
369                               (if (setq ret
370                                         (encode-char char ccs
371                                                      'defined-only))
372                                   (throw 'tag (cons ccs ret)))
373                               (setq rest (cdr rest))))))
374                   (setq char-spec (list ret))
375                   (dolist (ccs (delq (car ret) (charset-list)))
376                     (if (and (or (charset-iso-final-char ccs)
377                                  (memq ccs
378                                        '(=daikanwa
379                                          =daikanwa@rev2
380                                          ;; =gt-k
381                                          =jis-x0208@1997
382                                          ))
383                                  (eq (charset-property ccs 'iso-ir) 177)
384                                  ;; (string-match "=ucs@" (symbol-name ccs))
385                                  )
386                              (setq ccs (charset-name ccs))
387                              (null (assq ccs char-spec))
388                              (setq ret (encode-char char ccs 'defined-only)))
389                         (setq char-spec (cons (cons ccs ret) char-spec))))
390                   (if (null char-spec)
391                       (setq char-spec (split-char char)))
392                   (cond ((setq ret (get-char-attribute char 'name))
393                          (setq char-spec (cons (cons 'name ret) char-spec))
394                          )
395                         ((setq ret (get-char-attribute char 'name*))
396                          (setq char-spec (cons (cons 'name* ret) char-spec))
397                          ))
398                   )
399                  ((setq ret (get-char-attribute
400                              char 'ideographic-combination))
401                   (setq char-spec
402                         (cons (cons 'ideographic-combination ret)
403                               char-spec))
404                   ))
405            char-spec)
406           ((consp char)
407            char))))
408     
409 (defun char-db-insert-char-spec (char &optional readable column
410                                       required-features)
411   (unless column
412     (setq column (current-column)))
413   (let (char-spec temp-char)
414     (setq char-spec (char-db-make-char-spec char))
415     (unless (or (characterp char) ; char
416                 (condition-case nil
417                     (setq char (find-char char-spec))
418                   (error nil)))
419       ;; define temporary character
420       ;;   Current implementation is dirty.
421       (setq temp-char (define-char (cons '(ideograph-daikanwa . 0)
422                                          char-spec)))
423       (remove-char-attribute temp-char 'ideograph-daikanwa)
424       (setq char temp-char))
425     (insert-char-attributes char
426                             readable
427                             (union (mapcar #'car char-spec)
428                                    required-features)
429                             nil 'for-sub-node)
430     (when temp-char
431       ;; undefine temporary character
432       ;;   Current implementation is dirty.
433       (setq char-spec (char-attribute-alist temp-char))
434       (while char-spec
435         (remove-char-attribute temp-char (car (car char-spec)))
436         (setq char-spec (cdr char-spec))))))
437
438 (defun char-db-insert-alist (alist &optional readable column)
439   (unless column
440     (setq column (current-column)))
441   (let ((line-breaking
442          (concat "\n" (make-string (1+ column) ?\ )))
443         name value
444         ret al ; cal
445         key
446         lbs cell rest separator)
447     (insert "(")
448     (while alist
449       (setq name (car (car alist))
450             value (cdr (car alist)))
451       (cond ((eq name 'char)
452              (insert "(char . ")
453              (if (setq ret (condition-case nil
454                                (find-char value)
455                              (error nil)))
456                  (progn
457                    (setq al nil
458                          ;; cal nil
459                          )
460                    (while value
461                      (setq key (car (car value)))
462                      ;; (if (find-charset key)
463                      ;;     (setq cal (cons key cal))
464                      (setq al (cons key al))
465                      ;; )
466                      (setq value (cdr value)))
467                    (insert-char-attributes ret
468                                            readable
469                                            (or al 'none) ; cal
470                                            nil 'for-sub-node))
471                (insert (prin1-to-string value)))
472              (insert ")")
473              (insert line-breaking))
474             ((consp value)
475              (insert (format "(%-18s " name))
476              (setq lbs (concat "\n" (make-string (current-column) ?\ )))
477              (while (consp value)
478                (setq cell (car value))
479                (if (and (consp cell)
480                         (consp (car cell))
481                         (setq ret (condition-case nil
482                                       (find-char cell)
483                                     (error nil)))
484                         )
485                    (progn
486                      (setq rest cell
487                            al nil
488                            ;; cal nil
489                            )
490                      (while rest
491                        (setq key (car (car rest)))
492                        ;; (if (find-charset key)
493                        ;;     (setq cal (cons key cal))
494                        (setq al (cons key al))
495                        ;; )
496                        (setq rest (cdr rest)))
497                      (if separator
498                          (insert lbs))
499                      (insert-char-attributes ret
500                                              readable
501                                              al ; cal
502                                              nil 'for-sub-node)
503                      (setq separator lbs))
504                  (if separator
505                      (insert separator))
506                  (insert (prin1-to-string cell))
507                  (setq separator " "))
508                (setq value (cdr value)))
509              (insert ")")
510              (insert line-breaking))
511             (t
512              (insert (format "(%-18s . %S)%s"
513                              name value
514                              line-breaking))))
515       (setq alist (cdr alist))))
516   (insert ")"))
517
518 (defun char-db-insert-char-reference (plist &optional readable column)
519   (unless column
520     (setq column (current-column)))
521   (let ((line-breaking
522          (concat "\n" (make-string (1+ column) ?\ )))
523         (separator "")
524         name value)
525     (insert "(")
526     (while plist
527       (setq name (pop plist))
528       (setq value (pop plist))
529       (cond ((eq name :char)
530              (insert separator)
531              (insert ":char\t")
532              (cond ((numberp value)
533                     (setq value (decode-char '=ucs value)))
534                    ;; ((consp value)
535                    ;;  (setq value (or (find-char value)
536                    ;;                  value)))
537                    )
538              (char-db-insert-char-spec value readable)
539              (insert line-breaking)
540              (setq separator ""))
541             ((eq name :radical)
542              (insert (format "%s%s\t%d ; %c%s"
543                              separator
544                              name value
545                              (ideographic-radical value)
546                              line-breaking))
547              (setq separator ""))
548             (t
549              (insert (format "%s%s\t%S" separator name value))
550              (setq separator line-breaking)))
551       ))
552   (insert ")"))
553
554 (defun char-db-decode-isolated-char (ccs code-point)
555   (let (ret)
556     (setq ret
557           (cond ((eq ccs 'arabic-iso8859-6)
558                  (decode-char ccs code-point))
559                 ;; ((eq ccs '=gt)
560                 ;;  (decode-builtin-char '==gt code-point))
561                 ((and (memq ccs '(=gt-pj-1
562                                   =gt-pj-2
563                                   =gt-pj-3
564                                   =gt-pj-4
565                                   =gt-pj-5
566                                   =gt-pj-6
567                                   =gt-pj-7
568                                   =gt-pj-8
569                                   =gt-pj-9
570                                   =gt-pj-10
571                                   =gt-pj-11))
572                       (setq ret (decode-char ccs code-point))
573                       (setq ret (encode-char ret '=gt 'defined-only)))
574                  (decode-builtin-char '=gt ret))
575                 (t
576                  (decode-builtin-char ccs code-point))))
577     (cond ((and (<= 0 (char-int ret))
578                 (<= (char-int ret) #x1F))
579            (decode-char '=ucs (+ #x2400 (char-int ret))))
580           ((= (char-int ret) #x7F)
581            ?\u2421)
582           (t ret))))
583
584 (defvar char-db-convert-obsolete-format t)
585
586 (defun char-db-insert-ccs-feature (name value line-breaking)
587   (cond
588    ((integerp value)
589     (insert
590      (format
591       (cond
592        ((memq name '(==shinjigen
593                      =shinjigen
594                      =shinjigen@1ed ==shinjigen@1ed
595                      =shinjigen@rev ==shinjigen@rev
596                      =shinjigen/+p@rev ==shinjigen/+p@rev
597                      ==daikanwa/ho
598                      =daikanwa/ho =>>daikanwa/ho =>daikanwa/ho))
599         "(%-18s .  %04d)\t; %c")
600        ((eq name '=shinjigen@1ed/24pr)
601         "(%-18s . %04d)\t; %c")
602        ((or (memq name '(==daikanwa
603                          =daikanwa =>>daikanwa =>daikanwa
604                          =daikanwa@rev1 =daikanwa@rev2
605                          =daikanwa/+p ==daikanwa/+p =>>daikanwa/+p
606                          =daikanwa/+2p =>>daikanwa/+2p
607                          =gt ==gt ; =>>>gt
608                          =>>gt =+>gt =>gt
609                          =gt-k ==gt-k =>>gt-k =>gt-k
610                          =adobe-japan1-0 ==adobe-japan1-0 ; =>>>adobe-japan1-0
611                          =adobe-japan1-1 ==adobe-japan1-1 ; =>>>adobe-japan1-1
612                          =adobe-japan1-2 ==adobe-japan1-2 ; =>>>adobe-japan1-2
613                          =adobe-japan1-3 ==adobe-japan1-3 ; =>>>adobe-japan1-3
614                          =adobe-japan1-4 ==adobe-japan1-4 ; =>>>adobe-japan1-4
615                          =adobe-japan1-5 ==adobe-japan1-5 ; =>>>adobe-japan1-5
616                          =adobe-japan1-6 ==adobe-japan1-6 ; =>>>adobe-japan1-6
617                          =>>adobe-japan1-0 =+>adobe-japan1-0
618                          =>>adobe-japan1-1 =+>adobe-japan1-1
619                          =>>adobe-japan1-2 =+>adobe-japan1-2
620                          =>>adobe-japan1-3 =+>adobe-japan1-3
621                          =>>adobe-japan1-4 =+>adobe-japan1-4
622                          =>>adobe-japan1-5 =+>adobe-japan1-5
623                          =>>adobe-japan1-6 =+>adobe-japan1-6
624                          =cbeta =>>cbeta
625                          =zinbun-oracle =>zinbun-oracle))
626             ;; (string-match "^=adobe-" (symbol-name name))
627             )
628         "(%-18s . %05d)\t; %c")
629        ((memq name '(=hanyo-denshi/ks
630                      ==hanyo-denshi/ks ; =>>>hanyo-denshi/ks
631                      =>>hanyo-denshi/ks
632                      =koseki
633                      =zihai mojikyo))
634         "(%-18s . %06d)\t; %c")
635        ((>= (charset-dimension name) 2)
636         "(%-18s . #x%04X)\t; %c")
637        (t
638         "(%-18s . #x%02X)\t; %c"))
639       name
640       (if (= (charset-iso-graphic-plane name) 1)
641           (logior value
642                   (cond ((= (charset-dimension name) 1)
643                          #x80)
644                         ((= (charset-dimension name) 2)
645                          #x8080)
646                         ((= (charset-dimension name) 3)
647                          #x808080)
648                         (t 0)))
649         value)
650       (char-db-decode-isolated-char name value)))
651     (if (and (= (charset-chars name) 94)
652              (= (charset-dimension name) 2))
653         (insert (format " [%02d-%02d]"
654                         (- (lsh value -8) 32)
655                         (- (logand value 255) 32))))
656     )
657    (t
658     (insert (format "(%-18s . %s)" name value))
659     ))
660   (insert line-breaking))
661
662 (defun char-db-insert-relation-feature (char name value line-breaking
663                                              ccss readable)
664   (insert (format "(%-18s%s " name line-breaking))
665   (let ((lbs (concat "\n" (make-string (current-column) ?\ )))
666         separator cell sources required-features
667         ret)
668     (while (consp value)
669       (setq cell (car value))
670       (if (integerp cell)
671           (setq cell (decode-char '=ucs cell)))
672       (cond
673        ((eq name '->subsumptive)
674         (when (or (not (some (lambda (atr)
675                                (get-char-attribute cell atr))
676                              char-db-ignored-attributes))
677                   (some (lambda (ccs)
678                           (encode-char cell ccs 'defined-only))
679                         ccss))
680           (if separator
681               (insert lbs))
682           (let ((char-db-ignored-attributes
683                  (cons '<-subsumptive
684                        char-db-ignored-attributes)))
685             (insert-char-attributes cell readable nil nil 'for-sub-node))
686           (setq separator lbs))
687         )
688        ((characterp cell)
689         (setq sources
690               (get-char-attribute
691                char (intern (format "%s*sources" name))))
692         (setq required-features nil)
693         (dolist (source sources)
694           (cond
695            ((memq source '(JP
696                            JP/Jouyou
697                            shinjigen shinjigen@1ed shinjigen@rev))
698             (setq required-features
699                   (union required-features
700                          '(=jis-x0208
701                            =jis-x0208@1990
702                            =jis-x0213-1@2000
703                            =jis-x0213-1@2004
704                            =jis-x0213-2
705                            =jis-x0212
706                            =jis-x0208@1983
707                            =jis-x0208@1978
708                            =shinjigen
709                            =shinjigen@1ed
710                            =shinjigen@rev
711                            =shinjigen/+p@rev))))
712            ((eq source 'CN)
713             (setq required-features
714                   (union required-features
715                          '(=gb2312
716                            =gb12345
717                            =iso-ir165)))))
718           (cond
719            ((find-charset (setq ret (intern (format "=%s" source))))
720             (setq required-features
721                   (cons ret required-features)))
722            (t (setq required-features
723                     (cons source required-features)))))
724         (cond ((string-match "@JP" (symbol-name name))
725                (setq required-features
726                      (union required-features
727                             '(=jis-x0208
728                               =jis-x0208@1990
729                               =jis-x0213-1-2000
730                               =jis-x0213-2-2000
731                               =jis-x0212
732                               =jis-x0208@1983
733                               =jis-x0208@1978))))
734               ((string-match "@CN" (symbol-name name))
735                (setq required-features
736                      (union required-features
737                             '(=gb2312
738                               =gb12345
739                               =iso-ir165)))))
740         (if separator
741             (insert lbs))
742         (if readable
743             (insert (format "%S" cell))
744           (char-db-insert-char-spec cell readable
745                                     nil
746                                     required-features))
747         (setq separator lbs))
748        ((consp cell)
749         (if separator
750             (insert lbs))
751         (if (consp (car cell))
752             (char-db-insert-char-spec cell readable)
753           (char-db-insert-char-reference cell readable))
754         (setq separator lbs))
755        (t
756         (if separator
757             (insert separator))
758         (insert (prin1-to-string cell))
759         (setq separator " ")))
760       (setq value (cdr value)))
761     (insert ")")
762     (insert line-breaking)))
763
764 (defun insert-char-attributes (char &optional readable attributes column
765                                     for-sub-node)
766   (unless column
767     (setq column (current-column)))
768   (let (name value ; has-long-ccs-name
769         rest
770         radical strokes
771         (line-breaking
772          (concat "\n" (make-string (1+ column) ?\ )))
773         lbs cell separator ret
774         key al cal
775         dest-ccss ; sources required-features
776         ccss)
777     (let (atr-d)
778       (setq attributes
779             (sort (if attributes
780                       (if (consp attributes)
781                           (progn
782                             (dolist (name attributes)
783                               (unless (memq name char-db-ignored-attributes)
784                                 (if (find-charset name)
785                                     (push name ccss))
786                                 (push name atr-d)))
787                             atr-d))
788                     (dolist (name (char-attribute-list))
789                       (unless (memq name char-db-ignored-attributes)
790                         (if (find-charset name)
791                             (push name ccss))
792                         (push name atr-d)))
793                     atr-d)
794                   #'char-attribute-name<)))
795     (insert "(")
796     (when (memq '<-subsumptive attributes)
797       (when (or readable (not for-sub-node))
798         (when (setq value (get-char-attribute char '<-subsumptive))
799           (char-db-insert-relation-feature char '<-subsumptive value
800                                            line-breaking
801                                            ccss readable)))
802       (setq attributes (delq '<-subsumptive attributes)))
803     (when (and (memq '<-denotational attributes)
804                (setq value (get-char-attribute char '<-denotational)))
805       (char-db-insert-relation-feature char '<-denotational value
806                                        line-breaking
807                                        ccss readable)
808       (setq attributes (delq '<-denotational attributes)))
809     (when (and (memq 'name attributes)
810                (setq value (get-char-attribute char 'name)))
811       (insert (format
812                (if (> (+ (current-column) (length value)) 48)
813                    "(name . %S)%s"
814                  "(name               . %S)%s")
815                value line-breaking))
816       (setq attributes (delq 'name attributes))
817       )
818     (when (and (memq 'name* attributes)
819                (setq value (get-char-attribute char 'name*)))
820       (insert (format
821                (if (> (+ (current-column) (length value)) 48)
822                    "(name* . %S)%s"
823                  "(name*              . %S)%s")
824                value line-breaking))
825       (setq attributes (delq 'name* attributes))
826       )
827     (when (and (memq 'script attributes)
828                (setq value (get-char-attribute char 'script)))
829       (insert (format "(script\t\t%s)%s"
830                       (mapconcat (function prin1-to-string)
831                                  value " ")
832                       line-breaking))
833       (setq attributes (delq 'script attributes))
834       )
835     (dolist (name '(=>ucs =>ucs*))
836       (when (and (memq name attributes)
837                  (setq value (get-char-attribute char name)))
838         (insert (format "(%-18s . #x%04X)\t; %c%s"
839                         name value (decode-char '=ucs value)
840                         line-breaking))
841         (setq attributes (delq name attributes))))
842     (dolist (name '(=>ucs@gb =>ucs@big5))
843       (when (and (memq name attributes)
844                  (setq value (get-char-attribute char name)))
845         (insert (format "(%-18s . #x%04X)\t; %c%s"
846                         name value
847                         (decode-char (intern
848                                       (concat "="
849                                               (substring
850                                                (symbol-name name) 2)))
851                                      value)
852                         line-breaking))
853         (setq attributes (delq name attributes))
854         ))
855     ;; (dolist (name '(=>daikanwa))
856     ;;   (when (and (memq name attributes)
857     ;;              (setq value (get-char-attribute char name)))
858     ;;     (insert
859     ;;      (if (integerp value)
860     ;;          (format "(%-18s . %05d)\t; %c%s"
861     ;;                  name value (decode-char '=daikanwa value)
862     ;;                  line-breaking)
863     ;;        (format "(%-18s %s)\t; %c%s"
864     ;;                name
865     ;;                (mapconcat (function prin1-to-string)
866     ;;                           value " ")
867     ;;                (char-representative-of-daikanwa char)
868     ;;                line-breaking)))
869     ;;     (setq attributes (delq name attributes))))
870     (when (and (memq 'general-category attributes)
871                (setq value (get-char-attribute char 'general-category)))
872       (insert (format
873                "(general-category\t%s) ; %s%s"
874                (mapconcat (lambda (cell)
875                             (format "%S" cell))
876                           value " ")
877                (cond ((rassoc value unidata-normative-category-alist)
878                       "Normative Category")
879                      ((rassoc value unidata-informative-category-alist)
880                       "Informative Category")
881                      (t
882                       "Unknown Category"))
883                line-breaking))
884       (setq attributes (delq 'general-category attributes))
885       )
886     (when (and (memq 'bidi-category attributes)
887                (setq value (get-char-attribute char 'bidi-category)))
888       (insert (format "(bidi-category\t. %S)%s"
889                       value
890                       line-breaking))
891       (setq attributes (delq 'bidi-category attributes))
892       )
893     (unless (or (not (memq 'mirrored attributes))
894                 (eq (setq value (get-char-attribute char 'mirrored 'empty))
895                     'empty))
896       (insert (format "(mirrored\t\t. %S)%s"
897                       value
898                       line-breaking))
899       (setq attributes (delq 'mirrored attributes))
900       )
901     (cond
902      ((and (memq 'decimal-digit-value attributes)
903            (setq value (get-char-attribute char 'decimal-digit-value)))
904       (insert (format "(decimal-digit-value . %S)%s"
905                       value
906                       line-breaking))
907       (setq attributes (delq 'decimal-digit-value attributes))
908       (when (and (memq 'digit-value attributes)
909                  (setq value (get-char-attribute char 'digit-value)))
910         (insert (format "(digit-value\t . %S)%s"
911                         value
912                         line-breaking))
913         (setq attributes (delq 'digit-value attributes))
914         )
915       (when (and (memq 'numeric-value attributes)
916                  (setq value (get-char-attribute char 'numeric-value)))
917         (insert (format "(numeric-value\t . %S)%s"
918                         value
919                         line-breaking))
920         (setq attributes (delq 'numeric-value attributes))
921         )
922       )
923      (t
924       (when (and (memq 'digit-value attributes)
925                  (setq value (get-char-attribute char 'digit-value)))
926         (insert (format "(digit-value\t. %S)%s"
927                         value
928                         line-breaking))
929         (setq attributes (delq 'digit-value attributes))
930         )
931       (when (and (memq 'numeric-value attributes)
932                  (setq value (get-char-attribute char 'numeric-value)))
933         (insert (format "(numeric-value\t. %S)%s"
934                         value
935                         line-breaking))
936         (setq attributes (delq 'numeric-value attributes))
937         )))
938     (when (and (memq 'iso-10646-comment attributes)
939                (setq value (get-char-attribute char 'iso-10646-comment)))
940       (insert (format "(iso-10646-comment\t. %S)%s"
941                       value
942                       line-breaking))
943       (setq attributes (delq 'iso-10646-comment attributes))
944       )
945     (when (and (memq 'morohashi-daikanwa attributes)
946                (setq value (get-char-attribute char 'morohashi-daikanwa)))
947       (insert (format "(morohashi-daikanwa\t%s)%s"
948                       (mapconcat (function prin1-to-string) value " ")
949                       line-breaking))
950       (setq attributes (delq 'morohashi-daikanwa attributes))
951       )
952     (setq radical nil
953           strokes nil)
954     (when (and (memq 'ideographic-radical attributes)
955                (setq value (get-char-attribute char 'ideographic-radical)))
956       (setq radical value)
957       (insert (format "(ideographic-radical . %S)\t; %c%s"
958                       radical
959                       (ideographic-radical radical)
960                       line-breaking))
961       (setq attributes (delq 'ideographic-radical attributes))
962       )
963     (when (and (memq 'shuowen-radical attributes)
964                (setq value (get-char-attribute char 'shuowen-radical)))
965       (insert (format "(shuowen-radical\t. %S)\t; %c%s"
966                       value
967                       (shuowen-radical value)
968                       line-breaking))
969       (setq attributes (delq 'shuowen-radical attributes))
970       )
971     (let (key)
972       (dolist (domain
973                (append
974                 char-db-feature-domains
975                 (let (dest domain)
976                   (dolist (feature (char-attribute-list))
977                     (setq feature (symbol-name feature))
978                     (when (string-match
979                            "\\(radical\\|strokes\\)@\\([^@*]+\\)\\(\\*\\|$\\)"
980                            feature)
981                       (setq domain (intern (match-string 2 feature)))
982                      (unless (memq domain dest)
983                        (setq dest (cons domain dest)))))
984                   (sort dest #'string<))))
985         (setq key (intern (format "%s@%s" 'ideographic-radical domain)))
986         (when (and (memq key attributes)
987                    (setq value (get-char-attribute char key)))
988           (setq radical value)
989           (insert (format "(%s . %S)\t; %c%s"
990                           key
991                           radical
992                           (ideographic-radical radical)
993                           line-breaking))
994           (setq attributes (delq key attributes))
995           )
996         (setq key (intern (format "%s@%s" 'ideographic-strokes domain)))
997         (when (and (memq key attributes)
998                    (setq value (get-char-attribute char key)))
999           (setq strokes value)
1000           (insert (format "(%s . %S)%s"
1001                           key
1002                           strokes
1003                           line-breaking))
1004           (setq attributes (delq key attributes))
1005           )
1006         (setq key (intern (format "%s@%s" 'total-strokes domain)))
1007         (when (and (memq key attributes)
1008                    (setq value (get-char-attribute char key)))
1009           (insert (format "(%s       . %S)%s"
1010                           key
1011                           value
1012                           line-breaking))
1013           (setq attributes (delq key attributes))
1014           )
1015         (dolist (feature '(ideographic-radical
1016                            ideographic-strokes
1017                            total-strokes))
1018           (setq key (intern (format "%s@%s*sources" feature domain)))
1019           (when (and (memq key attributes)
1020                      (setq value (get-char-attribute char key)))
1021             (insert (format "(%s%s" key line-breaking))
1022             (dolist (cell value)
1023               (insert (format " %s" cell)))
1024             (insert ")")
1025             (insert line-breaking)
1026             (setq attributes (delq key attributes))
1027             ))
1028         ))
1029     (when (and (memq 'ideographic-strokes attributes)
1030                (setq value (get-char-attribute char 'ideographic-strokes)))
1031       (setq strokes value)
1032       (insert (format "(ideographic-strokes . %S)%s"
1033                       strokes
1034                       line-breaking))
1035       (setq attributes (delq 'ideographic-strokes attributes))
1036       )
1037     (when (and (memq 'kangxi-radical attributes)
1038                (setq value (get-char-attribute char 'kangxi-radical)))
1039       (unless (eq value radical)
1040         (insert (format "(kangxi-radical\t . %S)\t; %c%s"
1041                         value
1042                         (ideographic-radical value)
1043                         line-breaking))
1044         (or radical
1045             (setq radical value)))
1046       (setq attributes (delq 'kangxi-radical attributes))
1047       )
1048     (when (and (memq 'kangxi-strokes attributes)
1049                (setq value (get-char-attribute char 'kangxi-strokes)))
1050       (unless (eq value strokes)
1051         (insert (format "(kangxi-strokes\t . %S)%s"
1052                         value
1053                         line-breaking))
1054         (or strokes
1055             (setq strokes value)))
1056       (setq attributes (delq 'kangxi-strokes attributes))
1057       )
1058     (when (and (memq 'japanese-radical attributes)
1059                (setq value (get-char-attribute char 'japanese-radical)))
1060       (unless (eq value radical)
1061         (insert (format "(japanese-radical\t . %S)\t; %c%s"
1062                         value
1063                         (ideographic-radical value)
1064                         line-breaking))
1065         (or radical
1066             (setq radical value)))
1067       (setq attributes (delq 'japanese-radical attributes))
1068       )
1069     (when (and (memq 'japanese-strokes attributes)
1070                (setq value (get-char-attribute char 'japanese-strokes)))
1071       (unless (eq value strokes)
1072         (insert (format "(japanese-strokes\t . %S)%s"
1073                         value
1074                         line-breaking))
1075         (or strokes
1076             (setq strokes value)))
1077       (setq attributes (delq 'japanese-strokes attributes))
1078       )
1079     (when (and (memq 'cns-radical attributes)
1080                (setq value (get-char-attribute char 'cns-radical)))
1081       (insert (format "(cns-radical\t . %S)\t; %c%s"
1082                       value
1083                       (ideographic-radical value)
1084                       line-breaking))
1085       (setq attributes (delq 'cns-radical attributes))
1086       )
1087     (when (and (memq 'cns-strokes attributes)
1088                (setq value (get-char-attribute char 'cns-strokes)))
1089       (unless (eq value strokes)
1090         (insert (format "(cns-strokes\t . %S)%s"
1091                         value
1092                         line-breaking))
1093         (or strokes
1094             (setq strokes value)))
1095       (setq attributes (delq 'cns-strokes attributes))
1096       )
1097     ;; (when (and (memq 'shinjigen-1-radical attributes)
1098     ;;            (setq value (get-char-attribute char 'shinjigen-1-radical)))
1099     ;;   (unless (eq value radical)
1100     ;;     (insert (format "(shinjigen-1-radical . %S)\t; %c%s"
1101     ;;                     value
1102     ;;                     (ideographic-radical value)
1103     ;;                     line-breaking))
1104     ;;     (or radical
1105     ;;         (setq radical value)))
1106     ;;   (setq attributes (delq 'shinjigen-1-radical attributes))
1107     ;;   )
1108     (when (and (memq 'ideographic- attributes)
1109                (setq value (get-char-attribute char 'ideographic-)))
1110       (insert "(ideographic-       ")
1111       (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1112             separator nil)
1113       (while (consp value)
1114         (setq cell (car value))
1115         (if (integerp cell)
1116             (setq cell (decode-char '=ucs cell)))
1117         (cond ((characterp cell)
1118                (if separator
1119                    (insert lbs))
1120                (if readable
1121                    (insert (format "%S" cell))
1122                  (char-db-insert-char-spec cell readable))
1123                (setq separator lbs))
1124               ((consp cell)
1125                (if separator
1126                    (insert lbs))
1127                (if (consp (car cell))
1128                    (char-db-insert-char-spec cell readable)
1129                  (char-db-insert-char-reference cell readable))
1130                (setq separator lbs))
1131               (t
1132                (if separator
1133                    (insert separator))
1134                (insert (prin1-to-string cell))
1135                (setq separator " ")))
1136         (setq value (cdr value)))
1137       (insert ")")
1138       (insert line-breaking)
1139       (setq attributes (delq 'ideographic- attributes)))
1140     (when (and (memq 'total-strokes attributes)
1141                (setq value (get-char-attribute char 'total-strokes)))
1142       (insert (format "(total-strokes       . %S)%s"
1143                       value
1144                       line-breaking))
1145       (setq attributes (delq 'total-strokes attributes))
1146       )
1147     (when (and (memq '->ideograph attributes)
1148                (setq value (get-char-attribute char '->ideograph)))
1149       (insert (format "(->ideograph\t%s)%s"
1150                       (mapconcat (lambda (code)
1151                                    (cond ((symbolp code)
1152                                           (symbol-name code))
1153                                          ((integerp code)
1154                                           (format "#x%04X" code))
1155                                          (t
1156                                           (format "%s %S"
1157                                                   line-breaking code))))
1158                                  value " ")
1159                       line-breaking))
1160       (setq attributes (delq '->ideograph attributes))
1161       )
1162     ;; (when (and (memq '->decomposition attributes)
1163     ;;            (setq value (get-char-attribute char '->decomposition)))
1164     ;;   (insert (format "(->decomposition\t%s)%s"
1165     ;;                   (mapconcat (lambda (code)
1166     ;;                                (cond ((symbolp code)
1167     ;;                                       (symbol-name code))
1168     ;;                                      ((characterp code)
1169     ;;                                       (if readable
1170     ;;                                           (format "%S" code)
1171     ;;                                         (format "#x%04X"
1172     ;;                                                 (char-int code))
1173     ;;                                         ))
1174     ;;                                      ((integerp code)
1175     ;;                                       (format "#x%04X" code))
1176     ;;                                      (t
1177     ;;                                       (format "%s%S" line-breaking code))))
1178     ;;                              value " ")
1179     ;;                   line-breaking))
1180     ;;   (setq attributes (delq '->decomposition attributes))
1181     ;;   )
1182     (if (equal (get-char-attribute char '->titlecase)
1183                (get-char-attribute char '->uppercase))
1184         (setq attributes (delq '->titlecase attributes)))
1185     (when (and (memq '->mojikyo attributes)
1186                (setq value (get-char-attribute char '->mojikyo)))
1187       (insert (format "(->mojikyo\t\t. %06d)\t; %c%s"
1188                       value (decode-char 'mojikyo value)
1189                       line-breaking))
1190       (setq attributes (delq '->mojikyo attributes))
1191       )
1192     (when (and (memq 'hanyu-dazidian-vol attributes)
1193                (setq value (get-char-attribute char 'hanyu-dazidian-vol)))
1194       (insert (format "(hanyu-dazidian-vol  . %d)%s"
1195                       value line-breaking))
1196       (setq attributes (delq 'hanyu-dazidian-vol attributes))
1197       )
1198     (when (and (memq 'hanyu-dazidian-page attributes)
1199                (setq value (get-char-attribute char 'hanyu-dazidian-page)))
1200       (insert (format "(hanyu-dazidian-page . %d)%s"
1201                       value line-breaking))
1202       (setq attributes (delq 'hanyu-dazidian-page attributes))
1203       )
1204     (when (and (memq 'hanyu-dazidian-char attributes)
1205                (setq value (get-char-attribute char 'hanyu-dazidian-char)))
1206       (insert (format "(hanyu-dazidian-char . %d)%s"
1207                       value line-breaking))
1208       (setq attributes (delq 'hanyu-dazidian-char attributes))
1209       )
1210     (unless readable
1211       (dolist (ignored '(composition
1212                          ->denotational <-subsumptive ->ucs-unified
1213                          ->ideographic-component-forms))
1214         (setq attributes (delq ignored attributes))))
1215     (while attributes
1216       (setq name (car attributes))
1217       (unless (eq (setq value (get-char-attribute char name 'value-is-empty))
1218                   'value-is-empty)
1219         (cond ((setq ret (find-charset name))
1220                (setq name (charset-name ret))
1221                (when (not (memq name dest-ccss))
1222                  (setq dest-ccss (cons name dest-ccss))
1223                  (char-db-insert-ccs-feature name value line-breaking))
1224                )
1225               ((string-match "^=>ucs@" (symbol-name name))
1226                (insert (format "(%-18s . #x%04X)\t; %c%s"
1227                                name value (decode-char '=ucs value)
1228                                line-breaking))
1229                )
1230               ((eq name 'jisx0208-1978/4X)
1231                (insert (format "(%-18s . #x%04X)%s"
1232                                name value
1233                                line-breaking))
1234                )
1235               ((and
1236                 (not readable)
1237                 (not (eq name '->subsumptive))
1238                 (not (eq name '->uppercase))
1239                 (not (eq name '->lowercase))
1240                 (not (eq name '->titlecase))
1241                 (not (eq name '->canonical))
1242                 (not (eq name '->Bopomofo))
1243                 (not (eq name '->mistakable))
1244                 (not (eq name '->ideographic-variants))
1245                 (null (get-char-attribute
1246                        char (intern (format "%s*sources" name))))
1247                 (not (string-match "\\*sources$" (symbol-name name)))
1248                 (null (get-char-attribute
1249                        char (intern (format "%s*note" name))))
1250                 (not (string-match "\\*note$" (symbol-name name)))
1251                 (or (eq name '<-identical)
1252                     (eq name '<-uppercase)
1253                     (eq name '<-lowercase)
1254                     (eq name '<-titlecase)
1255                     (eq name '<-canonical)
1256                     (eq name '<-ideographic-variants)
1257                     ;; (eq name '<-synonyms)
1258                     (string-match "^<-synonyms" (symbol-name name))
1259                     (eq name '<-mistakable)
1260                     (when (string-match "^->" (symbol-name name))
1261                       (cond
1262                        ((string-match "^->fullwidth" (symbol-name name))
1263                         (not (and (consp value)
1264                                   (characterp (car value))
1265                                   (encode-char
1266                                    (car value) '=ucs 'defined-only)))
1267                         )
1268                        (t)))
1269                     ))
1270                )
1271               ((or (eq name 'ideographic-structure)
1272                    (eq name 'ideographic-combination)
1273                    (eq name 'ideographic-)
1274                    (eq name '=decomposition)
1275                    (char-feature-base-name= '=decomposition name)
1276                    (char-feature-base-name= '=>decomposition name)
1277                    ;; (string-match "^=>*decomposition\\(@[^*]+\\)?$"
1278                    ;;               (symbol-name name))
1279                    (string-match "^\\(->\\|<-\\)[^*]*$" (symbol-name name))
1280                    (string-match "^\\(->\\|<-\\)[^*]*\\*sources$"
1281                                  (symbol-name name))
1282                    )
1283                (char-db-insert-relation-feature char name value
1284                                                 line-breaking
1285                                                 ccss readable))
1286               ((memq name '(ideograph=
1287                             original-ideograph-of
1288                             ancient-ideograph-of
1289                             vulgar-ideograph-of
1290                             wrong-ideograph-of
1291                             ;; simplified-ideograph-of
1292                             ideographic-variants
1293                             ;; ideographic-different-form-of
1294                             ))
1295                (insert (format "(%-18s%s " name line-breaking))
1296                (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1297                      separator nil)
1298                (while (consp value)
1299                  (setq cell (car value))
1300                  (if (and (consp cell)
1301                           (consp (car cell)))
1302                      (progn
1303                        (if separator
1304                            (insert lbs))
1305                        (char-db-insert-alist cell readable)
1306                        (setq separator lbs))
1307                    (if separator
1308                        (insert separator))
1309                    (insert (prin1-to-string cell))
1310                    (setq separator " "))
1311                  (setq value (cdr value)))
1312                (insert ")")
1313                (insert line-breaking))
1314               ((consp value)
1315                (insert (format "(%-18s " name))
1316                (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1317                      separator nil)
1318                (while (consp value)
1319                  (setq cell (car value))
1320                  (if (and (consp cell)
1321                           (consp (car cell))
1322                           (setq ret (condition-case nil
1323                                         (find-char cell)
1324                                       (error nil))))
1325                      (progn
1326                        (setq rest cell
1327                              al nil
1328                              cal nil)
1329                        (while rest
1330                          (setq key (car (car rest)))
1331                          (if (find-charset key)
1332                              (setq cal (cons key cal))
1333                            (setq al (cons key al)))
1334                          (setq rest (cdr rest)))
1335                        (if separator
1336                            (insert lbs))
1337                        (insert-char-attributes ret
1338                                                readable
1339                                                al ; cal
1340                                                nil 'for-sub-node)
1341                        (setq separator lbs))
1342                    (setq ret (prin1-to-string cell))
1343                    (if separator
1344                        (if (< (+ (current-column)
1345                                  (length ret)
1346                                  (length separator))
1347                               76)
1348                            (insert separator)
1349                          (insert lbs)))
1350                    (insert ret)
1351                    (setq separator " "))
1352                  (setq value (cdr value)))
1353                (insert ")")
1354                (insert line-breaking))
1355               (t
1356                (insert (format "(%-18s" name))
1357                (setq ret (prin1-to-string value))
1358                (unless (< (+ (current-column)
1359                              (length ret)
1360                              3)
1361                           76)
1362                  (insert line-breaking))
1363                (insert " . " ret ")" line-breaking)
1364                ;; (insert (format "(%-18s . %S)%s"
1365                ;;                 name value
1366                ;;                 line-breaking))
1367                )
1368               ))
1369       (setq attributes (cdr attributes)))
1370     (insert ")")))
1371
1372 (defun insert-char-data (char &optional readable
1373                               attributes)
1374   (save-restriction
1375     (narrow-to-region (point)(point))
1376     (insert "(define-char
1377   '")
1378     (insert-char-attributes char readable attributes)
1379     (insert ")\n")
1380     (goto-char (point-min))
1381     (while (re-search-forward "[ \t]+$" nil t)
1382       (replace-match ""))
1383     ;; from tabify.
1384     (goto-char (point-min))
1385     (while (re-search-forward "[ \t][ \t][ \t]*" nil t)
1386       (let ((column (current-column))
1387             (indent-tabs-mode t))
1388         (delete-region (match-beginning 0) (point))
1389         (indent-to column)))
1390     (goto-char (point-max))
1391     ;; (tabify (point-min)(point-max))
1392     ))
1393
1394 (defun insert-char-data-with-variant (char &optional printable
1395                                            no-ucs-unified
1396                                            script excluded-script)
1397   (insert-char-data char printable)
1398   (let ((variants (char-variants char))
1399         rest
1400         variant vs ret)
1401     (setq variants (sort variants #'<))
1402     (setq rest variants)
1403     (setq variants (cons char variants))
1404     (while rest
1405       (setq variant (car rest))
1406       (unless (get-char-attribute variant '<-subsumptive)
1407         (if (and (or (null script)
1408                      (null (setq vs (get-char-attribute variant 'script)))
1409                      (memq script vs))
1410                  (or (null excluded-script)
1411                      (null (setq vs (get-char-attribute variant 'script)))
1412                      (not (memq excluded-script vs))))
1413             (unless (and no-ucs-unified (get-char-attribute variant '=ucs))
1414               (insert-char-data variant printable)
1415               (if (setq ret (char-variants variant))
1416                   (while ret
1417                     (or (memq (car ret) variants)
1418                         ;; (get-char-attribute (car ret) '<-subsumptive)
1419                         (setq rest (nconc rest (list (car ret)))))
1420                     (setq ret (cdr ret)))))))
1421       (setq rest (cdr rest)))))
1422
1423 (defun insert-char-range-data (min max &optional script excluded-script)
1424   (let ((code min)
1425         char)
1426     (while (<= code max)
1427       (setq char (decode-char '=ucs code))
1428       (if (encode-char char '=ucs 'defined-only)
1429           (insert-char-data-with-variant char nil 'no-ucs-unified
1430                                          script excluded-script))
1431       (setq code (1+ code)))))
1432
1433 (defun write-char-range-data-to-file (min max file
1434                                           &optional script excluded-script)
1435   (let ((coding-system-for-write char-db-file-coding-system))
1436     (with-temp-buffer
1437       (insert (format ";; -*- coding: %s -*-\n"
1438                       char-db-file-coding-system))
1439       (insert-char-range-data min max script excluded-script)
1440       (write-region (point-min)(point-max) file))))
1441
1442 (defvar what-character-original-window-configuration)
1443
1444 ;;;###autoload
1445 (defun what-char-definition (char)
1446   (interactive (list (char-after)))
1447   (let ((buf (get-buffer-create "*Character Description*"))
1448         (the-buf (current-buffer))
1449         (win-conf (current-window-configuration)))
1450     (pop-to-buffer buf)
1451     (make-local-variable 'what-character-original-window-configuration)
1452     (setq what-character-original-window-configuration win-conf)
1453     (setq buffer-read-only nil)
1454     (erase-buffer)
1455     (condition-case err
1456         (progn
1457           (insert-char-data-with-variant char 'printable)
1458           (unless (char-attribute-alist char)
1459             (insert (format ";; = %c\n"
1460                             (let* ((rest (split-char char))
1461                                    (ccs (pop rest))
1462                                    (code (pop rest)))
1463                               (while rest
1464                                 (setq code (logior (lsh code 8)
1465                                                    (pop rest))))
1466                               (decode-char ccs code)))))
1467           ;; (char-db-update-comment)
1468           (set-buffer-modified-p nil)
1469           (view-mode the-buf (lambda (buf)
1470                                (set-window-configuration
1471                                 what-character-original-window-configuration)
1472                                ))
1473           (goto-char (point-min)))
1474       (error (progn
1475                (set-window-configuration
1476                 what-character-original-window-configuration)
1477                (signal (car err) (cdr err)))))))
1478
1479
1480 ;;; @ end
1481 ;;;
1482
1483 (provide 'char-db-util)
1484
1485 ;;; char-db-util.el ends here