02cd1f9e5fec3f66537fa9d3ff7b473034c16fb6
[chise/xemacs-chise.git.1] / lisp / utf-2000 / char-db-util.el
1 ;;; char-db-util.el --- Character Database utility -*- coding: utf-8-er; -*-
2
3 ;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
4 ;;   2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 MORIOKA Tomohiko.
5
6 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
7 ;; Keywords: CHISE, Character Database, ISO/IEC 10646, UCS, Unicode, MULE.
8
9 ;; This file is part of XEmacs CHISE.
10
11 ;; XEmacs CHISE is free software; you can redistribute it and/or
12 ;; modify it under the terms of the GNU General Public License as
13 ;; published by the Free Software Foundation; either version 2, or (at
14 ;; your option) any later version.
15
16 ;; XEmacs CHISE is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 ;; General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with XEmacs CHISE; see the file COPYING.  If not, write to
23 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Code:
27
28 (require 'chise-subr)
29 (require 'ideograph-subr)
30
31 (defconst unidata-normative-category-alist
32   '(("Lu" letter        uppercase)
33     ("Ll" letter        lowercase)
34     ("Lt" letter        titlecase)
35     ("Mn" mark          non-spacing)
36     ("Mc" mark          spacing-combining)
37     ("Me" mark          enclosing)
38     ("Nd" number        decimal-digit)
39     ("Nl" number        letter)
40     ("No" number        other)
41     ("Zs" separator     space)
42     ("Zl" separator     line)
43     ("Zp" separator     paragraph)
44     ("Cc" other         control)
45     ("Cf" other         format)
46     ("Cs" other         surrogate)
47     ("Co" other         private-use)
48     ("Cn" other         not-assigned)))
49
50 (defconst unidata-informative-category-alist
51   '(("Lm" letter        modifier)
52     ("Lo" letter        other)
53     ("Pc" punctuation   connector)
54     ("Pd" punctuation   dash)
55     ("Ps" punctuation   open)
56     ("Pe" punctuation   close)
57     ("Pi" punctuation   initial-quote)
58     ("Pf" punctuation   final-quote)
59     ("Po" punctuation   other)
60     ("Sm" symbol        math)
61     ("Sc" symbol        currency)
62     ("Sk" symbol        modifier)
63     ("So" symbol        other)
64     ))
65
66 (defconst shuowen-radicals
67   [?一 ?上 ?示 ?三 ?王 ?玉 ?玨 ?气 ?士 ?丨 ; 010
68    ?屮 ?艸 ?蓐 ?茻 ?小 ?八 ?釆 ?半 ?牛 ?犛 ; 020
69    ?告 ?口 ?凵 ?吅 ?哭 ?走 ?止 ?癶 ?步 ?此 ; 030
70    ?正 ?是 ?辵 ?彳 ?廴 ?㢟 ?行 ?齒 ?牙 ?足 ; 040
71    ?疋 ?品 ?龠 ?冊 ?㗊 ?舌 ?干 ?谷 ?只 ?㕯 ; 050
72    ?句 ?丩 ?古 ?十 ?卅 ?言 ?誩 ?音 ?䇂 ?丵 ; 060
73    ?菐 ?𠬞 ?𠬜 ?共 ?異 ?舁 ?𦥑 ?䢅 ?爨 ?革 ; 070
74    ?鬲 ?䰜 ?爪 ?𠃨 ?鬥 ?又 ?𠂇 ?㕜 ?支 ?𦘒 ; 080
75    ?聿 ?畫 ?隶 ?臤 ?臣 ?殳 ?殺 ?𠘧 ?寸 ?皮 ; 090
76    ?㼱 ?攴 ?敎 ?卜 ?用 ?爻 ?㸚 ?𥄎 ?目 ?䀠 ; 100
77    ?眉 ?盾 ?自 ?白 ?鼻 ?皕 ?習 ?羽 ?隹 ?奞 ; 110
78    ?萑 ?𦫳 ?苜 ?羊 ?羴 ?瞿 ?雔 ?雥 ?鳥 ?烏 ; 120
79    ?𠦒 ?冓 ?幺 ?𢆶 ?叀 ?玄 ?予 ?放 ?𠬪 ?𣦼 ; 130
80    ?歺 ?死 ?冎 ?骨 ?肉 ?筋 ?刀 ?刃 ?㓞 ?丰 ; 140
81    ?耒 ?𧢲 ?竹 ?箕 ?丌 ?左 ?工 ?㠭 ?巫 ?甘 ; 150
82    ?曰 ?乃 ?丂 ?可 ?兮 ?号 ?亏 ?旨 ?喜 ?壴 ; 160
83 ;  ?旨 ?曰 ?乃 ?丂 ?可 ?兮 ?号 ?亏 ?喜 ?壴 ; 160
84    ?鼓 ?豈 ?豆 ?豊 ?豐 ?䖒 ?虍 ?虎 ?虤 ?皿 ; 170
85    ?𠙴 ?去 ?血 ?丶 ?丹 ?青 ?井 ?皀 ?鬯 ?食 ; 180
86    ?亼 ?會 ?倉 ?入 ?缶 ?矢 ?高 ?冂 ?𩫏 ?京 ; 190
87    ?亯 ?𣆪 ?畗 ?㐭 ?嗇 ?來 ?麥 ?夊 ?舛 ?䑞 ; 200
88    ?韋 ?弟 ?夂 ?久 ?桀 ?木 ?東 ?林 ?才 ?叒 ; 210
89    ?之 ?帀 ?出 ?𣎵 ?生 ?乇 ?𠂹 ?𠌶 ?華 ?𥝌 ; 220
90    ?稽 ?巢 ?桼 ?束 ?㯻 ?囗 ?員 ?貝 ?邑 ?𨛜 ; 230
91    ?日 ?旦 ?倝 ?㫃 ?冥 ?晶 ?月 ?有 ?明 ?囧 ; 240
92    ?夕 ?多 ?毌 ?𢎘 ?𣐺 ?卣 ?齊 ?朿 ?片 ?鼎 ; 250
93    ?克 ?彔 ?禾 ?秝 ?黍 ?香 ?米 ?毇 ?臼 ?凶 ; 260
94    ?𣎳 ?林 ?麻 ?尗 ?耑 ?韭 ?瓜 ?瓠 ?宀 ?宮 ; 270
95    ?呂 ?穴 ?㝱 ?𤕫 ?冖 ?𠔼 ?冃 ?㒳 ?网 ?襾 ; 280
96    ?巾 ?巿 ?帛 ?白 ?㡀 ?黹 ?人 ?𠤎 ?匕 ?从 ; 290
97    ?比 ?北 ?丘 ?㐺 ?𡈼 ?重 ?臥 ?身 ?㐆 ?衣 ; 300
98    ?裘 ?老 ?毛 ?毳 ?尸 ?尺 ?尾 ?履 ?舟 ?方 ; 310
99    ?儿 ?兄 ?兂 ?皃 ?𠑹 ?先 ?秃 ?見 ?覞 ?欠 ; 320
100    ?㱃 ?㳄 ?旡 ?頁 ?𦣻 ?面 ?丏 ?首 ?𥄉 ?須 ; 330
101    ?彡 ?彣 ?文 ?髟 ?后 ?司 ?卮 ?卩 ?印 ?色 ; 340
102    ?𠨍 ?辟 ?勹 ?包 ?茍 ?鬼 ?甶 ?厶 ?嵬 ?山 ; 350
103    ])
104
105 (defun shuowen-radical (number)
106   (aref shuowen-radicals (1- number)))
107
108 (defvar char-db-file-coding-system 'utf-8-mcs-er)
109
110 (defvar char-db-ignored-attributes '(ideographic-products))
111
112 (defvar char-db-coded-charset-priority-list
113   '(ascii
114     control-1
115     latin-iso8859-1
116     latin-iso8859-2
117     latin-iso8859-3
118     latin-iso8859-4
119     latin-iso8859-9
120     latin-jisx0201
121     cyrillic-iso8859-5
122     greek-iso8859-7
123     thai-tis620
124     ;; =mj
125     ;; =adobe-japan1-0
126     ;; =adobe-japan1-1
127     ;; =adobe-japan1-2
128     ;; =adobe-japan1-3
129     ;; =adobe-japan1-4
130     ;; =adobe-japan1-5
131     ;; =adobe-japan1-6
132     =jis-x0208
133     =jis-x0208@1978
134     =jis-x0208@1983
135     japanese-jisx0212
136     chinese-gb2312
137     =jis-x0208@1990
138     chinese-cns11643-1
139     chinese-cns11643-2
140     chinese-cns11643-3
141     chinese-cns11643-4
142     chinese-cns11643-5
143     chinese-cns11643-6
144     chinese-cns11643-7
145     =jis-x0213-1
146     =jis-x0213-1@2000
147     =jis-x0213-1@2004
148     =jis-x0213-2
149     korean-ksc5601
150     chinese-isoir165
151     katakana-jisx0201
152     hebrew-iso8859-8
153     chinese-gb12345
154     latin-viscii
155     ethiopic-ucs
156     =big5-cdp
157     =hanyo-denshi/ja
158     =hanyo-denshi/jb
159     =hanyo-denshi/jc
160     =hanyo-denshi/jd
161     =hanyo-denshi/ft
162     =hanyo-denshi/ia
163     =hanyo-denshi/ib
164     =hanyo-denshi/hg
165     =hanyo-denshi/jt
166     =hanyo-denshi/ks
167     ;; ==mj
168     ;; ==adobe-japan1-0
169     ;; ==adobe-japan1-1
170     ;; ==adobe-japan1-2
171     ;; ==adobe-japan1-3
172     ;; ==adobe-japan1-4
173     ;; ==adobe-japan1-5
174     ;; ==adobe-japan1-6
175     ==jis-x0208
176     ==jis-x0213-1
177     ==jis-x0213-2
178     ==hanyo-denshi/ja
179     ==hanyo-denshi/jb
180     ==hanyo-denshi/jc
181     ==hanyo-denshi/ft
182     ==hanyo-denshi/ib
183     ==hanyo-denshi/hg
184     ==hanyo-denshi/jt
185     ==hanyo-denshi/ks
186     =gt
187     =gt-k
188     =daikanwa
189     =daikanwa@rev2
190     =daikanwa@rev1
191     ==daikanwa
192     =cbeta
193     ideograph-hanziku-1
194     ideograph-hanziku-2
195     ideograph-hanziku-3
196     ideograph-hanziku-4
197     ideograph-hanziku-5
198     ideograph-hanziku-6
199     ideograph-hanziku-7
200     ideograph-hanziku-8
201     ideograph-hanziku-9
202     ideograph-hanziku-10
203     ideograph-hanziku-11
204     ideograph-hanziku-12
205     ;; =>>>adobe-japan1-0
206     ;; =>>>adobe-japan1-1
207     ;; =>>>adobe-japan1-2
208     ;; =>>>adobe-japan1-3
209     ;; =>>>adobe-japan1-4
210     ;; =>>>adobe-japan1-5
211     ;; =>>>adobe-japan1-6
212     ;; =>>>jis-x0208
213     ;; =>>>jis-x0213-1
214     ;; =>>>jis-x0213-2
215     ;; =>>>hanyo-denshi/ja
216     ;; =>>>hanyo-denshi/jb
217     ;; =>>>hanyo-denshi/jc
218     ;; =>>>hanyo-denshi/ft
219     ;; =>>>hanyo-denshi/ib
220     ;; =>>>hanyo-denshi/hg
221     ;; =>>>hanyo-denshi/jt
222     ;; =>>>hanyo-denshi/ks
223     ;; =>>>gt
224     =>>adobe-japan1-0
225     =>>adobe-japan1-1
226     =>>adobe-japan1-2
227     =>>adobe-japan1-3
228     =>>adobe-japan1-4
229     =>>adobe-japan1-5
230     =>>adobe-japan1-6
231     =>>jis-x0208
232     =>>jis-x0213-1
233     =>>jis-x0213-1@2000
234     =>>jis-x0213-1@2004
235     =>>jis-x0213-2
236     =>>jis-x0208@1978
237     =>>hanyo-denshi/ft
238     =>>hanyo-denshi/jt
239     =>>hanyo-denshi/ks
240     =>>gt
241     =>>daikanwa
242     =>>cbeta
243     =+>jis-x0208
244     =+>jis-x0213-1
245     =+>jis-x0213-2
246     =+>adobe-japan1-0
247     =+>adobe-japan1-1
248     =+>adobe-japan1-2
249     =+>adobe-japan1-3
250     =+>adobe-japan1-4
251     =+>adobe-japan1-5
252     =+>adobe-japan1-6
253     =+>jis-x0208@1978
254     =>jis-x0208
255     =>jis-x0208@1997
256     =>jis-x0213-1
257     =>jis-x0213-1@2000
258     =>jis-x0213-1@2004
259     =>jis-x0213-2
260     ==>ucs@bucs
261     =>ucs@hanyo-denshi
262     =>ucs@iso
263     =>ucs@unicode
264     =>ucs@jis
265     =>ucs@cns
266     =>ucs@ks
267     =+>ucs@iso
268     =+>ucs@unicode
269     =+>ucs@jis
270     =+>ucs@jis/1990
271     =+>ucs@cns
272     =+>ucs@ks
273     =>>ucs@iso
274     =>>ucs@unicode
275     =>>ucs@jis
276     =>>ucs@cns
277     =>>>ucs@iso
278     =>>>ucs@unicode
279     ==ucs@iso
280     ==ucs@unicode
281     ;; ==ucs@cns
282     ==gb2312
283     ==ks-x1001
284     ==cns11643-1
285     ==cns11643-2
286     ==cns11643-3
287     ==cns11643-4
288     ==cns11643-5
289     ==cns11643-6
290     ==cns11643-7
291     ==gt
292     ==jis-x0208@1990
293     ;; ==jis-x0208@1983
294     ;; ==jis-x0208@1978
295     ==gt-k
296     =ucs@iso
297     =ucs@unicode
298     =ucs@cns
299     ==big5-cdp
300     ==cbeta
301     =>>big5-cdp
302     =>>gt-k
303     =+>gt
304     =>gt
305     =>big5-cdp
306     =>daikanwa
307     =>daikanwa/ho
308     =>cns11643-7
309     =big5
310     =big5-eten
311     =>gt-k
312     =zinbun-oracle
313     =>zinbun-oracle
314     =ruimoku-v6
315     =>>ruimoku-v6
316     ==ruimoku-v6
317     =jef-china3
318     =shinjigen
319     =big5-cdp-var-3
320     =big5-cdp-var-5))
321
322
323 ;;; @ char-db formatters
324 ;;;
325
326 (defun char-db-make-char-spec (char)
327   (let (ret char-spec)
328     (cond ((characterp char)
329            (cond ((and (setq ret (encode-char char '=ucs 'defined-only))
330                        (not (and (<= #xE000 ret)(<= ret #xF8FF))))
331                   (setq char-spec (list (cons '=ucs ret)))
332                   (cond ((setq ret (get-char-attribute char 'name))
333                          (setq char-spec (cons (cons 'name ret) char-spec))
334                          )
335                         ((setq ret (get-char-attribute char 'name*))
336                          (setq char-spec (cons (cons 'name* ret) char-spec))
337                          ))
338                   )
339                  ((encode-char char '=mj 'defined-only)
340                   (setq char-spec nil)
341                   (dolist (ccs (charset-list))
342                     (if (and (or (eq ccs '=mj)
343                                  ;; (eq (charset-property ccs 'iso-ir) 177)
344                                  (string-match "=ucs@" (symbol-name ccs))
345                                  )
346                              (setq ccs (charset-name ccs))
347                              (null (assq ccs char-spec))
348                              (setq ret (encode-char char ccs 'defined-only)))
349                         (setq char-spec (cons (cons ccs ret) char-spec))))
350                   )
351                  ((encode-char char '==mj 'defined-only)
352                   (setq char-spec nil)
353                   (dolist (ccs (charset-list))
354                     (if (and (or (eq ccs '==mj)
355                                  ;; (eq (charset-property ccs 'iso-ir) 177)
356                                  (string-match "=ucs@" (symbol-name ccs))
357                                  )
358                              (setq ccs (charset-name ccs))
359                              (null (assq ccs char-spec))
360                              (setq ret (encode-char char ccs 'defined-only)))
361                         (setq char-spec (cons (cons ccs ret) char-spec))))
362                   )
363                  ((encode-char char '=adobe-japan1 'defined-only)
364                   (setq char-spec nil)
365                   (dolist (ccs (charset-list))
366                     (if (and (or (memq ccs
367                                        '(=adobe-japan1-0
368                                          =adobe-japan1-1
369                                          =adobe-japan1-2
370                                          =adobe-japan1-3
371                                          =adobe-japan1-4
372                                          =adobe-japan1-5
373                                          =adobe-japan1-6
374                                          ))
375                                  ;; (eq (charset-property ccs 'iso-ir) 177)
376                                  (string-match "=ucs@" (symbol-name ccs))
377                                  )
378                              (setq ccs (charset-name ccs))
379                              (null (assq ccs char-spec))
380                              (setq ret (encode-char char ccs 'defined-only)))
381                         (setq char-spec (cons (cons ccs ret) char-spec))))
382                   )
383                  ((encode-char char '==adobe-japan1 'defined-only)
384                   (setq char-spec nil)
385                   (dolist (ccs (charset-list))
386                     (if (and (or (memq ccs
387                                        '(==adobe-japan1-0
388                                          ==adobe-japan1-1
389                                          ==adobe-japan1-2
390                                          ==adobe-japan1-3
391                                          ==adobe-japan1-4
392                                          ==adobe-japan1-5
393                                          ==adobe-japan1-6
394                                          ))
395                                  ;; (eq (charset-property ccs 'iso-ir) 177)
396                                  (string-match "=ucs@" (symbol-name ccs))
397                                  )
398                              (setq ccs (charset-name ccs))
399                              (null (assq ccs char-spec))
400                              (setq ret (encode-char char ccs 'defined-only)))
401                         (setq char-spec (cons (cons ccs ret) char-spec))))
402                   )
403                  ((setq ret
404                         (catch 'tag
405                           (let ((rest char-db-coded-charset-priority-list)
406                                 ccs)
407                             (while rest
408                               (setq ccs (charset-name
409                                          (find-charset (car rest))))
410                               (if (setq ret
411                                         (encode-char char ccs
412                                                      'defined-only))
413                                   (throw 'tag (cons ccs ret)))
414                               (setq rest (cdr rest))))))
415                   (setq char-spec (list ret))
416                   (dolist (ccs (delq (car ret) (charset-list)))
417                     (if (and (or (charset-iso-final-char ccs)
418                                  (memq ccs
419                                        '(=daikanwa
420                                          =daikanwa@rev2
421                                          ;; =gt-k
422                                          =jis-x0208@1997
423                                          ))
424                                  (eq (charset-property ccs 'iso-ir) 177)
425                                  ;; (string-match "=ucs@" (symbol-name ccs))
426                                  )
427                              (setq ccs (charset-name ccs))
428                              (null (assq ccs char-spec))
429                              (setq ret (encode-char char ccs 'defined-only)))
430                         (setq char-spec (cons (cons ccs ret) char-spec))))
431                   (if (null char-spec)
432                       (setq char-spec (split-char char)))
433                   (cond ((setq ret (get-char-attribute char 'name))
434                          (setq char-spec (cons (cons 'name ret) char-spec))
435                          )
436                         ((setq ret (get-char-attribute char 'name*))
437                          (setq char-spec (cons (cons 'name* ret) char-spec))
438                          ))
439                   )
440                  ((setq ret (get-char-attribute
441                              char 'ideographic-combination))
442                   (setq char-spec
443                         (cons (cons 'ideographic-combination ret)
444                               char-spec))
445                   ))
446            char-spec)
447           ((consp char)
448            char))))
449     
450 (defun char-db-insert-char-spec (char &optional readable column
451                                       required-features)
452   (unless column
453     (setq column (current-column)))
454   (let (char-spec temp-char)
455     (setq char-spec (char-db-make-char-spec char))
456     (unless (or (characterp char) ; char
457                 (condition-case nil
458                     (setq char (find-char char-spec))
459                   (error nil)))
460       ;; define temporary character
461       ;;   Current implementation is dirty.
462       (setq temp-char (define-char (cons '(ideograph-daikanwa . 0)
463                                          char-spec)))
464       (remove-char-attribute temp-char 'ideograph-daikanwa)
465       (setq char temp-char))
466     (insert-char-attributes char
467                             readable
468                             (union (mapcar #'car char-spec)
469                                    required-features)
470                             nil 'for-sub-node)
471     (when temp-char
472       ;; undefine temporary character
473       ;;   Current implementation is dirty.
474       (setq char-spec (char-attribute-alist temp-char))
475       (while char-spec
476         (remove-char-attribute temp-char (car (car char-spec)))
477         (setq char-spec (cdr char-spec))))))
478
479 (defun char-db-insert-alist (alist &optional readable column)
480   (unless column
481     (setq column (current-column)))
482   (let ((line-breaking
483          (concat "\n" (make-string (1+ column) ?\ )))
484         name value
485         ret al ; cal
486         key
487         lbs cell rest separator)
488     (insert "(")
489     (while alist
490       (setq name (car (car alist))
491             value (cdr (car alist)))
492       (cond ((eq name 'char)
493              (insert "(char . ")
494              (if (setq ret (condition-case nil
495                                (find-char value)
496                              (error nil)))
497                  (progn
498                    (setq al nil
499                          ;; cal nil
500                          )
501                    (while value
502                      (setq key (car (car value)))
503                      ;; (if (find-charset key)
504                      ;;     (setq cal (cons key cal))
505                      (setq al (cons key al))
506                      ;; )
507                      (setq value (cdr value)))
508                    (insert-char-attributes ret
509                                            readable
510                                            (or al 'none) ; cal
511                                            nil 'for-sub-node))
512                (insert (prin1-to-string value)))
513              (insert ")")
514              (insert line-breaking))
515             ((consp value)
516              (insert (format "(%-18s " name))
517              (setq lbs (concat "\n" (make-string (current-column) ?\ )))
518              (while (consp value)
519                (setq cell (car value))
520                (if (and (consp cell)
521                         (consp (car cell))
522                         (setq ret (condition-case nil
523                                       (find-char cell)
524                                     (error nil)))
525                         )
526                    (progn
527                      (setq rest cell
528                            al nil
529                            ;; cal nil
530                            )
531                      (while rest
532                        (setq key (car (car rest)))
533                        ;; (if (find-charset key)
534                        ;;     (setq cal (cons key cal))
535                        (setq al (cons key al))
536                        ;; )
537                        (setq rest (cdr rest)))
538                      (if separator
539                          (insert lbs))
540                      (insert-char-attributes ret
541                                              readable
542                                              al ; cal
543                                              nil 'for-sub-node)
544                      (setq separator lbs))
545                  (if separator
546                      (insert separator))
547                  (insert (prin1-to-string cell))
548                  (setq separator " "))
549                (setq value (cdr value)))
550              (insert ")")
551              (insert line-breaking))
552             (t
553              (insert (format "(%-18s . %S)%s"
554                              name value
555                              line-breaking))))
556       (setq alist (cdr alist))))
557   (insert ")"))
558
559 (defun char-db-insert-char-reference (plist &optional readable column)
560   (unless column
561     (setq column (current-column)))
562   (let ((line-breaking
563          (concat "\n" (make-string (1+ column) ?\ )))
564         (separator "")
565         name value)
566     (insert "(")
567     (while plist
568       (setq name (pop plist))
569       (setq value (pop plist))
570       (cond ((eq name :char)
571              (insert separator)
572              (insert ":char\t")
573              (cond ((numberp value)
574                     (setq value (decode-char '=ucs value)))
575                    ;; ((consp value)
576                    ;;  (setq value (or (find-char value)
577                    ;;                  value)))
578                    )
579              (char-db-insert-char-spec value readable)
580              (insert line-breaking)
581              (setq separator ""))
582             ((eq name :radical)
583              (insert (format "%s%s\t%d ; %c%s"
584                              separator
585                              name value
586                              (ideographic-radical value)
587                              line-breaking))
588              (setq separator ""))
589             (t
590              (insert (format "%s%s\t%S" separator name value))
591              (setq separator line-breaking)))
592       ))
593   (insert ")"))
594
595 (defun char-db-decode-isolated-char (ccs code-point)
596   (let (ret)
597     (setq ret
598           (cond ((eq ccs 'arabic-iso8859-6)
599                  (decode-char ccs code-point))
600                 ;; ((eq ccs '=gt)
601                 ;;  (decode-builtin-char '==gt code-point))
602                 ((and (memq ccs '(=gt-pj-1
603                                   =gt-pj-2
604                                   =gt-pj-3
605                                   =gt-pj-4
606                                   =gt-pj-5
607                                   =gt-pj-6
608                                   =gt-pj-7
609                                   =gt-pj-8
610                                   =gt-pj-9
611                                   =gt-pj-10
612                                   =gt-pj-11))
613                       (setq ret (decode-char ccs code-point))
614                       (setq ret (encode-char ret '=gt 'defined-only)))
615                  (decode-builtin-char '=gt ret))
616                 (t
617                  (decode-builtin-char ccs code-point))))
618     (cond ((and (<= 0 (char-int ret))
619                 (<= (char-int ret) #x1F))
620            (decode-char '=ucs (+ #x2400 (char-int ret))))
621           ((= (char-int ret) #x7F)
622            ?\u2421)
623           (t ret))))
624
625 (defvar char-db-convert-obsolete-format t)
626
627 (defun char-db-insert-ccs-feature (name value line-breaking)
628   (cond
629    ((integerp value)
630     (insert
631      (format
632       (cond
633        ((memq name '(==shinjigen
634                      =shinjigen
635                      =shinjigen@1ed ==shinjigen@1ed
636                      =shinjigen@rev ==shinjigen@rev
637                      =shinjigen/+p@rev ==shinjigen/+p@rev
638                      ===daikanwa/ho ==daikanwa/ho
639                      =daikanwa/ho =>>daikanwa/ho =>daikanwa/ho))
640         "(%-18s .  %04d)\t; %c")
641        ((eq name '=shinjigen@1ed/24pr)
642         "(%-18s . %04d)\t; %c")
643        ((or (memq name '(===daikanwa
644                          ==daikanwa =daikanwa =>>daikanwa =>daikanwa
645                          =daikanwa@rev1 =daikanwa@rev2
646                          =daikanwa/+p ==daikanwa/+p ===daikanwa/+p
647                          =>>daikanwa/+p
648                          =daikanwa/+2p =>>daikanwa/+2p
649                          =gt ==gt ===gt
650                          =>>gt =+>gt =>gt
651                          =gt-k ==gt-k ===gt-k
652                          =>>gt-k =>gt-k
653                          =adobe-japan1-0 ==adobe-japan1-0 ===adobe-japan1-0
654                          =adobe-japan1-1 ==adobe-japan1-1 ===adobe-japan1-1
655                          =adobe-japan1-2 ==adobe-japan1-2 ===adobe-japan1-2
656                          =adobe-japan1-3 ==adobe-japan1-3 ===adobe-japan1-3
657                          =adobe-japan1-4 ==adobe-japan1-4 ===adobe-japan1-4
658                          =adobe-japan1-5 ==adobe-japan1-5 ===adobe-japan1-5
659                          =adobe-japan1-6 ==adobe-japan1-6 ===adobe-japan1-6
660                          =>>adobe-japan1-0 =+>adobe-japan1-0
661                          =>>adobe-japan1-1 =+>adobe-japan1-1
662                          =>>adobe-japan1-2 =+>adobe-japan1-2
663                          =>>adobe-japan1-3 =+>adobe-japan1-3
664                          =>>adobe-japan1-4 =+>adobe-japan1-4
665                          =>>adobe-japan1-5 =+>adobe-japan1-5
666                          =>>adobe-japan1-6 =+>adobe-japan1-6
667                          =cbeta ==cbeta ===cbeta =>>cbeta
668                          =zinbun-oracle =>zinbun-oracle))
669             ;; (string-match "^=adobe-" (symbol-name name))
670             )
671         "(%-18s . %05d)\t; %c")
672        ((memq name '(=hanyo-denshi/ks
673                      ==hanyo-denshi/ks ===hanyo-denshi/ks
674                      =>>hanyo-denshi/ks
675                      =koseki
676                      =mj ==mj ===mj =>>mj
677                      =zihai mojikyo))
678         "(%-18s . %06d)\t; %c")
679        ((>= (charset-dimension name) 2)
680         "(%-18s . #x%04X)\t; %c")
681        (t
682         "(%-18s . #x%02X)\t; %c"))
683       name
684       (if (= (charset-iso-graphic-plane name) 1)
685           (logior value
686                   (cond ((= (charset-dimension name) 1)
687                          #x80)
688                         ((= (charset-dimension name) 2)
689                          #x8080)
690                         ((= (charset-dimension name) 3)
691                          #x808080)
692                         (t 0)))
693         value)
694       (char-db-decode-isolated-char name value)))
695     (if (and (= (charset-chars name) 94)
696              (= (charset-dimension name) 2))
697         (insert (format " [%02d-%02d]"
698                         (- (lsh value -8) 32)
699                         (- (logand value 255) 32))))
700     )
701    (t
702     (insert (format "(%-18s . %s)" name value))
703     ))
704   (insert line-breaking))
705
706 (defun char-db-insert-relation-feature (char name value line-breaking
707                                              ccss readable)
708   (insert (format "(%-18s%s " name line-breaking))
709   (let ((lbs (concat "\n" (make-string (current-column) ?\ )))
710         separator cell sources required-features
711         ret)
712     (while (consp value)
713       (setq cell (car value))
714       (if (integerp cell)
715           (setq cell (decode-char '=ucs cell)))
716       (cond
717        ((eq name '->subsumptive)
718         (when (or (not (some (lambda (atr)
719                                (get-char-attribute cell atr))
720                              char-db-ignored-attributes))
721                   (some (lambda (ccs)
722                           (encode-char cell ccs 'defined-only))
723                         ccss))
724           (if separator
725               (insert lbs))
726           (let ((char-db-ignored-attributes
727                  (cons '<-subsumptive
728                        char-db-ignored-attributes)))
729             (insert-char-attributes cell readable nil nil 'for-sub-node))
730           (setq separator lbs))
731         )
732        ((characterp cell)
733         (setq sources
734               (get-char-attribute
735                char (intern (format "%s*sources" name))))
736         (setq required-features nil)
737         (dolist (source sources)
738           (cond
739            ((memq source '(JP
740                            JP/Jouyou
741                            shinjigen shinjigen@1ed shinjigen@rev))
742             (setq required-features
743                   (union required-features
744                          '(=jis-x0208
745                            =jis-x0208@1990
746                            =jis-x0213-1@2000
747                            =jis-x0213-1@2004
748                            =jis-x0213-2
749                            =jis-x0212
750                            =jis-x0208@1983
751                            =jis-x0208@1978
752                            =shinjigen
753                            =shinjigen@1ed
754                            =shinjigen@rev
755                            =shinjigen/+p@rev))))
756            ((eq source 'CN)
757             (setq required-features
758                   (union required-features
759                          '(=gb2312
760                            =gb12345
761                            =iso-ir165)))))
762           (cond
763            ((find-charset (setq ret (intern (format "=%s" source))))
764             (setq required-features
765                   (cons ret required-features)))
766            (t (setq required-features
767                     (cons source required-features)))))
768         (cond ((string-match "@JP" (symbol-name name))
769                (setq required-features
770                      (union required-features
771                             '(=jis-x0208
772                               =jis-x0208@1990
773                               =jis-x0213-1-2000
774                               =jis-x0213-2-2000
775                               =jis-x0212
776                               =jis-x0208@1983
777                               =jis-x0208@1978))))
778               ((string-match "@CN" (symbol-name name))
779                (setq required-features
780                      (union required-features
781                             '(=gb2312
782                               =gb12345
783                               =iso-ir165)))))
784         (if separator
785             (insert lbs))
786         (if readable
787             (insert (format "%S" cell))
788           (char-db-insert-char-spec cell readable
789                                     nil
790                                     required-features))
791         (setq separator lbs))
792        ((consp cell)
793         (if separator
794             (insert lbs))
795         (if (consp (car cell))
796             (char-db-insert-char-spec cell readable)
797           (char-db-insert-char-reference cell readable))
798         (setq separator lbs))
799        (t
800         (if separator
801             (insert separator))
802         (insert (prin1-to-string cell))
803         (setq separator " ")))
804       (setq value (cdr value)))
805     (insert ")")
806     (insert line-breaking)))
807
808 (defun insert-char-attributes (char &optional readable attributes column
809                                     for-sub-node)
810   (unless column
811     (setq column (current-column)))
812   (let (name value ; has-long-ccs-name
813         rest
814         radical strokes
815         (line-breaking
816          (concat "\n" (make-string (1+ column) ?\ )))
817         lbs cell separator ret
818         key al cal
819         dest-ccss ; sources required-features
820         ccss)
821     (let (atr-d)
822       (setq attributes
823             (sort (if attributes
824                       (if (consp attributes)
825                           (progn
826                             (dolist (name attributes)
827                               (unless (memq name char-db-ignored-attributes)
828                                 (if (find-charset name)
829                                     (push name ccss))
830                                 (push name atr-d)))
831                             atr-d))
832                     (dolist (name (char-attribute-list))
833                       (unless (memq name char-db-ignored-attributes)
834                         (if (find-charset name)
835                             (push name ccss))
836                         (push name atr-d)))
837                     atr-d)
838                   #'char-attribute-name<)))
839     (insert "(")
840     (when (memq '<-subsumptive attributes)
841       (when (or readable (not for-sub-node))
842         (when (setq value (get-char-attribute char '<-subsumptive))
843           (char-db-insert-relation-feature char '<-subsumptive value
844                                            line-breaking
845                                            ccss readable)))
846       (setq attributes (delq '<-subsumptive attributes)))
847     (when (and (memq '<-denotational attributes)
848                (setq value (get-char-attribute char '<-denotational)))
849       (char-db-insert-relation-feature char '<-denotational value
850                                        line-breaking
851                                        ccss readable)
852       (setq attributes (delq '<-denotational attributes)))
853     (when (and (memq 'name attributes)
854                (setq value (get-char-attribute char 'name)))
855       (insert (format
856                (if (> (+ (current-column) (length value)) 48)
857                    "(name . %S)%s"
858                  "(name               . %S)%s")
859                value line-breaking))
860       (setq attributes (delq 'name attributes))
861       )
862     (when (and (memq 'name* attributes)
863                (setq value (get-char-attribute char 'name*)))
864       (insert (format
865                (if (> (+ (current-column) (length value)) 48)
866                    "(name* . %S)%s"
867                  "(name*              . %S)%s")
868                value line-breaking))
869       (setq attributes (delq 'name* attributes))
870       )
871     (when (and (memq 'script attributes)
872                (setq value (get-char-attribute char 'script)))
873       (insert (format "(script\t\t%s)%s"
874                       (mapconcat (function prin1-to-string)
875                                  value " ")
876                       line-breaking))
877       (setq attributes (delq 'script attributes))
878       )
879     (dolist (name '(=>ucs =>ucs*))
880       (when (and (memq name attributes)
881                  (setq value (get-char-attribute char name)))
882         (insert (format "(%-18s . #x%04X)\t; %c%s"
883                         name value (decode-char '=ucs value)
884                         line-breaking))
885         (setq attributes (delq name attributes))))
886     (dolist (name '(=>ucs@gb =>ucs@big5))
887       (when (and (memq name attributes)
888                  (setq value (get-char-attribute char name)))
889         (insert (format "(%-18s . #x%04X)\t; %c%s"
890                         name value
891                         (decode-char (intern
892                                       (concat "="
893                                               (substring
894                                                (symbol-name name) 2)))
895                                      value)
896                         line-breaking))
897         (setq attributes (delq name attributes))
898         ))
899     ;; (dolist (name '(=>daikanwa))
900     ;;   (when (and (memq name attributes)
901     ;;              (setq value (get-char-attribute char name)))
902     ;;     (insert
903     ;;      (if (integerp value)
904     ;;          (format "(%-18s . %05d)\t; %c%s"
905     ;;                  name value (decode-char '=daikanwa value)
906     ;;                  line-breaking)
907     ;;        (format "(%-18s %s)\t; %c%s"
908     ;;                name
909     ;;                (mapconcat (function prin1-to-string)
910     ;;                           value " ")
911     ;;                (char-representative-of-daikanwa char)
912     ;;                line-breaking)))
913     ;;     (setq attributes (delq name attributes))))
914     (when (and (memq 'general-category attributes)
915                (setq value (get-char-attribute char 'general-category)))
916       (insert (format
917                "(general-category\t%s) ; %s%s"
918                (mapconcat (lambda (cell)
919                             (format "%S" cell))
920                           value " ")
921                (cond ((rassoc value unidata-normative-category-alist)
922                       "Normative Category")
923                      ((rassoc value unidata-informative-category-alist)
924                       "Informative Category")
925                      (t
926                       "Unknown Category"))
927                line-breaking))
928       (setq attributes (delq 'general-category attributes))
929       )
930     (when (and (memq 'bidi-category attributes)
931                (setq value (get-char-attribute char 'bidi-category)))
932       (insert (format "(bidi-category\t. %S)%s"
933                       value
934                       line-breaking))
935       (setq attributes (delq 'bidi-category attributes))
936       )
937     (unless (or (not (memq 'mirrored attributes))
938                 (eq (setq value (get-char-attribute char 'mirrored 'empty))
939                     'empty))
940       (insert (format "(mirrored\t\t. %S)%s"
941                       value
942                       line-breaking))
943       (setq attributes (delq 'mirrored attributes))
944       )
945     (cond
946      ((and (memq 'decimal-digit-value attributes)
947            (setq value (get-char-attribute char 'decimal-digit-value)))
948       (insert (format "(decimal-digit-value . %S)%s"
949                       value
950                       line-breaking))
951       (setq attributes (delq 'decimal-digit-value attributes))
952       (when (and (memq 'digit-value attributes)
953                  (setq value (get-char-attribute char 'digit-value)))
954         (insert (format "(digit-value\t . %S)%s"
955                         value
956                         line-breaking))
957         (setq attributes (delq 'digit-value attributes))
958         )
959       (when (and (memq 'numeric-value attributes)
960                  (setq value (get-char-attribute char 'numeric-value)))
961         (insert (format "(numeric-value\t . %S)%s"
962                         value
963                         line-breaking))
964         (setq attributes (delq 'numeric-value attributes))
965         )
966       )
967      (t
968       (when (and (memq 'digit-value attributes)
969                  (setq value (get-char-attribute char 'digit-value)))
970         (insert (format "(digit-value\t. %S)%s"
971                         value
972                         line-breaking))
973         (setq attributes (delq 'digit-value attributes))
974         )
975       (when (and (memq 'numeric-value attributes)
976                  (setq value (get-char-attribute char 'numeric-value)))
977         (insert (format "(numeric-value\t. %S)%s"
978                         value
979                         line-breaking))
980         (setq attributes (delq 'numeric-value attributes))
981         )))
982     (when (and (memq 'iso-10646-comment attributes)
983                (setq value (get-char-attribute char 'iso-10646-comment)))
984       (insert (format "(iso-10646-comment\t. %S)%s"
985                       value
986                       line-breaking))
987       (setq attributes (delq 'iso-10646-comment attributes))
988       )
989     (when (and (memq 'morohashi-daikanwa attributes)
990                (setq value (get-char-attribute char 'morohashi-daikanwa)))
991       (insert (format "(morohashi-daikanwa\t%s)%s"
992                       (mapconcat (function prin1-to-string) value " ")
993                       line-breaking))
994       (setq attributes (delq 'morohashi-daikanwa attributes))
995       )
996     (setq radical nil
997           strokes nil)
998     (when (and (memq 'ideographic-radical attributes)
999                (setq value (get-char-attribute char 'ideographic-radical)))
1000       (setq radical value)
1001       (insert (format "(ideographic-radical . %S)\t; %c%s"
1002                       radical
1003                       (ideographic-radical radical)
1004                       line-breaking))
1005       (setq attributes (delq 'ideographic-radical attributes))
1006       )
1007     (when (and (memq 'shuowen-radical attributes)
1008                (setq value (get-char-attribute char 'shuowen-radical)))
1009       (insert (format "(shuowen-radical\t. %S)\t; %c%s"
1010                       value
1011                       (shuowen-radical value)
1012                       line-breaking))
1013       (setq attributes (delq 'shuowen-radical attributes))
1014       )
1015     (let (key)
1016       (dolist (domain
1017                (append
1018                 char-db-feature-domains
1019                 (let (dest domain)
1020                   (dolist (feature (char-attribute-list))
1021                     (setq feature (symbol-name feature))
1022                     (when (string-match
1023                            "\\(radical\\|strokes\\)@\\([^@*]+\\)\\(\\*\\|$\\)"
1024                            feature)
1025                       (setq domain (intern (match-string 2 feature)))
1026                      (unless (memq domain dest)
1027                        (setq dest (cons domain dest)))))
1028                   (sort dest #'string<))))
1029         (setq key (intern (format "%s@%s" 'ideographic-radical domain)))
1030         (when (and (memq key attributes)
1031                    (setq value (get-char-attribute char key)))
1032           (setq radical value)
1033           (insert (format "(%s . %S)\t; %c%s"
1034                           key
1035                           radical
1036                           (ideographic-radical radical)
1037                           line-breaking))
1038           (setq attributes (delq key attributes))
1039           )
1040         (setq key (intern (format "%s@%s" 'ideographic-strokes domain)))
1041         (when (and (memq key attributes)
1042                    (setq value (get-char-attribute char key)))
1043           (setq strokes value)
1044           (insert (format "(%s . %S)%s"
1045                           key
1046                           strokes
1047                           line-breaking))
1048           (setq attributes (delq key attributes))
1049           )
1050         (setq key (intern (format "%s@%s" 'total-strokes domain)))
1051         (when (and (memq key attributes)
1052                    (setq value (get-char-attribute char key)))
1053           (insert (format "(%s       . %S)%s"
1054                           key
1055                           value
1056                           line-breaking))
1057           (setq attributes (delq key attributes))
1058           )
1059         (dolist (feature '(ideographic-radical
1060                            ideographic-strokes
1061                            total-strokes))
1062           (setq key (intern (format "%s@%s*sources" feature domain)))
1063           (when (and (memq key attributes)
1064                      (setq value (get-char-attribute char key)))
1065             (insert (format "(%s%s" key line-breaking))
1066             (dolist (cell value)
1067               (insert (format " %s" cell)))
1068             (insert ")")
1069             (insert line-breaking)
1070             (setq attributes (delq key attributes))
1071             ))
1072         ))
1073     (when (and (memq 'ideographic-strokes attributes)
1074                (setq value (get-char-attribute char 'ideographic-strokes)))
1075       (setq strokes value)
1076       (insert (format "(ideographic-strokes . %S)%s"
1077                       strokes
1078                       line-breaking))
1079       (setq attributes (delq 'ideographic-strokes attributes))
1080       )
1081     (when (and (memq 'kangxi-radical attributes)
1082                (setq value (get-char-attribute char 'kangxi-radical)))
1083       (unless (eq value radical)
1084         (insert (format "(kangxi-radical\t . %S)\t; %c%s"
1085                         value
1086                         (ideographic-radical value)
1087                         line-breaking))
1088         (or radical
1089             (setq radical value)))
1090       (setq attributes (delq 'kangxi-radical attributes))
1091       )
1092     (when (and (memq 'kangxi-strokes attributes)
1093                (setq value (get-char-attribute char 'kangxi-strokes)))
1094       (unless (eq value strokes)
1095         (insert (format "(kangxi-strokes\t . %S)%s"
1096                         value
1097                         line-breaking))
1098         (or strokes
1099             (setq strokes value)))
1100       (setq attributes (delq 'kangxi-strokes attributes))
1101       )
1102     (when (and (memq 'japanese-radical attributes)
1103                (setq value (get-char-attribute char 'japanese-radical)))
1104       (unless (eq value radical)
1105         (insert (format "(japanese-radical\t . %S)\t; %c%s"
1106                         value
1107                         (ideographic-radical value)
1108                         line-breaking))
1109         (or radical
1110             (setq radical value)))
1111       (setq attributes (delq 'japanese-radical attributes))
1112       )
1113     (when (and (memq 'japanese-strokes attributes)
1114                (setq value (get-char-attribute char 'japanese-strokes)))
1115       (unless (eq value strokes)
1116         (insert (format "(japanese-strokes\t . %S)%s"
1117                         value
1118                         line-breaking))
1119         (or strokes
1120             (setq strokes value)))
1121       (setq attributes (delq 'japanese-strokes attributes))
1122       )
1123     (when (and (memq 'cns-radical attributes)
1124                (setq value (get-char-attribute char 'cns-radical)))
1125       (insert (format "(cns-radical\t . %S)\t; %c%s"
1126                       value
1127                       (ideographic-radical value)
1128                       line-breaking))
1129       (setq attributes (delq 'cns-radical attributes))
1130       )
1131     (when (and (memq 'cns-strokes attributes)
1132                (setq value (get-char-attribute char 'cns-strokes)))
1133       (unless (eq value strokes)
1134         (insert (format "(cns-strokes\t . %S)%s"
1135                         value
1136                         line-breaking))
1137         (or strokes
1138             (setq strokes value)))
1139       (setq attributes (delq 'cns-strokes attributes))
1140       )
1141     ;; (when (and (memq 'shinjigen-1-radical attributes)
1142     ;;            (setq value (get-char-attribute char 'shinjigen-1-radical)))
1143     ;;   (unless (eq value radical)
1144     ;;     (insert (format "(shinjigen-1-radical . %S)\t; %c%s"
1145     ;;                     value
1146     ;;                     (ideographic-radical value)
1147     ;;                     line-breaking))
1148     ;;     (or radical
1149     ;;         (setq radical value)))
1150     ;;   (setq attributes (delq 'shinjigen-1-radical attributes))
1151     ;;   )
1152     (when (and (memq 'ideographic- attributes)
1153                (setq value (get-char-attribute char 'ideographic-)))
1154       (insert "(ideographic-       ")
1155       (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1156             separator nil)
1157       (while (consp value)
1158         (setq cell (car value))
1159         (if (integerp cell)
1160             (setq cell (decode-char '=ucs cell)))
1161         (cond ((characterp cell)
1162                (if separator
1163                    (insert lbs))
1164                (if readable
1165                    (insert (format "%S" cell))
1166                  (char-db-insert-char-spec cell readable))
1167                (setq separator lbs))
1168               ((consp cell)
1169                (if separator
1170                    (insert lbs))
1171                (if (consp (car cell))
1172                    (char-db-insert-char-spec cell readable)
1173                  (char-db-insert-char-reference cell readable))
1174                (setq separator lbs))
1175               (t
1176                (if separator
1177                    (insert separator))
1178                (insert (prin1-to-string cell))
1179                (setq separator " ")))
1180         (setq value (cdr value)))
1181       (insert ")")
1182       (insert line-breaking)
1183       (setq attributes (delq 'ideographic- attributes)))
1184     (when (and (memq 'total-strokes attributes)
1185                (setq value (get-char-attribute char 'total-strokes)))
1186       (insert (format "(total-strokes       . %S)%s"
1187                       value
1188                       line-breaking))
1189       (setq attributes (delq 'total-strokes attributes))
1190       )
1191     (when (and (memq '->ideograph attributes)
1192                (setq value (get-char-attribute char '->ideograph)))
1193       (insert (format "(->ideograph\t%s)%s"
1194                       (mapconcat (lambda (code)
1195                                    (cond ((symbolp code)
1196                                           (symbol-name code))
1197                                          ((integerp code)
1198                                           (format "#x%04X" code))
1199                                          (t
1200                                           (format "%s %S"
1201                                                   line-breaking code))))
1202                                  value " ")
1203                       line-breaking))
1204       (setq attributes (delq '->ideograph attributes))
1205       )
1206     ;; (when (and (memq '->decomposition attributes)
1207     ;;            (setq value (get-char-attribute char '->decomposition)))
1208     ;;   (insert (format "(->decomposition\t%s)%s"
1209     ;;                   (mapconcat (lambda (code)
1210     ;;                                (cond ((symbolp code)
1211     ;;                                       (symbol-name code))
1212     ;;                                      ((characterp code)
1213     ;;                                       (if readable
1214     ;;                                           (format "%S" code)
1215     ;;                                         (format "#x%04X"
1216     ;;                                                 (char-int code))
1217     ;;                                         ))
1218     ;;                                      ((integerp code)
1219     ;;                                       (format "#x%04X" code))
1220     ;;                                      (t
1221     ;;                                       (format "%s%S" line-breaking code))))
1222     ;;                              value " ")
1223     ;;                   line-breaking))
1224     ;;   (setq attributes (delq '->decomposition attributes))
1225     ;;   )
1226     (if (equal (get-char-attribute char '->titlecase)
1227                (get-char-attribute char '->uppercase))
1228         (setq attributes (delq '->titlecase attributes)))
1229     (when (and (memq '->mojikyo attributes)
1230                (setq value (get-char-attribute char '->mojikyo)))
1231       (insert (format "(->mojikyo\t\t. %06d)\t; %c%s"
1232                       value (decode-char 'mojikyo value)
1233                       line-breaking))
1234       (setq attributes (delq '->mojikyo attributes))
1235       )
1236     (when (and (memq 'hanyu-dazidian-vol attributes)
1237                (setq value (get-char-attribute char 'hanyu-dazidian-vol)))
1238       (insert (format "(hanyu-dazidian-vol  . %d)%s"
1239                       value line-breaking))
1240       (setq attributes (delq 'hanyu-dazidian-vol attributes))
1241       )
1242     (when (and (memq 'hanyu-dazidian-page attributes)
1243                (setq value (get-char-attribute char 'hanyu-dazidian-page)))
1244       (insert (format "(hanyu-dazidian-page . %d)%s"
1245                       value line-breaking))
1246       (setq attributes (delq 'hanyu-dazidian-page attributes))
1247       )
1248     (when (and (memq 'hanyu-dazidian-char attributes)
1249                (setq value (get-char-attribute char 'hanyu-dazidian-char)))
1250       (insert (format "(hanyu-dazidian-char . %d)%s"
1251                       value line-breaking))
1252       (setq attributes (delq 'hanyu-dazidian-char attributes))
1253       )
1254     (unless readable
1255       (dolist (ignored '(composition
1256                          ->denotational <-subsumptive ->ucs-unified
1257                          ->ideographic-component-forms))
1258         (setq attributes (delq ignored attributes))))
1259     (while attributes
1260       (setq name (car attributes))
1261       (unless (eq (setq value (get-char-attribute char name 'value-is-empty))
1262                   'value-is-empty)
1263         (cond ((setq ret (find-charset name))
1264                (setq name (charset-name ret))
1265                (when (not (memq name dest-ccss))
1266                  (setq dest-ccss (cons name dest-ccss))
1267                  (char-db-insert-ccs-feature name value line-breaking))
1268                )
1269               ((string-match "^=>ucs@" (symbol-name name))
1270                (insert (format "(%-18s . #x%04X)\t; %c%s"
1271                                name value (decode-char '=ucs value)
1272                                line-breaking))
1273                )
1274               ((eq name 'jisx0208-1978/4X)
1275                (insert (format "(%-18s . #x%04X)%s"
1276                                name value
1277                                line-breaking))
1278                )
1279               ((and
1280                 (not readable)
1281                 (not (eq name '->subsumptive))
1282                 (not (eq name '->uppercase))
1283                 (not (eq name '->lowercase))
1284                 (not (eq name '->titlecase))
1285                 (not (eq name '->canonical))
1286                 (not (eq name '->Bopomofo))
1287                 (not (eq name '->mistakable))
1288                 (not (eq name '->ideographic-variants))
1289                 (null (get-char-attribute
1290                        char (intern (format "%s*sources" name))))
1291                 (not (string-match "\\*sources$" (symbol-name name)))
1292                 (null (get-char-attribute
1293                        char (intern (format "%s*note" name))))
1294                 (not (string-match "\\*note$" (symbol-name name)))
1295                 (or (eq name '<-identical)
1296                     (eq name '<-uppercase)
1297                     (eq name '<-lowercase)
1298                     (eq name '<-titlecase)
1299                     (eq name '<-canonical)
1300                     (eq name '<-ideographic-variants)
1301                     ;; (eq name '<-synonyms)
1302                     (string-match "^<-synonyms" (symbol-name name))
1303                     (eq name '<-mistakable)
1304                     (when (string-match "^->" (symbol-name name))
1305                       (cond
1306                        ((string-match "^->fullwidth" (symbol-name name))
1307                         (not (and (consp value)
1308                                   (characterp (car value))
1309                                   (encode-char
1310                                    (car value) '=ucs 'defined-only)))
1311                         )
1312                        (t)))
1313                     ))
1314                )
1315               ((or (eq name 'ideographic-structure)
1316                    (eq name 'ideographic-combination)
1317                    (eq name 'ideographic-)
1318                    (eq name '=decomposition)
1319                    (char-feature-base-name= '=decomposition name)
1320                    (char-feature-base-name= '=>decomposition name)
1321                    ;; (string-match "^=>*decomposition\\(@[^*]+\\)?$"
1322                    ;;               (symbol-name name))
1323                    (string-match "^\\(->\\|<-\\)[^*]*$" (symbol-name name))
1324                    (string-match "^\\(->\\|<-\\)[^*]*\\*sources$"
1325                                  (symbol-name name))
1326                    )
1327                (char-db-insert-relation-feature char name value
1328                                                 line-breaking
1329                                                 ccss readable))
1330               ((memq name '(ideograph=
1331                             original-ideograph-of
1332                             ancient-ideograph-of
1333                             vulgar-ideograph-of
1334                             wrong-ideograph-of
1335                             ;; simplified-ideograph-of
1336                             ideographic-variants
1337                             ;; ideographic-different-form-of
1338                             ))
1339                (insert (format "(%-18s%s " name line-breaking))
1340                (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1341                      separator nil)
1342                (while (consp value)
1343                  (setq cell (car value))
1344                  (if (and (consp cell)
1345                           (consp (car cell)))
1346                      (progn
1347                        (if separator
1348                            (insert lbs))
1349                        (char-db-insert-alist cell readable)
1350                        (setq separator lbs))
1351                    (if separator
1352                        (insert separator))
1353                    (insert (prin1-to-string cell))
1354                    (setq separator " "))
1355                  (setq value (cdr value)))
1356                (insert ")")
1357                (insert line-breaking))
1358               ((consp value)
1359                (insert (format "(%-18s " name))
1360                (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1361                      separator nil)
1362                (while (consp value)
1363                  (setq cell (car value))
1364                  (if (and (consp cell)
1365                           (consp (car cell))
1366                           (setq ret (condition-case nil
1367                                         (find-char cell)
1368                                       (error nil))))
1369                      (progn
1370                        (setq rest cell
1371                              al nil
1372                              cal nil)
1373                        (while rest
1374                          (setq key (car (car rest)))
1375                          (if (find-charset key)
1376                              (setq cal (cons key cal))
1377                            (setq al (cons key al)))
1378                          (setq rest (cdr rest)))
1379                        (if separator
1380                            (insert lbs))
1381                        (insert-char-attributes ret
1382                                                readable
1383                                                al ; cal
1384                                                nil 'for-sub-node)
1385                        (setq separator lbs))
1386                    (setq ret (prin1-to-string cell))
1387                    (if separator
1388                        (if (< (+ (current-column)
1389                                  (length ret)
1390                                  (length separator))
1391                               76)
1392                            (insert separator)
1393                          (insert lbs)))
1394                    (insert ret)
1395                    (setq separator " "))
1396                  (setq value (cdr value)))
1397                (insert ")")
1398                (insert line-breaking))
1399               (t
1400                (insert (format "(%-18s" name))
1401                (setq ret (prin1-to-string value))
1402                (unless (< (+ (current-column)
1403                              (length ret)
1404                              3)
1405                           76)
1406                  (insert line-breaking))
1407                (insert " . " ret ")" line-breaking)
1408                ;; (insert (format "(%-18s . %S)%s"
1409                ;;                 name value
1410                ;;                 line-breaking))
1411                )
1412               ))
1413       (setq attributes (cdr attributes)))
1414     (insert ")")))
1415
1416 (defun insert-char-data (char &optional readable
1417                               attributes)
1418   (save-restriction
1419     (narrow-to-region (point)(point))
1420     (insert "(define-char
1421   '")
1422     (insert-char-attributes char readable attributes)
1423     (insert ")\n")
1424     (goto-char (point-min))
1425     (while (re-search-forward "[ \t]+$" nil t)
1426       (replace-match ""))
1427     ;; from tabify.
1428     (goto-char (point-min))
1429     (while (re-search-forward "[ \t][ \t][ \t]*" nil t)
1430       (let ((column (current-column))
1431             (indent-tabs-mode t))
1432         (delete-region (match-beginning 0) (point))
1433         (indent-to column)))
1434     (goto-char (point-max))
1435     ;; (tabify (point-min)(point-max))
1436     ))
1437
1438 (defun insert-char-data-with-variant (char &optional printable
1439                                            no-ucs-unified
1440                                            script excluded-script)
1441   (insert-char-data char printable)
1442   (let ((variants (char-variants char))
1443         rest
1444         variant vs ret)
1445     (setq variants (sort variants #'<))
1446     (setq rest variants)
1447     (setq variants (cons char variants))
1448     (while rest
1449       (setq variant (car rest))
1450       (unless (get-char-attribute variant '<-subsumptive)
1451         (if (and (or (null script)
1452                      (null (setq vs (get-char-attribute variant 'script)))
1453                      (memq script vs))
1454                  (or (null excluded-script)
1455                      (null (setq vs (get-char-attribute variant 'script)))
1456                      (not (memq excluded-script vs))))
1457             (unless (and no-ucs-unified (get-char-attribute variant '=ucs))
1458               (insert-char-data variant printable)
1459               (if (setq ret (char-variants variant))
1460                   (while ret
1461                     (or (memq (car ret) variants)
1462                         ;; (get-char-attribute (car ret) '<-subsumptive)
1463                         (setq rest (nconc rest (list (car ret)))))
1464                     (setq ret (cdr ret)))))))
1465       (setq rest (cdr rest)))))
1466
1467 (defun insert-char-range-data (min max &optional script excluded-script)
1468   (let ((code min)
1469         char)
1470     (while (<= code max)
1471       (setq char (decode-char '=ucs code))
1472       (if (encode-char char '=ucs 'defined-only)
1473           (insert-char-data-with-variant char nil 'no-ucs-unified
1474                                          script excluded-script))
1475       (setq code (1+ code)))))
1476
1477 (defun write-char-range-data-to-file (min max file
1478                                           &optional script excluded-script)
1479   (let ((coding-system-for-write char-db-file-coding-system))
1480     (with-temp-buffer
1481       (insert (format ";; -*- coding: %s -*-\n"
1482                       char-db-file-coding-system))
1483       (insert-char-range-data min max script excluded-script)
1484       (write-region (point-min)(point-max) file))))
1485
1486 (defvar what-character-original-window-configuration)
1487
1488 ;;;###autoload
1489 (defun what-char-definition (char)
1490   (interactive (list (char-after)))
1491   (let ((buf (get-buffer-create "*Character Description*"))
1492         (the-buf (current-buffer))
1493         (win-conf (current-window-configuration)))
1494     (pop-to-buffer buf)
1495     (make-local-variable 'what-character-original-window-configuration)
1496     (setq what-character-original-window-configuration win-conf)
1497     (setq buffer-read-only nil)
1498     (erase-buffer)
1499     (condition-case err
1500         (progn
1501           (insert-char-data-with-variant char 'printable)
1502           (unless (char-attribute-alist char)
1503             (insert (format ";; = %c\n"
1504                             (let* ((rest (split-char char))
1505                                    (ccs (pop rest))
1506                                    (code (pop rest)))
1507                               (while rest
1508                                 (setq code (logior (lsh code 8)
1509                                                    (pop rest))))
1510                               (decode-char ccs code)))))
1511           ;; (char-db-update-comment)
1512           (set-buffer-modified-p nil)
1513           (view-mode the-buf (lambda (buf)
1514                                (set-window-configuration
1515                                 what-character-original-window-configuration)
1516                                ))
1517           (goto-char (point-min)))
1518       (error (progn
1519                (set-window-configuration
1520                 what-character-original-window-configuration)
1521                (signal (car err) (cdr err)))))))
1522
1523
1524 ;;; @ end
1525 ;;;
1526
1527 (provide 'char-db-util)
1528
1529 ;;; char-db-util.el ends here