(U-0002CF95): Add ideographic-structure.
[chise/xemacs-chise.git] / lisp / utf-2000 / char-db-util.el
1 ;;; char-db-util.el --- Character Database utility -*- coding: utf-8-er; -*-
2
3 ;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
4 ;;   2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019,
5 ;;   2020, 2021, 2022, 2023 MORIOKA Tomohiko.
6
7 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
8 ;; Keywords: CHISE, Character Database, ISO/IEC 10646, UCS, Unicode, MULE.
9
10 ;; This file is part of XEmacs CHISE.
11
12 ;; XEmacs CHISE is free software; you can redistribute it and/or
13 ;; modify it under the terms of the GNU General Public License as
14 ;; published by the Free Software Foundation; either version 2, or (at
15 ;; your option) any later version.
16
17 ;; XEmacs CHISE is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 ;; General Public License for more details.
21
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with XEmacs CHISE; see the file COPYING.  If not, write to
24 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
25 ;; Boston, MA 02111-1307, USA.
26
27 ;;; Code:
28
29 (require 'chise-subr)
30 (require 'ideograph-subr)
31
32 (defconst unidata-normative-category-alist
33   '(("Lu" letter        uppercase)
34     ("Ll" letter        lowercase)
35     ("Lt" letter        titlecase)
36     ("Mn" mark          non-spacing)
37     ("Mc" mark          spacing-combining)
38     ("Me" mark          enclosing)
39     ("Nd" number        decimal-digit)
40     ("Nl" number        letter)
41     ("No" number        other)
42     ("Zs" separator     space)
43     ("Zl" separator     line)
44     ("Zp" separator     paragraph)
45     ("Cc" other         control)
46     ("Cf" other         format)
47     ("Cs" other         surrogate)
48     ("Co" other         private-use)
49     ("Cn" other         not-assigned)))
50
51 (defconst unidata-informative-category-alist
52   '(("Lm" letter        modifier)
53     ("Lo" letter        other)
54     ("Pc" punctuation   connector)
55     ("Pd" punctuation   dash)
56     ("Ps" punctuation   open)
57     ("Pe" punctuation   close)
58     ("Pi" punctuation   initial-quote)
59     ("Pf" punctuation   final-quote)
60     ("Po" punctuation   other)
61     ("Sm" symbol        math)
62     ("Sc" symbol        currency)
63     ("Sk" symbol        modifier)
64     ("So" symbol        other)
65     ))
66
67 (defconst shuowen-radicals
68   [?一 ?上 ?示 ?三 ?王 ?玉 ?玨 ?气 ?士 ?丨 ; 010
69    ?屮 ?艸 ?蓐 ?茻 ?小 ?八 ?釆 ?半 ?牛 ?犛 ; 020
70    ?告 ?口 ?凵 ?吅 ?哭 ?走 ?止 ?癶 ?步 ?此 ; 030
71    ?正 ?是 ?辵 ?彳 ?廴 ?㢟 ?行 ?齒 ?牙 ?足 ; 040
72    ?疋 ?品 ?龠 ?冊 ?㗊 ?舌 ?干 ?𧮫 ?只 ?㕯 ; 050
73    ?句 ?丩 ?古 ?十 ?卅 ?言 ?誩 ?音 ?䇂 ?丵 ; 060
74    ?菐 ?𠬞 ?𠬜 ?共 ?異 ?舁 ?𦥑 ?䢅 ?爨 ?革 ; 070
75    ?鬲 ?䰜 ?爪 ?𠃨 ?鬥 ?又 ?𠂇 ?史 ?支 ?𦘒 ; 080
76    ?聿 ?畫 ?隶 ?臤 ?臣 ?殳 ?殺 ?𠘧 ?寸 ?皮 ; 090
77    ?㼱 ?攴 ?敎 ?卜 ?用 ?爻 ?㸚 ?𥄎 ?目 ?䀠 ; 100
78    ?眉 ?盾 ?自 ?白 ?鼻 ?皕 ?習 ?羽 ?隹 ?奞 ; 110
79    ?雈 ?𦫳 ?𥄕 ?羊 ?羴 ?瞿 ?雔 ?雥 ?鳥 ?烏 ; 120
80    ?𠦒 ?冓 ?幺 ?𢆶 ?叀 ?玄 ?予 ?放 ?𠬪 ?𣦼 ; 130
81    ?歺 ?死 ?冎 ?骨 ?肉 ?筋 ?刀 ?刃 ?㓞 ?丯 ; 140
82    ?耒 ?角 ?竹 ?箕 ?丌 ?左 ?工 ?㠭 ?巫 ?甘 ; 150
83    ?曰 ?乃 ?丂 ?可 ?兮 ?号 ?亏 ?旨 ?喜 ?壴 ; 160
84 ;  ?旨 ?曰 ?乃 ?丂 ?可 ?兮 ?号 ?亏 ?喜 ?壴 ; 160
85    ?鼓 ?豈 ?豆 ?豊 ?豐 ?䖒 ?虍 ?虎 ?虤 ?皿 ; 170
86    ?𠙴 ?去 ?血 ?丶 ?丹 ?青 ?井 ?皀 ?鬯 ?食 ; 180
87    ?亼 ?會 ?倉 ?入 ?缶 ?矢 ?高 ?冂 ?𩫏 ?京 ; 190
88    ?亯 ?𣆪 ?畗 ?㐭 ?嗇 ?來 ?麥 ?夊 ?舛 ?䑞 ; 200
89    ?韋 ?弟 ?夂 ?久 ?桀 ?木 ?東 ?林 ?才 ?叒 ; 210
90    ?之 ?帀 ?出 ?𣎵 ?生 ?乇 ?𠂹 ?𠌶 ?華 ?𥝌 ; 220
91    ?稽 ?巢 ?桼 ?束 ?㯻 ?囗 ?員 ?貝 ?邑 ?𨛜 ; 230
92    ?日 ?旦 ?倝 ?㫃 ?冥 ?晶 ?月 ?有 ?明 ?囧 ; 240
93    ?夕 ?多 ?毌 ?𢎘 ?𣐺 ?卣 ?齊 ?朿 ?片 ?鼎 ; 250
94    ?克 ?彔 ?禾 ?秝 ?黍 ?香 ?米 ?毇 ?臼 ?凶 ; 260
95    ?𣎳 ?𣏟 ?麻 ?尗 ?耑 ?韭 ?瓜 ?瓠 ?宀 ?宮 ; 270
96    ?呂 ?穴 ?㝱 ?疒 ?冖 ?𠔼 ?冃 ?㒳 ?网 ?襾 ; 280
97    ?巾 ?巿 ?帛 ?白 ?㡀 ?黹 ?人 ?𠤎 ?匕 ?从 ; 290
98    ?比 ?北 ?丘 ?㐺 ?𡈼 ?重 ?臥 ?身 ?㐆 ?衣 ; 300
99    ?裘 ?老 ?毛 ?毳 ?尸 ?尺 ?尾 ?履 ?舟 ?方 ; 310
100    ?儿 ?兄 ?兂 ?皃 ?𠑹 ?先 ?秃 ?見 ?覞 ?欠 ; 320
101    ?㱃 ?㳄 ?旡 ?頁 ?𦣻 ?面 ?丏 ?首 ?𥄉 ?須 ; 330
102    ?彡 ?彣 ?文 ?髟 ?后 ?司 ?卮 ?卩 ?印 ?色 ; 340
103    ?𠨍 ?辟 ?勹 ?包 ?茍 ?鬼 ?甶 ?厶 ?嵬 ?山 ; 350
104    ?屾 ?屵 ?广 ?厂 ?丸 ?危 ?石 ?長 ?勿 ?冄 ; 360
105    ?而 ?豕 ?㣇 ?彑 ?豚 ?豸 ?𤉡 ?易 ?象 ?馬 ; 370
106    ?𢊁 ?鹿 ?麤 ?㲋 ?兔 ?萈 ?犬 ?㹜 ?鼠 ?能 ; 380
107    ?熊 ?火 ?炎 ?黑 ?囪 ?焱 ?炙 ?赤 ?大 ?亦 ; 390
108    ?夨 ?夭 ?交 ?尣 ?壺 ?壹 ?㚔 ?奢 ?亢 ?夲 ; 400
109    ?夰 ?亣 ?夫 ?立 ?竝 ?囟 ?思 ?心 ?惢 ?水 ; 410
110    ?沝 ?瀕 ?𡿨 ?巜 ?川 ?泉 ?灥 ?永 ?𠂢 ?谷 ; 420
111    ?仌 ?雨 ?雲 ?魚 ?𩺰 ?燕 ?龍 ?飛 ?非 ?卂 ; 430
112    ?𠃉 ?不 ?至 ?西 ?鹵 ?鹽 ?戶 ?門 ?耳 ?𦣝 ; 440
113    ?手 ?𠦬 ?女 ?毋 ?民 ?丿 ?𠂆 ?乁 ?氏 ?氐 ; 450
114    ?戈 ?戉 ?我 ?亅 ?琴 ?𠃊 ?亡 ?匸 ?匚 ?曲 ; 460
115    ?甾 ?瓦 ?弓 ?弜 ?弦 ?系 ?糸 ?素 ?絲 ?率 ; 470
116    ?虫 ?䖵 ?蟲 ?風 ?它 ?龜 ?黽 ?卵 ?二 ?土 ; 480
117    ?垚 ?堇 ?里 ?田 ?畕 ?黃 ?男 ?力 ?劦 ?金 ; 490
118    ?幵 ?勺 ?几 ?且 ?斤 ?斗 ?矛 ?車 ?𠂤 ?𨸏 ; 500
119    ?𨺅 ?厽 ?四 ?宁 ?叕 ?亞 ?五 ?六 ?七 ?九 ; 510
120    ?禸 ?嘼 ?甲 ?乙 ?丙 ?丁 ?戊 ?己 ?巴 ?庚 ; 520
121    ?辛 ?辡 ?壬 ?癸 ?子 ?了 ?孨 ?𠫓 ?丑 ?寅 ; 530
122    ?卯 ?辰 ?巳 ?午 ?未 ?申 ?酉 ?酋 ?戌 ?亥 ; 540
123    ])
124
125 (defun shuowen-radical (number)
126   (aref shuowen-radicals (1- number)))
127
128 (defvar char-db-file-coding-system 'utf-8-mcs-er)
129
130 (defvar char-db-ignored-attributes '(ideographic-products))
131
132 (defvar char-db-coded-charset-priority-list
133   '(ascii
134     control-1
135     latin-iso8859-1
136     latin-iso8859-2
137     latin-iso8859-3
138     latin-iso8859-4
139     latin-iso8859-9
140     latin-jisx0201
141     cyrillic-iso8859-5
142     greek-iso8859-7
143     thai-tis620
144     ;; =mj
145     ;; =adobe-japan1-0
146     ;; =adobe-japan1-1
147     ;; =adobe-japan1-2
148     ;; =adobe-japan1-3
149     ;; =adobe-japan1-4
150     ;; =adobe-japan1-5
151     ;; =adobe-japan1-6
152     =jis-x0208
153     =jis-x0208@1978
154     =jis-x0208@1983
155     japanese-jisx0212
156     chinese-gb2312
157     =jis-x0208@1990
158     chinese-cns11643-1
159     chinese-cns11643-2
160     chinese-cns11643-3
161     chinese-cns11643-4
162     chinese-cns11643-5
163     chinese-cns11643-6
164     chinese-cns11643-7
165     =jis-x0213-1
166     =jis-x0213-1@2000
167     =jis-x0213-1@2004
168     =jis-x0213-2
169     korean-ksc5601
170     chinese-isoir165
171     katakana-jisx0201
172     hebrew-iso8859-8
173     chinese-gb12345
174     latin-viscii
175     ethiopic-ucs
176     =big5-cdp
177     =hanyo-denshi/ja
178     =hanyo-denshi/jb
179     =hanyo-denshi/jc
180     =hanyo-denshi/jd
181     =hanyo-denshi/ft
182     =hanyo-denshi/ia
183     =hanyo-denshi/ib
184     =hanyo-denshi/hg
185     =hanyo-denshi/ip
186     =hanyo-denshi/jt
187     =hanyo-denshi/ks
188     =hanyo-denshi/tk
189     ;; ==mj
190     ;; ==adobe-japan1-0
191     ;; ==adobe-japan1-1
192     ;; ==adobe-japan1-2
193     ;; ==adobe-japan1-3
194     ;; ==adobe-japan1-4
195     ;; ==adobe-japan1-5
196     ;; ==adobe-japan1-6
197     ==jis-x0208
198     ==jis-x0213-1
199     ==jis-x0213-2
200     ==jis-x0212
201     ==hanyo-denshi/ja
202     ==hanyo-denshi/jb
203     ==hanyo-denshi/jc
204     ==hanyo-denshi/ft
205     ==hanyo-denshi/ia
206     ==hanyo-denshi/ib
207     ==hanyo-denshi/hg
208     ==hanyo-denshi/jt
209     ==hanyo-denshi/ks
210     ==hanyo-denshi/tk
211     =gt
212     =gt-k
213     =daikanwa
214     =daijiten
215     =daikanwa@rev2
216     =daikanwa@rev1
217     =daikanwa/+p
218     =daikanwa/ho
219     ==daikanwa
220     ==daijiten
221     =cbeta
222     ideograph-hanziku-1
223     ideograph-hanziku-2
224     ideograph-hanziku-3
225     ideograph-hanziku-4
226     ideograph-hanziku-5
227     ideograph-hanziku-6
228     ideograph-hanziku-7
229     ideograph-hanziku-8
230     ideograph-hanziku-9
231     ideograph-hanziku-10
232     ideograph-hanziku-11
233     ideograph-hanziku-12
234     ;; =>>>adobe-japan1-0
235     ;; =>>>adobe-japan1-1
236     ;; =>>>adobe-japan1-2
237     ;; =>>>adobe-japan1-3
238     ;; =>>>adobe-japan1-4
239     ;; =>>>adobe-japan1-5
240     ;; =>>>adobe-japan1-6
241     ;; =>>>jis-x0208
242     ;; =>>>jis-x0213-1
243     ;; =>>>jis-x0213-2
244     ;; =>>>hanyo-denshi/ja
245     ;; =>>>hanyo-denshi/jb
246     ;; =>>>hanyo-denshi/jc
247     ;; =>>>hanyo-denshi/ft
248     ;; =>>>hanyo-denshi/ib
249     ;; =>>>hanyo-denshi/hg
250     ;; =>>>hanyo-denshi/jt
251     ;; =>>>hanyo-denshi/ks
252     ;; =>>>gt
253     =>>mj
254     =>>adobe-japan1-0
255     =>>adobe-japan1-1
256     =>>adobe-japan1-2
257     =>>adobe-japan1-3
258     =>>adobe-japan1-4
259     =>>adobe-japan1-5
260     =>>adobe-japan1-6
261     =>>jis-x0208
262     =>>jis-x0213-1
263     =>>jis-x0213-1@2000
264     =>>jis-x0213-1@2004
265     =>>jis-x0213-2
266     =>>jis-x0208@1978
267     =>>hanyo-denshi/ft
268     =>>hanyo-denshi/jt
269     =>>hanyo-denshi/ks
270     =>>gt
271     =>>daikanwa
272     =>>cbeta
273     =+>jis-x0208
274     =+>jis-x0213-1
275     =+>jis-x0213-2
276     =+>adobe-japan1-0
277     =+>adobe-japan1-1
278     =+>adobe-japan1-2
279     =+>adobe-japan1-3
280     =+>adobe-japan1-4
281     =+>adobe-japan1-5
282     =+>adobe-japan1-6
283     =+>jis-x0208@1978
284     =>jis-x0208
285     =>jis-x0208@1997
286     =>jis-x0213-1
287     =>jis-x0213-1@2000
288     =>jis-x0213-1@2004
289     =>jis-x0213-2
290     ==>ucs@bucs
291     ==>daijiten
292     ;; =>ucs@hanyo-denshi
293     =>ucs@iso
294     =>ucs@unicode
295     =>ucs@jis
296     =>ucs@cns
297     =>ucs@ks
298     =+>ucs@iso
299     =+>ucs@unicode
300     =+>ucs@jis
301     =+>ucs@jis/1990
302     =+>ucs@cns
303     =+>ucs@ks
304     =>>ucs@iso
305     =>>ucs@unicode
306     =>>ucs@jis
307     =>>ucs@JP
308     =>>ucs@cns
309     =>>>ucs@iso
310     =>>>ucs@unicode
311     ==ucs@iso
312     ==ucs@unicode
313     ;; ==ucs@cns
314     ==gb2312
315     ==ks-x1001
316     ==cns11643-1
317     ==cns11643-2
318     ==cns11643-3
319     ==cns11643-4
320     ==cns11643-5
321     ==cns11643-6
322     ==cns11643-7
323     ==gt
324     ==jis-x0208@1990
325     ;; ==jis-x0208@1983
326     ==jis-x0208@1978
327     ==gt-k
328     =ucs@iso
329     =ucs@unicode
330     =ucs@cns
331     ==big5-cdp
332     ==cbeta
333     =>>big5-cdp
334     =>>gt-k
335     =+>gt
336     =+>big5-cdp
337     =>big5-cdp
338     =>big5-cdp@iwds-1
339     =>mj
340     =>hanyo-denshi/tk
341     =>iwds-1
342     =big5
343     =big5-eten
344     =zinbun-oracle
345     =>zinbun-oracle
346     =ruimoku-v6
347     =>>ruimoku-v6
348     ==ruimoku-v6
349     =>ruimoku-v6
350     =jef-china3
351     =shinjigen
352     =>cns11643-5
353     =>cns11643-7
354     =>gt
355     =>gt-k
356     =>daikanwa
357     =>daikanwa/ho
358     =>cbeta
359     =ucs-var-001
360     =ucs-var-002
361     =ucs-var-003
362     =ucs-var-004
363     =ucs-var-005
364     =ucs-var-006
365     =ucs-var-007
366     =ucs-var-008
367     =ucs-var-010
368     =ucs-itaiji-001
369     =ucs-itaiji-002
370     =ucs-itaiji-003
371     =ucs-itaiji-004
372     =ucs-itaiji-005
373     =ucs-itaiji-006
374     =ucs-itaiji-007
375     =ucs-itaiji-008
376     =ucs-itaiji-009
377     =ucs-itaiji-010
378     =ucs-itaiji-011
379     =ucs-itaiji-012
380     =ucs-itaiji-014
381     =ucs-itaiji-016
382     =ucs-itaiji-018
383     =+>ucs-var-003
384     =>ucs-itaiji-001
385     =>ucs-itaiji-002
386     =>ucs-itaiji-003
387     =>ucs-itaiji-004
388     =>ucs-itaiji-005
389     =>ucs-itaiji-006
390     =>ucs-itaiji-007
391     =>ucs-itaiji-008
392     =>ucs-itaiji-009
393     =>ucs-itaiji-010
394     =>ucs-itaiji-011
395     =>ucs-itaiji-012
396     =>ucs-itaiji-014
397     =>ucs-itaiji-016
398     =>ucs-var-001
399     =big5-cdp-var-001
400     =big5-cdp-var-002
401     =big5-cdp-var-003
402     =big5-cdp-var-004
403     =big5-cdp-var-005
404     =big5-cdp-var-007
405     =big5-cdp-var-008
406     =big5-cdp-var-010
407     =big5-cdp-itaiji-001
408     =big5-cdp-itaiji-002
409     =big5-cdp-itaiji-003
410     =big5-cdp-itaiji-004
411     =big5-cdp-itaiji-005
412     =>big5-cdp-itaiji-001
413     =>big5-cdp-itaiji-002
414     =>big5-cdp-itaiji-003
415     =>big5-cdp-itaiji-004
416     =>big5-cdp-itaiji-005
417     =>big5-cdp-itaiji-006
418     =>big5-cdp-var-001
419     =>ucs@iwds-1
420     =>ucs@cognate
421     =>ucs@component
422     =>ucs@iwds-1/normalized
423     =>ucs-itaiji-001@iwds-1
424     =>ucs-itaiji-002@iwds-1
425     =>ucs-itaiji-003@iwds-1
426     =>ucs-itaiji-006@iwds-1
427     =>big5-cdp@component
428     =>big5-cdp@cognate
429     ==ucs@gb
430     =ucs@gb
431     ==ucs-var-002
432     ==ucs-itaiji-002
433     =ucs@JP/hanazono
434     =koseki
435     =shuowen-jiguge
436     =>shuowen-jiguge
437     ===shuowen-jiguge5
438     ===shuowen-jiguge4
439     ===chise-hdic-ktb-seal))
440
441
442 ;;; @ char-db formatters
443 ;;;
444
445 (defun char-db-make-char-spec (char)
446   (let (ret char-spec)
447     (cond ((characterp char)
448            (cond ((and (setq ret (encode-char char '=ucs 'defined-only))
449                        (not (and (<= #xE000 ret)(<= ret #xF8FF))))
450                   (setq char-spec (list (cons '=ucs ret)))
451                   (cond ((setq ret (get-char-attribute char 'name))
452                          (setq char-spec (cons (cons 'name ret) char-spec))
453                          )
454                         ((setq ret (get-char-attribute char 'name*))
455                          (setq char-spec (cons (cons 'name* ret) char-spec))
456                          ))
457                   )
458                  ((encode-char char '=mj 'defined-only)
459                   (setq char-spec nil)
460                   (dolist (ccs (charset-list))
461                     (if (and (or (eq ccs '=mj)
462                                  ;; (eq (charset-property ccs 'iso-ir) 177)
463                                  (string-match "=ucs@" (symbol-name ccs))
464                                  )
465                              (setq ccs (charset-name ccs))
466                              (null (assq ccs char-spec))
467                              (setq ret (encode-char char ccs 'defined-only)))
468                         (setq char-spec (cons (cons ccs ret) char-spec))))
469                   )
470                  ((encode-char char '==mj 'defined-only)
471                   (setq char-spec nil)
472                   (dolist (ccs (charset-list))
473                     (if (and (or (eq ccs '==mj)
474                                  ;; (eq (charset-property ccs 'iso-ir) 177)
475                                  (string-match "=ucs@" (symbol-name ccs))
476                                  )
477                              (setq ccs (charset-name ccs))
478                              (null (assq ccs char-spec))
479                              (setq ret (encode-char char ccs 'defined-only)))
480                         (setq char-spec (cons (cons ccs ret) char-spec))))
481                   )
482                  ((encode-char char '=adobe-japan1 'defined-only)
483                   (setq char-spec nil)
484                   (dolist (ccs (charset-list))
485                     (if (and (or (memq ccs
486                                        '(=adobe-japan1-0
487                                          =adobe-japan1-1
488                                          =adobe-japan1-2
489                                          =adobe-japan1-3
490                                          =adobe-japan1-4
491                                          =adobe-japan1-5
492                                          =adobe-japan1-6
493                                          ))
494                                  ;; (eq (charset-property ccs 'iso-ir) 177)
495                                  (string-match "=ucs@" (symbol-name ccs))
496                                  )
497                              (setq ccs (charset-name ccs))
498                              (null (assq ccs char-spec))
499                              (setq ret (encode-char char ccs 'defined-only)))
500                         (setq char-spec (cons (cons ccs ret) char-spec))))
501                   )
502                  ((encode-char char '==adobe-japan1 'defined-only)
503                   (setq char-spec nil)
504                   (dolist (ccs (charset-list))
505                     (if (and (or (memq ccs
506                                        '(==adobe-japan1-0
507                                          ==adobe-japan1-1
508                                          ==adobe-japan1-2
509                                          ==adobe-japan1-3
510                                          ==adobe-japan1-4
511                                          ==adobe-japan1-5
512                                          ==adobe-japan1-6
513                                          ))
514                                  ;; (eq (charset-property ccs 'iso-ir) 177)
515                                  (string-match "=ucs@" (symbol-name ccs))
516                                  )
517                              (setq ccs (charset-name ccs))
518                              (null (assq ccs char-spec))
519                              (setq ret (encode-char char ccs 'defined-only)))
520                         (setq char-spec (cons (cons ccs ret) char-spec))))
521                   )
522                  ((setq ret
523                         (catch 'tag
524                           (let ((rest char-db-coded-charset-priority-list)
525                                 ccs)
526                             (while rest
527                               (setq ccs (charset-name
528                                          (find-charset (car rest))))
529                               (if (setq ret
530                                         (encode-char char ccs
531                                                      'defined-only))
532                                   (throw 'tag (cons ccs ret)))
533                               (setq rest (cdr rest))))))
534                   (setq char-spec (list ret))
535                   (dolist (ccs (delq (car ret) (charset-list)))
536                     (if (and (or (charset-iso-final-char ccs)
537                                  (memq ccs
538                                        '(=daikanwa
539                                          =daikanwa@rev2
540                                          ;; =gt-k
541                                          =jis-x0208@1997
542                                          ))
543                                  (and (eq (charset-property ccs 'iso-ir) 177)
544                                       (not (eq (charset-name ccs) '=ucs@big5)))
545                                  ;; (string-match "=ucs@" (symbol-name ccs))
546                                  )
547                              (setq ccs (charset-name ccs))
548                              (null (assq ccs char-spec))
549                              (setq ret (encode-char char ccs 'defined-only)))
550                         (setq char-spec (cons (cons ccs ret) char-spec))))
551                   (if (null char-spec)
552                       (setq char-spec (split-char char)))
553                   (cond ((setq ret (get-char-attribute char 'name))
554                          (setq char-spec (cons (cons 'name ret) char-spec))
555                          )
556                         ((setq ret (get-char-attribute char 'name*))
557                          (setq char-spec (cons (cons 'name* ret) char-spec))
558                          ))
559                   )
560                  ((setq ret (get-char-attribute
561                              char 'ideographic-combination))
562                   (setq char-spec
563                         (cons (cons 'ideographic-combination ret)
564                               char-spec))
565                   ))
566            char-spec)
567           ((consp char)
568            char))))
569     
570 (defun char-db-insert-char-spec (char &optional readable column
571                                       required-features)
572   (unless column
573     (setq column (current-column)))
574   (let (char-spec temp-char)
575     (setq char-spec (char-db-make-char-spec char))
576     (unless (or (characterp char) ; char
577                 (condition-case nil
578                     (setq char (find-char char-spec))
579                   (error nil)))
580       ;; define temporary character
581       ;;   Current implementation is dirty.
582       (setq temp-char (define-char (cons '(ideograph-daikanwa . 0)
583                                          char-spec)))
584       (remove-char-attribute temp-char 'ideograph-daikanwa)
585       (setq char temp-char))
586     (insert-char-attributes char
587                             readable
588                             (union (mapcar #'car char-spec)
589                                    required-features)
590                             nil 'for-sub-node)
591     (when temp-char
592       ;; undefine temporary character
593       ;;   Current implementation is dirty.
594       (setq char-spec (char-attribute-alist temp-char))
595       (while char-spec
596         (remove-char-attribute temp-char (car (car char-spec)))
597         (setq char-spec (cdr char-spec))))))
598
599 (defun char-db-insert-alist (alist &optional readable column)
600   (unless column
601     (setq column (current-column)))
602   (let ((line-breaking
603          (concat "\n" (make-string (1+ column) ?\ )))
604         name value
605         ret al ; cal
606         key
607         lbs cell rest separator)
608     (insert "(")
609     (while alist
610       (setq name (car (car alist))
611             value (cdr (car alist)))
612       (cond ((eq name 'char)
613              (insert "(char . ")
614              (if (setq ret (condition-case nil
615                                (find-char value)
616                              (error nil)))
617                  (progn
618                    (setq al nil
619                          ;; cal nil
620                          )
621                    (while value
622                      (setq key (car (car value)))
623                      ;; (if (find-charset key)
624                      ;;     (setq cal (cons key cal))
625                      (setq al (cons key al))
626                      ;; )
627                      (setq value (cdr value)))
628                    (insert-char-attributes ret
629                                            readable
630                                            (or al 'none) ; cal
631                                            nil 'for-sub-node))
632                (insert (prin1-to-string value)))
633              (insert ")")
634              (insert line-breaking))
635             ((consp value)
636              (insert (format "(%-18s " name))
637              (setq lbs (concat "\n" (make-string (current-column) ?\ )))
638              (while (consp value)
639                (setq cell (car value))
640                (if (and (consp cell)
641                         (consp (car cell))
642                         (setq ret (condition-case nil
643                                       (find-char cell)
644                                     (error nil)))
645                         )
646                    (progn
647                      (setq rest cell
648                            al nil
649                            ;; cal nil
650                            )
651                      (while rest
652                        (setq key (car (car rest)))
653                        ;; (if (find-charset key)
654                        ;;     (setq cal (cons key cal))
655                        (setq al (cons key al))
656                        ;; )
657                        (setq rest (cdr rest)))
658                      (if separator
659                          (insert lbs))
660                      (insert-char-attributes ret
661                                              readable
662                                              al ; cal
663                                              nil 'for-sub-node)
664                      (setq separator lbs))
665                  (if separator
666                      (insert separator))
667                  (insert (prin1-to-string cell))
668                  (setq separator " "))
669                (setq value (cdr value)))
670              (insert ")")
671              (insert line-breaking))
672             (t
673              (insert (format "(%-18s . %S)%s"
674                              name value
675                              line-breaking))))
676       (setq alist (cdr alist))))
677   (insert ")"))
678
679 (defun char-db-insert-char-reference (plist &optional readable column)
680   (unless column
681     (setq column (current-column)))
682   (let ((line-breaking
683          (concat "\n" (make-string (1+ column) ?\ )))
684         (separator "")
685         name value)
686     (insert "(")
687     (while plist
688       (setq name (pop plist))
689       (setq value (pop plist))
690       (cond ((eq name :char)
691              (insert separator)
692              (insert ":char\t")
693              (cond ((numberp value)
694                     (setq value (decode-char '=ucs value)))
695                    ;; ((consp value)
696                    ;;  (setq value (or (find-char value)
697                    ;;                  value)))
698                    )
699              (char-db-insert-char-spec value readable)
700              (insert line-breaking)
701              (setq separator ""))
702             ((eq name :radical)
703              (insert (format "%s%s\t%d ; %c%s"
704                              separator
705                              name value
706                              (ideographic-radical value)
707                              line-breaking))
708              (setq separator ""))
709             (t
710              (insert (format "%s%s\t%S" separator name value))
711              (setq separator line-breaking)))
712       ))
713   (insert ")"))
714
715 (defun char-db-decode-isolated-char (ccs code-point)
716   (let (ret)
717     (setq ret
718           (cond ((eq ccs 'arabic-iso8859-6)
719                  (decode-char ccs code-point))
720                 ;; ((eq ccs '=gt)
721                 ;;  (decode-builtin-char '==gt code-point))
722                 ((and (memq ccs '(=gt-pj-1
723                                   =gt-pj-2
724                                   =gt-pj-3
725                                   =gt-pj-4
726                                   =gt-pj-5
727                                   =gt-pj-6
728                                   =gt-pj-7
729                                   =gt-pj-8
730                                   =gt-pj-9
731                                   =gt-pj-10
732                                   =gt-pj-11))
733                       (setq ret (decode-char ccs code-point))
734                       (setq ret (encode-char ret '=gt 'defined-only)))
735                  (decode-builtin-char '=gt ret))
736                 (t
737                  (decode-builtin-char ccs code-point))))
738     (cond ((null ret)
739            (or (decode-char ccs code-point)
740                (define-char (list (cons ccs code-point)))))
741           ((and (<= 0 (char-int ret))
742                 (<= (char-int ret) #x1F))
743            (decode-char '=ucs (+ #x2400 (char-int ret))))
744           ((= (char-int ret) #x7F)
745            ?\u2421)
746           (t ret))))
747
748 (defvar char-db-convert-obsolete-format t)
749
750 (defun char-db-insert-ccs-feature (name value line-breaking)
751   (cond
752    ((integerp value)
753     (insert
754      (format
755       (cond
756        ((memq name '(=>iwds-1
757                      ==shinjigen
758                      =shinjigen
759                      =shinjigen@1ed ==shinjigen@1ed
760                      =shinjigen@rev ==shinjigen@rev
761                      =shinjigen/+p@rev ==shinjigen/+p@rev
762                      ===daikanwa/ho ==daikanwa/ho
763                      =daikanwa/ho =>>daikanwa/ho =>daikanwa/ho))
764         "(%-18s .  %04d)\t; %c")
765        ((eq name '=shinjigen@1ed/24pr)
766         "(%-18s . %04d)\t; %c")
767        ((or
768          (memq name
769                '(===daikanwa
770                  ==daikanwa =daikanwa =>>daikanwa =>daikanwa
771                  =daikanwa@rev1 =daikanwa@rev2
772                  =daikanwa/+p ==daikanwa/+p ===daikanwa/+p
773                  =>>daikanwa/+p
774                  =daikanwa/+2p =>>daikanwa/+2p
775                  =gt ==gt ===gt
776                  =>>gt =+>gt =>gt
777                  =gt-k ==gt-k ===gt-k
778                  =>>gt-k =>gt-k
779                  =adobe-japan1-0 ==adobe-japan1-0 ===adobe-japan1-0
780                  =adobe-japan1-1 ==adobe-japan1-1 ===adobe-japan1-1
781                  =adobe-japan1-2 ==adobe-japan1-2 ===adobe-japan1-2
782                  =adobe-japan1-3 ==adobe-japan1-3 ===adobe-japan1-3
783                  =adobe-japan1-4 ==adobe-japan1-4 ===adobe-japan1-4
784                  =adobe-japan1-5 ==adobe-japan1-5 ===adobe-japan1-5
785                  =adobe-japan1-6 ==adobe-japan1-6 ===adobe-japan1-6
786                  =>>adobe-japan1-0 =+>adobe-japan1-0
787                  =>>adobe-japan1-1 =+>adobe-japan1-1
788                  =>>adobe-japan1-2 =+>adobe-japan1-2
789                  =>>adobe-japan1-3 =+>adobe-japan1-3
790                  =>>adobe-japan1-4 =+>adobe-japan1-4
791                  =>>adobe-japan1-5 =+>adobe-japan1-5
792                  =>>adobe-japan1-6 =+>adobe-japan1-6
793                  =>cbeta =cbeta =>>cbeta ==cbeta ===cbeta
794                  =zinbun-oracle =>zinbun-oracle
795                  =daijiten ==daijiten ===daijiten ==>daijiten
796                  ===hng-jou ===hng-keg ===hng-dng ===hng-mam
797                  ===hng-drt ===hng-kgk ===hng-myz ===hng-kda
798                  ===hng-khi ===hng-khm ===hng-fhs ===hng-hok
799                  ===hng-kyd ===hng-sok
800                  ===hng-yhk ===hng-kak ===hng-kar ===hng-kae
801                  ===hng-sys ===hng-tsu ===hng-tzj
802                  ===hng-hos ===hng-kkh ===hng-nak ===hng-jhk
803                  ===hng-hod ===hng-gok ===hng-ink ===hng-nto
804                  ===hng-nkm ===hng-k24 ===hng-ini ===hng-nkk
805                  ===hng-kcc ===hng-kcj ===hng-kbk ===hng-sik
806                  ===hng-skk ===hng-kyu ===hng-ksk ===hng-wan
807                  ===hng-okd ===hng-wad ===hng-kmi
808                  ===hng-zkd ===hng-doh ===hng-jyu ===hng-tzs
809                  ===hng-sai ===hng-kad ===hng-kss
810                  ===hng-kyo ===hng-ykk ===hng-saa
811                  ===hng-sab ===hng-wks ===hng-wke ===hng-smk
812                  ===hng-sgs ===hng-sts ===hng-kkd
813                  =>shuowen-jiguge =shuowen-jiguge
814                  ===shuowen-jiguge4 ===shuowen-jiguge5 ===shuowen-jiguge-A30))
815          ;; (string-match "^=adobe-" (symbol-name name))
816          )
817         "(%-18s . %05d)\t; %c")
818        ((memq name '(=hanyo-denshi/ks
819                      ==hanyo-denshi/ks ===hanyo-denshi/ks
820                      =>>hanyo-denshi/ks
821                      =koseki ==koseki
822                      =mj ==mj ===mj =>>mj =>mj
823                      =zihai mojikyo))
824         "(%-18s . %06d)\t; %c")
825        ((memq name '(=hanyo-denshi/tk =>hanyo-denshi/tk ==hanyo-denshi/tk))
826         "(%-18s . %08d)\t; %c")
827        ((>= (charset-dimension name) 2)
828         "(%-18s . #x%04X)\t; %c")
829        (t
830         "(%-18s . #x%02X)\t; %c"))
831       name
832       (if (= (charset-iso-graphic-plane name) 1)
833           (logior value
834                   (cond ((= (charset-dimension name) 1)
835                          #x80)
836                         ((= (charset-dimension name) 2)
837                          #x8080)
838                         ((= (charset-dimension name) 3)
839                          #x808080)
840                         (t 0)))
841         value)
842       (char-db-decode-isolated-char name value)))
843     (if (and (= (charset-chars name) 94)
844              (= (charset-dimension name) 2))
845         (insert (format " [%02d-%02d]"
846                         (- (lsh value -8) 32)
847                         (- (logand value 255) 32))))
848     )
849    (t
850     (insert (format "(%-18s . %s)" name value))
851     ))
852   (insert line-breaking))
853
854 (defun char-db-insert-relation-feature (char name value line-breaking
855                                              ccss readable)
856   (insert (format "(%-18s%s " name line-breaking))
857   (let ((lbs (concat "\n" (make-string (current-column) ?\ )))
858         separator cell sources required-features
859         ret)
860     (while (consp value)
861       (setq cell (car value))
862       (if (integerp cell)
863           (setq cell (decode-char '=ucs cell)))
864       (cond
865        ((eq name '->subsumptive)
866         (when (or (not (some (lambda (atr)
867                                (get-char-attribute cell atr))
868                              char-db-ignored-attributes))
869                   (some (lambda (ccs)
870                           (encode-char cell ccs 'defined-only))
871                         ccss))
872           (if separator
873               (insert lbs))
874           (let ((char-db-ignored-attributes
875                  (cons '<-subsumptive
876                        char-db-ignored-attributes)))
877             (insert-char-attributes cell readable nil nil 'for-sub-node))
878           (setq separator lbs))
879         )
880        ((characterp cell)
881         (setq sources
882               (get-char-attribute
883                char (intern (format "%s*sources" name))))
884         (setq required-features nil)
885         (dolist (source sources)
886           (cond
887            ((memq source '(JP
888                            JP/Jouyou
889                            shinjigen shinjigen@1ed shinjigen@rev))
890             (setq required-features
891                   (union required-features
892                          '(=jis-x0208
893                            =jis-x0208@1990
894                            =jis-x0213-1@2000
895                            =jis-x0213-1@2004
896                            =jis-x0213-2
897                            =jis-x0212
898                            =jis-x0208@1983
899                            =jis-x0208@1978
900                            =shinjigen
901                            =shinjigen@1ed
902                            =shinjigen@rev
903                            =shinjigen/+p@rev))))
904            ((eq source 'CN)
905             (setq required-features
906                   (union required-features
907                          '(=gb2312
908                            =gb12345
909                            =iso-ir165)))))
910           (cond
911            ((find-charset (setq ret (intern (format "=%s" source))))
912             (setq required-features
913                   (cons ret required-features)))
914            (t (setq required-features
915                     (cons source required-features)))))
916         (cond ((string-match "@JP" (symbol-name name))
917                (setq required-features
918                      (union required-features
919                             '(=jis-x0208
920                               =jis-x0208@1990
921                               =jis-x0213-1-2000
922                               =jis-x0213-2-2000
923                               =jis-x0212
924                               =jis-x0208@1983
925                               =jis-x0208@1978))))
926               ((string-match "@CN" (symbol-name name))
927                (setq required-features
928                      (union required-features
929                             '(=gb2312
930                               =gb12345
931                               =iso-ir165)))))
932         (if separator
933             (insert lbs))
934         (if readable
935             (insert (format "%S" cell))
936           (char-db-insert-char-spec cell readable
937                                     nil
938                                     required-features))
939         (setq separator lbs))
940        ((consp cell)
941         (if separator
942             (insert lbs))
943         (if (consp (car cell))
944             (char-db-insert-char-spec cell readable)
945           (char-db-insert-char-reference cell readable))
946         (setq separator lbs))
947        (t
948         (if separator
949             (insert separator))
950         (insert (prin1-to-string cell))
951         (setq separator " ")))
952       (setq value (cdr value)))
953     (insert ")")
954     (insert line-breaking)))
955
956 (defun insert-char-attributes (char &optional readable attributes column
957                                     for-sub-node)
958   (unless column
959     (setq column (current-column)))
960   (let (name value ; has-long-ccs-name
961         rest
962         radical strokes
963         (line-breaking
964          (concat "\n" (make-string (1+ column) ?\ )))
965         lbs cell separator ret
966         key al cal
967         dest-ccss ; sources required-features
968         ccss)
969     (let (atr-d)
970       (setq attributes
971             (sort (if attributes
972                       (if (consp attributes)
973                           (progn
974                             (dolist (name attributes)
975                               (unless (memq name char-db-ignored-attributes)
976                                 (if (find-charset name)
977                                     (push name ccss))
978                                 (push name atr-d)))
979                             atr-d))
980                     (dolist (name (char-attribute-list))
981                       (unless (memq name char-db-ignored-attributes)
982                         (if (find-charset name)
983                             (push name ccss))
984                         (push name atr-d)))
985                     atr-d)
986                   #'char-attribute-name<)))
987     (insert "(")
988     (when (memq '<-subsumptive attributes)
989       (when (or readable (not for-sub-node))
990         (when (setq value (get-char-attribute char '<-subsumptive))
991           (char-db-insert-relation-feature char '<-subsumptive value
992                                            line-breaking
993                                            ccss readable)))
994       (setq attributes (delq '<-subsumptive attributes)))
995     (when (and (memq '<-denotational attributes)
996                (setq value (get-char-attribute char '<-denotational)))
997       (char-db-insert-relation-feature char '<-denotational value
998                                        line-breaking
999                                        ccss readable)
1000       (setq attributes (delq '<-denotational attributes)))
1001     (when (and (memq '<-denotational@component attributes)
1002                (setq value (get-char-attribute char '<-denotational@component)))
1003       (char-db-insert-relation-feature char '<-denotational@component value
1004                                        line-breaking
1005                                        ccss readable)
1006       (setq attributes (delq '<-denotational@component attributes)))
1007     (when (and (memq '<-denotational@usage attributes)
1008                (setq value (get-char-attribute char '<-denotational@usage)))
1009       (char-db-insert-relation-feature char '<-denotational@usage value
1010                                        line-breaking
1011                                        ccss readable)
1012       (setq attributes (delq '<-denotational@usage attributes)))
1013     (when (and (memq 'name attributes)
1014                (setq value (get-char-attribute char 'name)))
1015       (insert (format
1016                (if (> (+ (current-column) (length value)) 48)
1017                    "(name . %S)%s"
1018                  "(name               . %S)%s")
1019                value line-breaking))
1020       (setq attributes (delq 'name attributes))
1021       )
1022     (when (and (memq 'name* attributes)
1023                (setq value (get-char-attribute char 'name*)))
1024       (insert (format
1025                (if (> (+ (current-column) (length value)) 48)
1026                    "(name* . %S)%s"
1027                  "(name*              . %S)%s")
1028                value line-breaking))
1029       (setq attributes (delq 'name* attributes))
1030       )
1031     (when (and (memq 'script attributes)
1032                (setq value (get-char-attribute char 'script)))
1033       (insert (format "(script\t\t%s)%s"
1034                       (mapconcat (function prin1-to-string)
1035                                  value " ")
1036                       line-breaking))
1037       (setq attributes (delq 'script attributes))
1038       )
1039     (dolist (name '(=>ucs =>ucs*))
1040       (when (and (memq name attributes)
1041                  (setq value (get-char-attribute char name)))
1042         (insert (format "(%-18s . #x%04X)\t; %c%s"
1043                         name value (decode-char '=ucs value)
1044                         line-breaking))
1045         (setq attributes (delq name attributes))))
1046     (dolist (name '(=>ucs@gb =>ucs@big5))
1047       (when (and (memq name attributes)
1048                  (setq value (get-char-attribute char name)))
1049         (insert (format "(%-18s . #x%04X)\t; %c%s"
1050                         name value
1051                         (decode-char (intern
1052                                       (concat "="
1053                                               (substring
1054                                                (symbol-name name) 2)))
1055                                      value)
1056                         line-breaking))
1057         (setq attributes (delq name attributes))
1058         ))
1059     ;; (dolist (name '(=>daikanwa))
1060     ;;   (when (and (memq name attributes)
1061     ;;              (setq value (get-char-attribute char name)))
1062     ;;     (insert
1063     ;;      (if (integerp value)
1064     ;;          (format "(%-18s . %05d)\t; %c%s"
1065     ;;                  name value (decode-char '=daikanwa value)
1066     ;;                  line-breaking)
1067     ;;        (format "(%-18s %s)\t; %c%s"
1068     ;;                name
1069     ;;                (mapconcat (function prin1-to-string)
1070     ;;                           value " ")
1071     ;;                (char-representative-of-daikanwa char)
1072     ;;                line-breaking)))
1073     ;;     (setq attributes (delq name attributes))))
1074     (when (and (memq 'general-category attributes)
1075                (setq value (get-char-attribute char 'general-category)))
1076       (insert (format
1077                "(general-category\t%s) ; %s%s"
1078                (mapconcat (lambda (cell)
1079                             (format "%S" cell))
1080                           value " ")
1081                (cond ((rassoc value unidata-normative-category-alist)
1082                       "Normative Category")
1083                      ((rassoc value unidata-informative-category-alist)
1084                       "Informative Category")
1085                      (t
1086                       "Unknown Category"))
1087                line-breaking))
1088       (setq attributes (delq 'general-category attributes))
1089       )
1090     (when (and (memq 'bidi-category attributes)
1091                (setq value (get-char-attribute char 'bidi-category)))
1092       (insert (format "(bidi-category\t. %S)%s"
1093                       value
1094                       line-breaking))
1095       (setq attributes (delq 'bidi-category attributes))
1096       )
1097     (unless (or (not (memq 'mirrored attributes))
1098                 (eq (setq value (get-char-attribute char 'mirrored 'empty))
1099                     'empty))
1100       (insert (format "(mirrored\t\t. %S)%s"
1101                       value
1102                       line-breaking))
1103       (setq attributes (delq 'mirrored attributes))
1104       )
1105     (cond
1106      ((and (memq 'decimal-digit-value attributes)
1107            (setq value (get-char-attribute char 'decimal-digit-value)))
1108       (insert (format "(decimal-digit-value . %S)%s"
1109                       value
1110                       line-breaking))
1111       (setq attributes (delq 'decimal-digit-value attributes))
1112       (when (and (memq 'digit-value attributes)
1113                  (setq value (get-char-attribute char 'digit-value)))
1114         (insert (format "(digit-value\t . %S)%s"
1115                         value
1116                         line-breaking))
1117         (setq attributes (delq 'digit-value attributes))
1118         )
1119       (when (and (memq 'numeric-value attributes)
1120                  (setq value (get-char-attribute char 'numeric-value)))
1121         (insert (format "(numeric-value\t . %S)%s"
1122                         value
1123                         line-breaking))
1124         (setq attributes (delq 'numeric-value attributes))
1125         )
1126       )
1127      (t
1128       (when (and (memq 'digit-value attributes)
1129                  (setq value (get-char-attribute char 'digit-value)))
1130         (insert (format "(digit-value\t. %S)%s"
1131                         value
1132                         line-breaking))
1133         (setq attributes (delq 'digit-value attributes))
1134         )
1135       (when (and (memq 'numeric-value attributes)
1136                  (setq value (get-char-attribute char 'numeric-value)))
1137         (insert (format "(numeric-value\t. %S)%s"
1138                         value
1139                         line-breaking))
1140         (setq attributes (delq 'numeric-value attributes))
1141         )))
1142     (when (and (memq 'iso-10646-comment attributes)
1143                (setq value (get-char-attribute char 'iso-10646-comment)))
1144       (insert (format "(iso-10646-comment\t. %S)%s"
1145                       value
1146                       line-breaking))
1147       (setq attributes (delq 'iso-10646-comment attributes))
1148       )
1149     (when (and (memq 'morohashi-daikanwa attributes)
1150                (setq value (get-char-attribute char 'morohashi-daikanwa)))
1151       (insert (format "(morohashi-daikanwa\t%s)%s"
1152                       (mapconcat (function prin1-to-string) value " ")
1153                       line-breaking))
1154       (setq attributes (delq 'morohashi-daikanwa attributes))
1155       )
1156     (setq radical nil
1157           strokes nil)
1158     (when (and (memq 'ideographic-radical attributes)
1159                (setq value (get-char-attribute char 'ideographic-radical)))
1160       (setq radical value)
1161       (insert (format "(ideographic-radical . %S)\t; %c%s"
1162                       radical
1163                       (ideographic-radical radical)
1164                       line-breaking))
1165       (setq attributes (delq 'ideographic-radical attributes))
1166       )
1167     (when (and (memq 'shuowen-radical attributes)
1168                (setq value (get-char-attribute char 'shuowen-radical)))
1169       (insert (format "(shuowen-radical\t. %S)\t; %c%s"
1170                       value
1171                       (shuowen-radical value)
1172                       line-breaking))
1173       (setq attributes (delq 'shuowen-radical attributes))
1174       )
1175     (let (key)
1176       (dolist (domain
1177                (append
1178                 char-db-feature-domains
1179                 (let (dest domain)
1180                   (dolist (feature (char-attribute-list))
1181                     (setq feature (symbol-name feature))
1182                     (when (string-match
1183                            "\\(radical\\|strokes\\)@\\([^@*]+\\)\\(\\*\\|$\\)"
1184                            feature)
1185                       (setq domain (intern (match-string 2 feature)))
1186                      (unless (memq domain dest)
1187                        (setq dest (cons domain dest)))))
1188                   (sort dest #'string<))))
1189         (setq key (intern (format "%s@%s" 'ideographic-radical domain)))
1190         (when (and (memq key attributes)
1191                    (setq value (get-char-attribute char key)))
1192           (setq radical value)
1193           (insert (format "(%s . %S)\t; %c%s"
1194                           key
1195                           radical
1196                           (ideographic-radical radical)
1197                           line-breaking))
1198           (setq attributes (delq key attributes))
1199           )
1200         (setq key (intern (format "%s@%s" 'ideographic-strokes domain)))
1201         (when (and (memq key attributes)
1202                    (setq value (get-char-attribute char key)))
1203           (setq strokes value)
1204           (insert (format "(%s . %S)%s"
1205                           key
1206                           strokes
1207                           line-breaking))
1208           (setq attributes (delq key attributes))
1209           )
1210         (setq key (intern (format "%s@%s" 'total-strokes domain)))
1211         (when (and (memq key attributes)
1212                    (setq value (get-char-attribute char key)))
1213           (insert (format "(%s       . %S)%s"
1214                           key
1215                           value
1216                           line-breaking))
1217           (setq attributes (delq key attributes))
1218           )
1219         (dolist (feature '(ideographic-radical
1220                            ideographic-strokes
1221                            total-strokes))
1222           (setq key (intern (format "%s@%s*sources" feature domain)))
1223           (when (and (memq key attributes)
1224                      (setq value (get-char-attribute char key)))
1225             (insert (format "(%s%s" key line-breaking))
1226             (dolist (cell value)
1227               (insert (format " %s" cell)))
1228             (insert ")")
1229             (insert line-breaking)
1230             (setq attributes (delq key attributes))
1231             ))
1232         ))
1233     (when (and (memq 'ideographic-strokes attributes)
1234                (setq value (get-char-attribute char 'ideographic-strokes)))
1235       (setq strokes value)
1236       (insert (format "(ideographic-strokes . %S)%s"
1237                       strokes
1238                       line-breaking))
1239       (setq attributes (delq 'ideographic-strokes attributes))
1240       )
1241     (when (and (memq 'kangxi-radical attributes)
1242                (setq value (get-char-attribute char 'kangxi-radical)))
1243       (unless (eq value radical)
1244         (insert (format "(kangxi-radical\t . %S)\t; %c%s"
1245                         value
1246                         (ideographic-radical value)
1247                         line-breaking))
1248         (or radical
1249             (setq radical value)))
1250       (setq attributes (delq 'kangxi-radical attributes))
1251       )
1252     (when (and (memq 'kangxi-strokes attributes)
1253                (setq value (get-char-attribute char 'kangxi-strokes)))
1254       (unless (eq value strokes)
1255         (insert (format "(kangxi-strokes\t . %S)%s"
1256                         value
1257                         line-breaking))
1258         (or strokes
1259             (setq strokes value)))
1260       (setq attributes (delq 'kangxi-strokes attributes))
1261       )
1262     ;; (when (and (memq 'japanese-radical attributes)
1263     ;;            (setq value (get-char-attribute char 'japanese-radical)))
1264     ;;   (unless (eq value radical)
1265     ;;     (insert (format "(japanese-radical\t . %S)\t; %c%s"
1266     ;;                     value
1267     ;;                     (ideographic-radical value)
1268     ;;                     line-breaking))
1269     ;;     (or radical
1270     ;;         (setq radical value)))
1271     ;;   (setq attributes (delq 'japanese-radical attributes))
1272     ;;   )
1273     (when (and (memq 'japanese-strokes attributes)
1274                (setq value (get-char-attribute char 'japanese-strokes)))
1275       (unless (eq value strokes)
1276         (insert (format "(japanese-strokes\t . %S)%s"
1277                         value
1278                         line-breaking))
1279         (or strokes
1280             (setq strokes value)))
1281       (setq attributes (delq 'japanese-strokes attributes))
1282       )
1283     (when (and (memq 'cns-radical attributes)
1284                (setq value (get-char-attribute char 'cns-radical)))
1285       (insert (format "(cns-radical\t . %S)\t; %c%s"
1286                       value
1287                       (ideographic-radical value)
1288                       line-breaking))
1289       (setq attributes (delq 'cns-radical attributes))
1290       )
1291     (when (and (memq 'cns-strokes attributes)
1292                (setq value (get-char-attribute char 'cns-strokes)))
1293       (unless (eq value strokes)
1294         (insert (format "(cns-strokes\t . %S)%s"
1295                         value
1296                         line-breaking))
1297         (or strokes
1298             (setq strokes value)))
1299       (setq attributes (delq 'cns-strokes attributes))
1300       )
1301     ;; (when (and (memq 'ideographic- attributes)
1302     ;;            (setq value (get-char-attribute char 'ideographic-)))
1303     ;;   (insert "(ideographic-       ")
1304     ;;   (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1305     ;;         separator nil)
1306     ;;   (while (consp value)
1307     ;;     (setq cell (car value))
1308     ;;     (if (integerp cell)
1309     ;;         (setq cell (decode-char '=ucs cell)))
1310     ;;     (cond ((characterp cell)
1311     ;;            (if separator
1312     ;;                (insert lbs))
1313     ;;            (if readable
1314     ;;                (insert (format "%S" cell))
1315     ;;              (char-db-insert-char-spec cell readable))
1316     ;;            (setq separator lbs))
1317     ;;           ((consp cell)
1318     ;;            (if separator
1319     ;;                (insert lbs))
1320     ;;            (if (consp (car cell))
1321     ;;                (char-db-insert-char-spec cell readable)
1322     ;;              (char-db-insert-char-reference cell readable))
1323     ;;            (setq separator lbs))
1324     ;;           (t
1325     ;;            (if separator
1326     ;;                (insert separator))
1327     ;;            (insert (prin1-to-string cell))
1328     ;;            (setq separator " ")))
1329     ;;     (setq value (cdr value)))
1330     ;;   (insert ")")
1331     ;;   (insert line-breaking)
1332     ;;   (setq attributes (delq 'ideographic- attributes)))
1333     (when (and (memq 'total-strokes attributes)
1334                (setq value (get-char-attribute char 'total-strokes)))
1335       (insert (format "(total-strokes       . %S)%s"
1336                       value
1337                       line-breaking))
1338       (setq attributes (delq 'total-strokes attributes))
1339       )
1340     ;; (when (and (memq '->ideograph attributes)
1341     ;;            (setq value (get-char-attribute char '->ideograph)))
1342     ;;   (insert (format "(->ideograph\t%s)%s"
1343     ;;                   (mapconcat (lambda (code)
1344     ;;                                (cond ((symbolp code)
1345     ;;                                       (symbol-name code))
1346     ;;                                      ((integerp code)
1347     ;;                                       (format "#x%04X" code))
1348     ;;                                      (t
1349     ;;                                       (format "%s %S"
1350     ;;                                               line-breaking code))))
1351     ;;                              value " ")
1352     ;;                   line-breaking))
1353     ;;   (setq attributes (delq '->ideograph attributes))
1354     ;;   )
1355     ;; (when (and (memq '->decomposition attributes)
1356     ;;            (setq value (get-char-attribute char '->decomposition)))
1357     ;;   (insert (format "(->decomposition\t%s)%s"
1358     ;;                   (mapconcat (lambda (code)
1359     ;;                                (cond ((symbolp code)
1360     ;;                                       (symbol-name code))
1361     ;;                                      ((characterp code)
1362     ;;                                       (if readable
1363     ;;                                           (format "%S" code)
1364     ;;                                         (format "#x%04X"
1365     ;;                                                 (char-int code))
1366     ;;                                         ))
1367     ;;                                      ((integerp code)
1368     ;;                                       (format "#x%04X" code))
1369     ;;                                      (t
1370     ;;                                       (format "%s%S" line-breaking code))))
1371     ;;                              value " ")
1372     ;;                   line-breaking))
1373     ;;   (setq attributes (delq '->decomposition attributes))
1374     ;;   )
1375     (if (equal (get-char-attribute char '->titlecase)
1376                (get-char-attribute char '->uppercase))
1377         (setq attributes (delq '->titlecase attributes)))
1378     (when (and (memq '->mojikyo attributes)
1379                (setq value (get-char-attribute char '->mojikyo)))
1380       (insert (format "(->mojikyo\t\t. %06d)\t; %c%s"
1381                       value (decode-char 'mojikyo value)
1382                       line-breaking))
1383       (setq attributes (delq '->mojikyo attributes))
1384       )
1385     (unless readable
1386       (dolist (ignored '(composition
1387                          ->denotational <-subsumptive ->ucs-unified
1388                          ;; ->ideographic-component-forms
1389                          ))
1390         (setq attributes (delq ignored attributes))))
1391     (while attributes
1392       (setq name (car attributes))
1393       (unless (eq (setq value (get-char-attribute char name 'value-is-empty))
1394                   'value-is-empty)
1395         (cond ((setq ret (find-charset name))
1396                (setq name (charset-name ret))
1397                (when (not (memq name dest-ccss))
1398                  (setq dest-ccss (cons name dest-ccss))
1399                  (char-db-insert-ccs-feature name value line-breaking))
1400                )
1401               ((string-match "^=>ucs@" (symbol-name name))
1402                (insert (format "(%-18s . #x%04X)\t; %c%s"
1403                                name value (decode-char '=ucs value)
1404                                line-breaking))
1405                )
1406               ((eq name 'jisx0208-1978/4X)
1407                (insert (format "(%-18s . #x%04X)%s"
1408                                name value
1409                                line-breaking))
1410                )
1411               ((and
1412                 (not readable)
1413                 (not (eq name '->subsumptive))
1414                 (not (eq name '->uppercase))
1415                 (not (eq name '->lowercase))
1416                 (not (eq name '->titlecase))
1417                 (not (eq name '->canonical))
1418                 (not (eq name '->Bopomofo))
1419                 (not (eq name '->mistakable))
1420                 (not (eq name '->ideographic-variants))
1421                 (null (get-char-attribute
1422                        char (intern (format "%s*sources" name))))
1423                 (not (string-match "\\*sources$" (symbol-name name)))
1424                 (null (get-char-attribute
1425                        char (intern (format "%s*note" name))))
1426                 (not (string-match "\\*note$" (symbol-name name)))
1427                 (or (eq name '<-identical)
1428                     (eq name '<-uppercase)
1429                     (eq name '<-lowercase)
1430                     (eq name '<-titlecase)
1431                     (eq name '<-canonical)
1432                     (eq name '<-ideographic-variants)
1433                     ;; (eq name '<-synonyms)
1434                     (string-match "^<-synonyms" (symbol-name name))
1435                     (eq name '<-mistakable)
1436                     (when (string-match "^->" (symbol-name name))
1437                       (cond
1438                        ((string-match "^->fullwidth" (symbol-name name))
1439                         (not (and (consp value)
1440                                   (characterp (car value))
1441                                   (encode-char
1442                                    (car value) '=ucs 'defined-only)))
1443                         )
1444                        (t)))
1445                     ))
1446                )
1447               ((or (eq name 'ideographic-structure)
1448                    (eq name 'ideographic-structure@apparent)
1449                    (eq name 'ideographic-structure@apparent/leftmost)
1450                    (eq name 'ideographic-structure@apparent/rightmost)
1451                    (eq name 'ideographic-combination)
1452                    ;; (eq name 'ideographic-)
1453                    (eq name '=decomposition)
1454                    (char-feature-base-name= '=decomposition name)
1455                    (char-feature-base-name= '=>decomposition name)
1456                    ;; (string-match "^=>*decomposition\\(@[^*]+\\)?$"
1457                    ;;               (symbol-name name))
1458                    (string-match "^\\(->\\|<-\\)[^*]*$" (symbol-name name))
1459                    (string-match "^\\(->\\|<-\\)[^*]*\\*sources$"
1460                                  (symbol-name name))
1461                    )
1462                (char-db-insert-relation-feature char name value
1463                                                 line-breaking
1464                                                 ccss readable))
1465               ;; ((memq name '(ideograph=
1466               ;;               original-ideograph-of
1467               ;;               ancient-ideograph-of
1468               ;;               vulgar-ideograph-of
1469               ;;               wrong-ideograph-of
1470               ;;               ;; simplified-ideograph-of
1471               ;;               ideographic-variants
1472               ;;               ;; ideographic-different-form-of
1473               ;;               ))
1474               ;;  (insert (format "(%-18s%s " name line-breaking))
1475               ;;  (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1476               ;;        separator nil)
1477               ;;  (while (consp value)
1478               ;;    (setq cell (car value))
1479               ;;    (if (and (consp cell)
1480               ;;             (consp (car cell)))
1481               ;;        (progn
1482               ;;          (if separator
1483               ;;              (insert lbs))
1484               ;;          (char-db-insert-alist cell readable)
1485               ;;          (setq separator lbs))
1486               ;;      (if separator
1487               ;;          (insert separator))
1488               ;;      (insert (prin1-to-string cell))
1489               ;;      (setq separator " "))
1490               ;;    (setq value (cdr value)))
1491               ;;  (insert ")")
1492               ;;  (insert line-breaking))
1493               ((consp value)
1494                (insert (format "(%-18s " name))
1495                (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1496                      separator nil)
1497                (while (consp value)
1498                  (setq cell (car value))
1499                  (if (and (consp cell)
1500                           (consp (car cell))
1501                           (setq ret (condition-case nil
1502                                         (find-char cell)
1503                                       (error nil))))
1504                      (progn
1505                        (setq rest cell
1506                              al nil
1507                              cal nil)
1508                        (while rest
1509                          (setq key (car (car rest)))
1510                          (if (find-charset key)
1511                              (setq cal (cons key cal))
1512                            (setq al (cons key al)))
1513                          (setq rest (cdr rest)))
1514                        (if separator
1515                            (insert lbs))
1516                        (insert-char-attributes ret
1517                                                readable
1518                                                al ; cal
1519                                                nil 'for-sub-node)
1520                        (setq separator lbs))
1521                    (setq ret (prin1-to-string cell))
1522                    (if separator
1523                        (if (< (+ (current-column)
1524                                  (length ret)
1525                                  (length separator))
1526                               76)
1527                            (insert separator)
1528                          (insert lbs)))
1529                    (insert ret)
1530                    (setq separator " "))
1531                  (setq value (cdr value)))
1532                (insert ")")
1533                (insert line-breaking))
1534               (t
1535                (insert (format "(%-18s" name))
1536                (setq ret (prin1-to-string value))
1537                (unless (< (+ (current-column)
1538                              (length ret)
1539                              3)
1540                           76)
1541                  (insert line-breaking))
1542                (insert " . " ret ")" line-breaking)
1543                ;; (insert (format "(%-18s . %S)%s"
1544                ;;                 name value
1545                ;;                 line-breaking))
1546                )
1547               ))
1548       (setq attributes (cdr attributes)))
1549     (insert ")")))
1550
1551 (defun insert-char-data (char &optional readable
1552                               attributes)
1553   (save-restriction
1554     (narrow-to-region (point)(point))
1555     (insert "(define-char
1556   '")
1557     (insert-char-attributes char readable attributes)
1558     (insert ")\n")
1559     (goto-char (point-min))
1560     (while (re-search-forward "[ \t]+$" nil t)
1561       (replace-match ""))
1562     ;; from tabify.
1563     (goto-char (point-min))
1564     (while (re-search-forward "[ \t][ \t][ \t]*" nil t)
1565       (let ((column (current-column))
1566             (indent-tabs-mode t))
1567         (delete-region (match-beginning 0) (point))
1568         (indent-to column)))
1569     (goto-char (point-max))
1570     ;; (tabify (point-min)(point-max))
1571     ))
1572
1573 (defun insert-char-data-with-variant (char &optional printable
1574                                            no-ucs-unified
1575                                            script excluded-script)
1576   (insert-char-data char printable)
1577   (let ((variants (char-variants char))
1578         rest
1579         variant vs ret)
1580     (setq variants (sort variants #'<))
1581     (setq rest variants)
1582     (setq variants (cons char variants))
1583     (while rest
1584       (setq variant (car rest))
1585       (unless (get-char-attribute variant '<-subsumptive)
1586         (if (and (or (null script)
1587                      (null (setq vs (get-char-attribute variant 'script)))
1588                      (memq script vs))
1589                  (or (null excluded-script)
1590                      (null (setq vs (get-char-attribute variant 'script)))
1591                      (not (memq excluded-script vs))))
1592             (unless (and no-ucs-unified (get-char-attribute variant '=ucs))
1593               (insert-char-data variant printable)
1594               (if (setq ret (char-variants variant))
1595                   (while ret
1596                     (or (memq (car ret) variants)
1597                         ;; (get-char-attribute (car ret) '<-subsumptive)
1598                         (setq rest (nconc rest (list (car ret)))))
1599                     (setq ret (cdr ret)))))))
1600       (setq rest (cdr rest)))))
1601
1602 (defun insert-char-range-data (min max &optional script excluded-script)
1603   (let ((code min)
1604         char)
1605     (while (<= code max)
1606       (setq char (decode-char '=ucs code))
1607       (if (encode-char char '=ucs 'defined-only)
1608           (insert-char-data-with-variant char nil 'no-ucs-unified
1609                                          script excluded-script))
1610       (setq code (1+ code)))))
1611
1612 (defun write-char-range-data-to-file (min max file
1613                                           &optional script excluded-script)
1614   (let ((coding-system-for-write char-db-file-coding-system))
1615     (with-temp-buffer
1616       (insert (format ";; -*- coding: %s -*-\n"
1617                       char-db-file-coding-system))
1618       (insert-char-range-data min max script excluded-script)
1619       (write-region (point-min)(point-max) file))))
1620
1621 (defvar what-character-original-window-configuration)
1622
1623 ;;;###autoload
1624 (defun what-char-definition (char)
1625   (interactive (list (char-after)))
1626   (let ((buf (get-buffer-create "*Character Description*"))
1627         (the-buf (current-buffer))
1628         (win-conf (current-window-configuration)))
1629     (pop-to-buffer buf)
1630     (make-local-variable 'what-character-original-window-configuration)
1631     (setq what-character-original-window-configuration win-conf)
1632     (setq buffer-read-only nil)
1633     (erase-buffer)
1634     (condition-case err
1635         (progn
1636           (insert-char-data-with-variant char 'printable)
1637           (unless (char-attribute-alist char)
1638             (insert (format ";; = %c\n"
1639                             (let* ((rest (split-char char))
1640                                    (ccs (pop rest))
1641                                    (code (pop rest)))
1642                               (while rest
1643                                 (setq code (logior (lsh code 8)
1644                                                    (pop rest))))
1645                               (decode-char ccs code)))))
1646           ;; (char-db-update-comment)
1647           (set-buffer-modified-p nil)
1648           (view-mode the-buf (lambda (buf)
1649                                (set-window-configuration
1650                                 what-character-original-window-configuration)
1651                                ))
1652           (goto-char (point-min)))
1653       (error (progn
1654                (set-window-configuration
1655                 what-character-original-window-configuration)
1656                (signal (car err) (cdr err)))))))
1657
1658
1659 ;;; @ end
1660 ;;;
1661
1662 (provide 'char-db-util)
1663
1664 ;;; char-db-util.el ends here