Reformatted.
[chise/xemacs-chise.git] / lisp / utf-2000 / char-db-util.el
1 ;;; char-db-util.el --- Character Database utility -*- coding: utf-8-er; -*-
2
3 ;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
4 ;;   2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019,
5 ;;   2020, 2021, 2022, 2023 MORIOKA Tomohiko.
6
7 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
8 ;; Keywords: CHISE, Character Database, ISO/IEC 10646, UCS, Unicode, MULE.
9
10 ;; This file is part of XEmacs CHISE.
11
12 ;; XEmacs CHISE is free software; you can redistribute it and/or
13 ;; modify it under the terms of the GNU General Public License as
14 ;; published by the Free Software Foundation; either version 2, or (at
15 ;; your option) any later version.
16
17 ;; XEmacs CHISE is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 ;; General Public License for more details.
21
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with XEmacs CHISE; see the file COPYING.  If not, write to
24 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
25 ;; Boston, MA 02111-1307, USA.
26
27 ;;; Code:
28
29 (require 'chise-subr)
30 (require 'ideograph-subr)
31
32 (defconst unidata-normative-category-alist
33   '(("Lu" letter        uppercase)
34     ("Ll" letter        lowercase)
35     ("Lt" letter        titlecase)
36     ("Mn" mark          non-spacing)
37     ("Mc" mark          spacing-combining)
38     ("Me" mark          enclosing)
39     ("Nd" number        decimal-digit)
40     ("Nl" number        letter)
41     ("No" number        other)
42     ("Zs" separator     space)
43     ("Zl" separator     line)
44     ("Zp" separator     paragraph)
45     ("Cc" other         control)
46     ("Cf" other         format)
47     ("Cs" other         surrogate)
48     ("Co" other         private-use)
49     ("Cn" other         not-assigned)))
50
51 (defconst unidata-informative-category-alist
52   '(("Lm" letter        modifier)
53     ("Lo" letter        other)
54     ("Pc" punctuation   connector)
55     ("Pd" punctuation   dash)
56     ("Ps" punctuation   open)
57     ("Pe" punctuation   close)
58     ("Pi" punctuation   initial-quote)
59     ("Pf" punctuation   final-quote)
60     ("Po" punctuation   other)
61     ("Sm" symbol        math)
62     ("Sc" symbol        currency)
63     ("Sk" symbol        modifier)
64     ("So" symbol        other)
65     ))
66
67 (defconst shuowen-radicals
68   [?一 ?上 ?示 ?三 ?王 ?玉 ?玨 ?气 ?士 ?丨 ; 010
69    ?屮 ?艸 ?蓐 ?茻 ?小 ?八 ?釆 ?半 ?牛 ?犛 ; 020
70    ?告 ?口 ?凵 ?吅 ?哭 ?走 ?止 ?癶 ?步 ?此 ; 030
71    ?正 ?是 ?辵 ?彳 ?廴 ?㢟 ?行 ?齒 ?牙 ?足 ; 040
72    ?疋 ?品 ?龠 ?冊 ?㗊 ?舌 ?干 ?𧮫 ?只 ?㕯 ; 050
73    ?句 ?丩 ?古 ?十 ?卅 ?言 ?誩 ?音 ?䇂 ?丵 ; 060
74    ?菐 ?𠬞 ?𠬜 ?共 ?異 ?舁 ?𦥑 ?䢅 ?爨 ?革 ; 070
75    ?鬲 ?䰜 ?爪 ?𠃨 ?鬥 ?又 ?𠂇 ?史 ?支 ?𦘒 ; 080
76    ?聿 ?畫 ?隶 ?臤 ?臣 ?殳 ?殺 ?𠘧 ?寸 ?皮 ; 090
77    ?㼱 ?攴 ?敎 ?卜 ?用 ?爻 ?㸚 ?𥄎 ?目 ?䀠 ; 100
78    ?眉 ?盾 ?自 ?白 ?鼻 ?皕 ?習 ?羽 ?隹 ?奞 ; 110
79    ?雈 ?𦫳 ?𥄕 ?羊 ?羴 ?瞿 ?雔 ?雥 ?鳥 ?烏 ; 120
80    ?𠦒 ?冓 ?幺 ?𢆶 ?叀 ?玄 ?予 ?放 ?𠬪 ?𣦼 ; 130
81    ?歺 ?死 ?冎 ?骨 ?肉 ?筋 ?刀 ?刃 ?㓞 ?丯 ; 140
82    ?耒 ?角 ?竹 ?箕 ?丌 ?左 ?工 ?㠭 ?巫 ?甘 ; 150
83    ?曰 ?乃 ?丂 ?可 ?兮 ?号 ?亏 ?旨 ?喜 ?壴 ; 160
84 ;  ?旨 ?曰 ?乃 ?丂 ?可 ?兮 ?号 ?亏 ?喜 ?壴 ; 160
85    ?鼓 ?豈 ?豆 ?豊 ?豐 ?䖒 ?虍 ?虎 ?虤 ?皿 ; 170
86    ?𠙴 ?去 ?血 ?丶 ?丹 ?青 ?井 ?皀 ?鬯 ?食 ; 180
87    ?亼 ?會 ?倉 ?入 ?缶 ?矢 ?高 ?冂 ?𩫏 ?京 ; 190
88    ?亯 ?𣆪 ?畗 ?㐭 ?嗇 ?來 ?麥 ?夊 ?舛 ?䑞 ; 200
89    ?韋 ?弟 ?夂 ?久 ?桀 ?木 ?東 ?林 ?才 ?叒 ; 210
90    ?之 ?帀 ?出 ?𣎵 ?生 ?乇 ?𠂹 ?𠌶 ?華 ?𥝌 ; 220
91    ?稽 ?巢 ?桼 ?束 ?㯻 ?囗 ?員 ?貝 ?邑 ?𨛜 ; 230
92    ?日 ?旦 ?倝 ?㫃 ?冥 ?晶 ?月 ?有 ?明 ?囧 ; 240
93    ?夕 ?多 ?毌 ?𢎘 ?𣐺 ?卣 ?齊 ?朿 ?片 ?鼎 ; 250
94    ?克 ?彔 ?禾 ?秝 ?黍 ?香 ?米 ?毇 ?臼 ?凶 ; 260
95    ?𣎳 ?𣏟 ?麻 ?尗 ?耑 ?韭 ?瓜 ?瓠 ?宀 ?宮 ; 270
96    ?呂 ?穴 ?㝱 ?疒 ?冖 ?𠔼 ?冃 ?㒳 ?网 ?襾 ; 280
97    ?巾 ?巿 ?帛 ?白 ?㡀 ?黹 ?人 ?𠤎 ?匕 ?从 ; 290
98    ?比 ?北 ?丘 ?㐺 ?𡈼 ?重 ?臥 ?身 ?㐆 ?衣 ; 300
99    ?裘 ?老 ?毛 ?毳 ?尸 ?尺 ?尾 ?履 ?舟 ?方 ; 310
100    ?儿 ?兄 ?兂 ?皃 ?𠑹 ?先 ?秃 ?見 ?覞 ?欠 ; 320
101    ?㱃 ?㳄 ?旡 ?頁 ?𦣻 ?面 ?丏 ?首 ?𥄉 ?須 ; 330
102    ?彡 ?彣 ?文 ?髟 ?后 ?司 ?卮 ?卩 ?印 ?色 ; 340
103    ?𠨍 ?辟 ?勹 ?包 ?茍 ?鬼 ?甶 ?厶 ?嵬 ?山 ; 350
104    ?屾 ?屵 ?广 ?厂 ?丸 ?危 ?石 ?長 ?勿 ?冄 ; 360
105    ?而 ?豕 ?㣇 ?彑 ?豚 ?豸 ?𤉡 ?易 ?象 ?馬 ; 370
106    ?𢊁 ?鹿 ?麤 ?㲋 ?兔 ?萈 ?犬 ?㹜 ?鼠 ?能 ; 380
107    ?熊 ?火 ?炎 ?黑 ?囪 ?焱 ?炙 ?赤 ?大 ?亦 ; 390
108    ?夨 ?夭 ?交 ?尣 ?壺 ?壹 ?㚔 ?奢 ?亢 ?夲 ; 400
109    ?夰 ?亣 ?夫 ?立 ?竝 ?囟 ?思 ?心 ?惢 ?水 ; 410
110    ?沝 ?瀕 ?𡿨 ?巜 ?川 ?泉 ?灥 ?永 ?𠂢 ?谷 ; 420
111    ?仌 ?雨 ?雲 ?魚 ?𩺰 ?燕 ?龍 ?飛 ?非 ?卂 ; 430
112    ?𠃉 ?不 ?至 ?西 ?鹵 ?鹽 ?戶 ?門 ?耳 ?𦣝 ; 440
113    ?手 ?𠦬 ?女 ?毋 ?民 ?丿 ?𠂆 ?乁 ?氏 ?氐 ; 450
114    ?戈 ?戉 ?我 ?亅 ?琴 ?𠃊 ?亡 ?匸 ?匚 ?曲 ; 460
115    ?甾 ?瓦 ?弓 ?弜 ?弦 ?系 ?糸 ?素 ?絲 ?率 ; 470
116    ?虫 ?䖵 ?蟲 ?風 ?它 ?龜 ?黽 ?卵 ?二 ?土 ; 480
117    ?垚 ?堇 ?里 ?田 ?畕 ?黃 ?男 ?力 ?劦 ?金 ; 490
118    ?幵 ?勺 ?几 ?且 ?斤 ?斗 ?矛 ?車 ?𠂤 ?𨸏 ; 500
119    ?𨺅 ?厽 ?四 ?宁 ?叕 ?亞 ?五 ?六 ?七 ?九 ; 510
120    ?禸 ?嘼 ?甲 ?乙 ?丙 ?丁 ?戊 ?己 ?巴 ?庚 ; 520
121    ?辛 ?辡 ?壬 ?癸 ?子 ?了 ?孨 ?𠫓 ?丑 ?寅 ; 530
122    ?卯 ?辰 ?巳 ?午 ?未 ?申 ?酉 ?酋 ?戌 ?亥 ; 540
123    ])
124
125 (defun shuowen-radical (number)
126   (aref shuowen-radicals (1- number)))
127
128 (defvar char-db-file-coding-system 'utf-8-mcs-er)
129
130 (defvar char-db-ignored-attributes '(ideographic-products))
131
132 (defvar char-db-coded-charset-priority-list
133   '(ascii
134     control-1
135     latin-iso8859-1
136     latin-iso8859-2
137     latin-iso8859-3
138     latin-iso8859-4
139     latin-iso8859-9
140     latin-jisx0201
141     cyrillic-iso8859-5
142     greek-iso8859-7
143     thai-tis620
144     ;; =mj
145     ;; =adobe-japan1-0
146     ;; =adobe-japan1-1
147     ;; =adobe-japan1-2
148     ;; =adobe-japan1-3
149     ;; =adobe-japan1-4
150     ;; =adobe-japan1-5
151     ;; =adobe-japan1-6
152     =jis-x0208
153     =jis-x0208@1978
154     =jis-x0208@1983
155     japanese-jisx0212
156     chinese-gb2312
157     =jis-x0208@1990
158     chinese-cns11643-1
159     chinese-cns11643-2
160     chinese-cns11643-3
161     chinese-cns11643-4
162     chinese-cns11643-5
163     chinese-cns11643-6
164     chinese-cns11643-7
165     =jis-x0213-1
166     =jis-x0213-1@2000
167     =jis-x0213-1@2004
168     =jis-x0213-2
169     korean-ksc5601
170     chinese-isoir165
171     katakana-jisx0201
172     hebrew-iso8859-8
173     chinese-gb12345
174     latin-viscii
175     ethiopic-ucs
176     =big5-cdp
177     =hanyo-denshi/ja
178     =hanyo-denshi/jb
179     =hanyo-denshi/jc
180     =hanyo-denshi/jd
181     =hanyo-denshi/ft
182     =hanyo-denshi/ia
183     =hanyo-denshi/ib
184     =hanyo-denshi/hg
185     =hanyo-denshi/ip
186     =hanyo-denshi/jt
187     =hanyo-denshi/ks
188     =hanyo-denshi/tk
189     ;; ==mj
190     ;; ==adobe-japan1-0
191     ;; ==adobe-japan1-1
192     ;; ==adobe-japan1-2
193     ;; ==adobe-japan1-3
194     ;; ==adobe-japan1-4
195     ;; ==adobe-japan1-5
196     ;; ==adobe-japan1-6
197     ==jis-x0208
198     ==jis-x0213-1
199     ==jis-x0213-2
200     ==jis-x0212
201     ==hanyo-denshi/ja
202     ==hanyo-denshi/jb
203     ==hanyo-denshi/jc
204     ==hanyo-denshi/ft
205     ==hanyo-denshi/ia
206     ==hanyo-denshi/ib
207     ==hanyo-denshi/hg
208     ==hanyo-denshi/jt
209     ==hanyo-denshi/ks
210     ==hanyo-denshi/tk
211     =gt
212     =gt-k
213     =daikanwa
214     =daijiten
215     =daikanwa@rev2
216     =daikanwa@rev1
217     =daikanwa/+p
218     =daikanwa/ho
219     ==daikanwa
220     ==daijiten
221     =cbeta
222     ideograph-hanziku-1
223     ideograph-hanziku-2
224     ideograph-hanziku-3
225     ideograph-hanziku-4
226     ideograph-hanziku-5
227     ideograph-hanziku-6
228     ideograph-hanziku-7
229     ideograph-hanziku-8
230     ideograph-hanziku-9
231     ideograph-hanziku-10
232     ideograph-hanziku-11
233     ideograph-hanziku-12
234     ;; =>>>adobe-japan1-0
235     ;; =>>>adobe-japan1-1
236     ;; =>>>adobe-japan1-2
237     ;; =>>>adobe-japan1-3
238     ;; =>>>adobe-japan1-4
239     ;; =>>>adobe-japan1-5
240     ;; =>>>adobe-japan1-6
241     ;; =>>>jis-x0208
242     ;; =>>>jis-x0213-1
243     ;; =>>>jis-x0213-2
244     ;; =>>>hanyo-denshi/ja
245     ;; =>>>hanyo-denshi/jb
246     ;; =>>>hanyo-denshi/jc
247     ;; =>>>hanyo-denshi/ft
248     ;; =>>>hanyo-denshi/ib
249     ;; =>>>hanyo-denshi/hg
250     ;; =>>>hanyo-denshi/jt
251     ;; =>>>hanyo-denshi/ks
252     ;; =>>>gt
253     =>>mj
254     =>>adobe-japan1-0
255     =>>adobe-japan1-1
256     =>>adobe-japan1-2
257     =>>adobe-japan1-3
258     =>>adobe-japan1-4
259     =>>adobe-japan1-5
260     =>>adobe-japan1-6
261     =>>jis-x0208
262     =>>jis-x0213-1
263     =>>jis-x0213-1@2000
264     =>>jis-x0213-1@2004
265     =>>jis-x0213-2
266     =>>jis-x0208@1978
267     =>>hanyo-denshi/ft
268     =>>hanyo-denshi/jt
269     =>>hanyo-denshi/ks
270     =>>gt
271     =>>daikanwa
272     =>>cbeta
273     =+>jis-x0208
274     =+>jis-x0213-1
275     =+>jis-x0213-2
276     =+>adobe-japan1-0
277     =+>adobe-japan1-1
278     =+>adobe-japan1-2
279     =+>adobe-japan1-3
280     =+>adobe-japan1-4
281     =+>adobe-japan1-5
282     =+>adobe-japan1-6
283     =+>jis-x0208@1978
284     =>jis-x0208
285     =>jis-x0208@1997
286     =>jis-x0213-1
287     =>jis-x0213-1@2000
288     =>jis-x0213-1@2004
289     =>jis-x0213-2
290     ==>ucs@bucs
291     ==>daijiten
292     ;; =>ucs@hanyo-denshi
293     =>ucs@iso
294     =>ucs@unicode
295     =>ucs@jis
296     =>ucs@cns
297     =>ucs@ks
298     =+>ucs@iso
299     =+>ucs@unicode
300     =+>ucs@jis
301     =+>ucs@jis/1990
302     =+>ucs@cns
303     =+>ucs@ks
304     =>>ucs@iso
305     =>>ucs@unicode
306     =>>ucs@jis
307     =>>ucs@JP
308     =>>ucs@cns
309     =>>>ucs@iso
310     =>>>ucs@unicode
311     ==ucs@iso
312     ==ucs@unicode
313     ;; ==ucs@cns
314     ==gb2312
315     ==ks-x1001
316     ==cns11643-1
317     ==cns11643-2
318     ==cns11643-3
319     ==cns11643-4
320     ==cns11643-5
321     ==cns11643-6
322     ==cns11643-7
323     ==gt
324     ==jis-x0208@1990
325     ;; ==jis-x0208@1983
326     ==jis-x0208@1978
327     ==gt-k
328     =ucs@iso
329     =ucs@unicode
330     =ucs@cns
331     ==big5-cdp
332     ==cbeta
333     =>>big5-cdp
334     =>>gt-k
335     =+>gt
336     =+>big5-cdp
337     =>big5-cdp
338     =>big5-cdp@iwds-1
339     =>mj
340     =>hanyo-denshi/tk
341     =>iwds-1
342     =big5
343     =big5-eten
344     =zinbun-oracle
345     =>zinbun-oracle
346     =ruimoku-v6
347     =>>ruimoku-v6
348     ==ruimoku-v6
349     =>ruimoku-v6
350     =jef-china3
351     =shinjigen
352     =>cns11643-5
353     =>cns11643-7
354     =>gt
355     =>gt-k
356     =>daikanwa
357     =>daikanwa/ho
358     =>cbeta
359     =ucs-var-001
360     =ucs-var-002
361     =ucs-var-003
362     =ucs-var-004
363     =ucs-var-005
364     =ucs-var-006
365     =ucs-var-007
366     =ucs-var-008
367     =ucs-var-010
368     =ucs-itaiji-001
369     =ucs-itaiji-002
370     =ucs-itaiji-003
371     =ucs-itaiji-004
372     =ucs-itaiji-005
373     =ucs-itaiji-006
374     =ucs-itaiji-007
375     =ucs-itaiji-008
376     =ucs-itaiji-009
377     =ucs-itaiji-010
378     =ucs-itaiji-011
379     =ucs-itaiji-012
380     =ucs-itaiji-014
381     =ucs-itaiji-016
382     =+>ucs-var-003
383     =>ucs-itaiji-001
384     =>ucs-itaiji-002
385     =>ucs-itaiji-003
386     =>ucs-itaiji-004
387     =>ucs-itaiji-005
388     =>ucs-itaiji-006
389     =>ucs-itaiji-007
390     =>ucs-itaiji-008
391     =>ucs-itaiji-009
392     =>ucs-itaiji-010
393     =>ucs-itaiji-011
394     =>ucs-itaiji-012
395     =>ucs-itaiji-014
396     =>ucs-itaiji-016
397     =>ucs-var-001
398     =big5-cdp-var-001
399     =big5-cdp-var-002
400     =big5-cdp-var-003
401     =big5-cdp-var-004
402     =big5-cdp-var-005
403     =big5-cdp-var-007
404     =big5-cdp-var-008
405     =big5-cdp-var-010
406     =big5-cdp-itaiji-001
407     =big5-cdp-itaiji-002
408     =big5-cdp-itaiji-003
409     =big5-cdp-itaiji-004
410     =big5-cdp-itaiji-005
411     =>big5-cdp-itaiji-001
412     =>big5-cdp-itaiji-002
413     =>big5-cdp-itaiji-003
414     =>big5-cdp-itaiji-004
415     =>big5-cdp-itaiji-005
416     =>big5-cdp-itaiji-006
417     =>big5-cdp-var-001
418     =>ucs@iwds-1
419     =>ucs@cognate
420     =>ucs@component
421     =>ucs@iwds-1/normalized
422     =>ucs-itaiji-001@iwds-1
423     =>ucs-itaiji-002@iwds-1
424     =>ucs-itaiji-003@iwds-1
425     =>ucs-itaiji-006@iwds-1
426     =>big5-cdp@component
427     =>big5-cdp@cognate
428     ==ucs@gb
429     =ucs@gb
430     ==ucs-var-002
431     ==ucs-itaiji-002
432     =ucs@JP/hanazono
433     =koseki
434     =shuowen-jiguge
435     =>shuowen-jiguge
436     ===shuowen-jiguge5
437     ===shuowen-jiguge4
438     ===chise-hdic-ktb-seal))
439
440
441 ;;; @ char-db formatters
442 ;;;
443
444 (defun char-db-make-char-spec (char)
445   (let (ret char-spec)
446     (cond ((characterp char)
447            (cond ((and (setq ret (encode-char char '=ucs 'defined-only))
448                        (not (and (<= #xE000 ret)(<= ret #xF8FF))))
449                   (setq char-spec (list (cons '=ucs ret)))
450                   (cond ((setq ret (get-char-attribute char 'name))
451                          (setq char-spec (cons (cons 'name ret) char-spec))
452                          )
453                         ((setq ret (get-char-attribute char 'name*))
454                          (setq char-spec (cons (cons 'name* ret) char-spec))
455                          ))
456                   )
457                  ((encode-char char '=mj 'defined-only)
458                   (setq char-spec nil)
459                   (dolist (ccs (charset-list))
460                     (if (and (or (eq ccs '=mj)
461                                  ;; (eq (charset-property ccs 'iso-ir) 177)
462                                  (string-match "=ucs@" (symbol-name ccs))
463                                  )
464                              (setq ccs (charset-name ccs))
465                              (null (assq ccs char-spec))
466                              (setq ret (encode-char char ccs 'defined-only)))
467                         (setq char-spec (cons (cons ccs ret) char-spec))))
468                   )
469                  ((encode-char char '==mj 'defined-only)
470                   (setq char-spec nil)
471                   (dolist (ccs (charset-list))
472                     (if (and (or (eq ccs '==mj)
473                                  ;; (eq (charset-property ccs 'iso-ir) 177)
474                                  (string-match "=ucs@" (symbol-name ccs))
475                                  )
476                              (setq ccs (charset-name ccs))
477                              (null (assq ccs char-spec))
478                              (setq ret (encode-char char ccs 'defined-only)))
479                         (setq char-spec (cons (cons ccs ret) char-spec))))
480                   )
481                  ((encode-char char '=adobe-japan1 'defined-only)
482                   (setq char-spec nil)
483                   (dolist (ccs (charset-list))
484                     (if (and (or (memq ccs
485                                        '(=adobe-japan1-0
486                                          =adobe-japan1-1
487                                          =adobe-japan1-2
488                                          =adobe-japan1-3
489                                          =adobe-japan1-4
490                                          =adobe-japan1-5
491                                          =adobe-japan1-6
492                                          ))
493                                  ;; (eq (charset-property ccs 'iso-ir) 177)
494                                  (string-match "=ucs@" (symbol-name ccs))
495                                  )
496                              (setq ccs (charset-name ccs))
497                              (null (assq ccs char-spec))
498                              (setq ret (encode-char char ccs 'defined-only)))
499                         (setq char-spec (cons (cons ccs ret) char-spec))))
500                   )
501                  ((encode-char char '==adobe-japan1 'defined-only)
502                   (setq char-spec nil)
503                   (dolist (ccs (charset-list))
504                     (if (and (or (memq ccs
505                                        '(==adobe-japan1-0
506                                          ==adobe-japan1-1
507                                          ==adobe-japan1-2
508                                          ==adobe-japan1-3
509                                          ==adobe-japan1-4
510                                          ==adobe-japan1-5
511                                          ==adobe-japan1-6
512                                          ))
513                                  ;; (eq (charset-property ccs 'iso-ir) 177)
514                                  (string-match "=ucs@" (symbol-name ccs))
515                                  )
516                              (setq ccs (charset-name ccs))
517                              (null (assq ccs char-spec))
518                              (setq ret (encode-char char ccs 'defined-only)))
519                         (setq char-spec (cons (cons ccs ret) char-spec))))
520                   )
521                  ((setq ret
522                         (catch 'tag
523                           (let ((rest char-db-coded-charset-priority-list)
524                                 ccs)
525                             (while rest
526                               (setq ccs (charset-name
527                                          (find-charset (car rest))))
528                               (if (setq ret
529                                         (encode-char char ccs
530                                                      'defined-only))
531                                   (throw 'tag (cons ccs ret)))
532                               (setq rest (cdr rest))))))
533                   (setq char-spec (list ret))
534                   (dolist (ccs (delq (car ret) (charset-list)))
535                     (if (and (or (charset-iso-final-char ccs)
536                                  (memq ccs
537                                        '(=daikanwa
538                                          =daikanwa@rev2
539                                          ;; =gt-k
540                                          =jis-x0208@1997
541                                          ))
542                                  (and (eq (charset-property ccs 'iso-ir) 177)
543                                       (not (eq (charset-name ccs) '=ucs@big5)))
544                                  ;; (string-match "=ucs@" (symbol-name ccs))
545                                  )
546                              (setq ccs (charset-name ccs))
547                              (null (assq ccs char-spec))
548                              (setq ret (encode-char char ccs 'defined-only)))
549                         (setq char-spec (cons (cons ccs ret) char-spec))))
550                   (if (null char-spec)
551                       (setq char-spec (split-char char)))
552                   (cond ((setq ret (get-char-attribute char 'name))
553                          (setq char-spec (cons (cons 'name ret) char-spec))
554                          )
555                         ((setq ret (get-char-attribute char 'name*))
556                          (setq char-spec (cons (cons 'name* ret) char-spec))
557                          ))
558                   )
559                  ((setq ret (get-char-attribute
560                              char 'ideographic-combination))
561                   (setq char-spec
562                         (cons (cons 'ideographic-combination ret)
563                               char-spec))
564                   ))
565            char-spec)
566           ((consp char)
567            char))))
568     
569 (defun char-db-insert-char-spec (char &optional readable column
570                                       required-features)
571   (unless column
572     (setq column (current-column)))
573   (let (char-spec temp-char)
574     (setq char-spec (char-db-make-char-spec char))
575     (unless (or (characterp char) ; char
576                 (condition-case nil
577                     (setq char (find-char char-spec))
578                   (error nil)))
579       ;; define temporary character
580       ;;   Current implementation is dirty.
581       (setq temp-char (define-char (cons '(ideograph-daikanwa . 0)
582                                          char-spec)))
583       (remove-char-attribute temp-char 'ideograph-daikanwa)
584       (setq char temp-char))
585     (insert-char-attributes char
586                             readable
587                             (union (mapcar #'car char-spec)
588                                    required-features)
589                             nil 'for-sub-node)
590     (when temp-char
591       ;; undefine temporary character
592       ;;   Current implementation is dirty.
593       (setq char-spec (char-attribute-alist temp-char))
594       (while char-spec
595         (remove-char-attribute temp-char (car (car char-spec)))
596         (setq char-spec (cdr char-spec))))))
597
598 (defun char-db-insert-alist (alist &optional readable column)
599   (unless column
600     (setq column (current-column)))
601   (let ((line-breaking
602          (concat "\n" (make-string (1+ column) ?\ )))
603         name value
604         ret al ; cal
605         key
606         lbs cell rest separator)
607     (insert "(")
608     (while alist
609       (setq name (car (car alist))
610             value (cdr (car alist)))
611       (cond ((eq name 'char)
612              (insert "(char . ")
613              (if (setq ret (condition-case nil
614                                (find-char value)
615                              (error nil)))
616                  (progn
617                    (setq al nil
618                          ;; cal nil
619                          )
620                    (while value
621                      (setq key (car (car value)))
622                      ;; (if (find-charset key)
623                      ;;     (setq cal (cons key cal))
624                      (setq al (cons key al))
625                      ;; )
626                      (setq value (cdr value)))
627                    (insert-char-attributes ret
628                                            readable
629                                            (or al 'none) ; cal
630                                            nil 'for-sub-node))
631                (insert (prin1-to-string value)))
632              (insert ")")
633              (insert line-breaking))
634             ((consp value)
635              (insert (format "(%-18s " name))
636              (setq lbs (concat "\n" (make-string (current-column) ?\ )))
637              (while (consp value)
638                (setq cell (car value))
639                (if (and (consp cell)
640                         (consp (car cell))
641                         (setq ret (condition-case nil
642                                       (find-char cell)
643                                     (error nil)))
644                         )
645                    (progn
646                      (setq rest cell
647                            al nil
648                            ;; cal nil
649                            )
650                      (while rest
651                        (setq key (car (car rest)))
652                        ;; (if (find-charset key)
653                        ;;     (setq cal (cons key cal))
654                        (setq al (cons key al))
655                        ;; )
656                        (setq rest (cdr rest)))
657                      (if separator
658                          (insert lbs))
659                      (insert-char-attributes ret
660                                              readable
661                                              al ; cal
662                                              nil 'for-sub-node)
663                      (setq separator lbs))
664                  (if separator
665                      (insert separator))
666                  (insert (prin1-to-string cell))
667                  (setq separator " "))
668                (setq value (cdr value)))
669              (insert ")")
670              (insert line-breaking))
671             (t
672              (insert (format "(%-18s . %S)%s"
673                              name value
674                              line-breaking))))
675       (setq alist (cdr alist))))
676   (insert ")"))
677
678 (defun char-db-insert-char-reference (plist &optional readable column)
679   (unless column
680     (setq column (current-column)))
681   (let ((line-breaking
682          (concat "\n" (make-string (1+ column) ?\ )))
683         (separator "")
684         name value)
685     (insert "(")
686     (while plist
687       (setq name (pop plist))
688       (setq value (pop plist))
689       (cond ((eq name :char)
690              (insert separator)
691              (insert ":char\t")
692              (cond ((numberp value)
693                     (setq value (decode-char '=ucs value)))
694                    ;; ((consp value)
695                    ;;  (setq value (or (find-char value)
696                    ;;                  value)))
697                    )
698              (char-db-insert-char-spec value readable)
699              (insert line-breaking)
700              (setq separator ""))
701             ((eq name :radical)
702              (insert (format "%s%s\t%d ; %c%s"
703                              separator
704                              name value
705                              (ideographic-radical value)
706                              line-breaking))
707              (setq separator ""))
708             (t
709              (insert (format "%s%s\t%S" separator name value))
710              (setq separator line-breaking)))
711       ))
712   (insert ")"))
713
714 (defun char-db-decode-isolated-char (ccs code-point)
715   (let (ret)
716     (setq ret
717           (cond ((eq ccs 'arabic-iso8859-6)
718                  (decode-char ccs code-point))
719                 ;; ((eq ccs '=gt)
720                 ;;  (decode-builtin-char '==gt code-point))
721                 ((and (memq ccs '(=gt-pj-1
722                                   =gt-pj-2
723                                   =gt-pj-3
724                                   =gt-pj-4
725                                   =gt-pj-5
726                                   =gt-pj-6
727                                   =gt-pj-7
728                                   =gt-pj-8
729                                   =gt-pj-9
730                                   =gt-pj-10
731                                   =gt-pj-11))
732                       (setq ret (decode-char ccs code-point))
733                       (setq ret (encode-char ret '=gt 'defined-only)))
734                  (decode-builtin-char '=gt ret))
735                 (t
736                  (decode-builtin-char ccs code-point))))
737     (cond ((null ret)
738            (or (decode-char ccs code-point)
739                (define-char (list (cons ccs code-point)))))
740           ((and (<= 0 (char-int ret))
741                 (<= (char-int ret) #x1F))
742            (decode-char '=ucs (+ #x2400 (char-int ret))))
743           ((= (char-int ret) #x7F)
744            ?\u2421)
745           (t ret))))
746
747 (defvar char-db-convert-obsolete-format t)
748
749 (defun char-db-insert-ccs-feature (name value line-breaking)
750   (cond
751    ((integerp value)
752     (insert
753      (format
754       (cond
755        ((memq name '(=>iwds-1
756                      ==shinjigen
757                      =shinjigen
758                      =shinjigen@1ed ==shinjigen@1ed
759                      =shinjigen@rev ==shinjigen@rev
760                      =shinjigen/+p@rev ==shinjigen/+p@rev
761                      ===daikanwa/ho ==daikanwa/ho
762                      =daikanwa/ho =>>daikanwa/ho =>daikanwa/ho))
763         "(%-18s .  %04d)\t; %c")
764        ((eq name '=shinjigen@1ed/24pr)
765         "(%-18s . %04d)\t; %c")
766        ((or
767          (memq name
768                '(===daikanwa
769                  ==daikanwa =daikanwa =>>daikanwa =>daikanwa
770                  =daikanwa@rev1 =daikanwa@rev2
771                  =daikanwa/+p ==daikanwa/+p ===daikanwa/+p
772                  =>>daikanwa/+p
773                  =daikanwa/+2p =>>daikanwa/+2p
774                  =gt ==gt ===gt
775                  =>>gt =+>gt =>gt
776                  =gt-k ==gt-k ===gt-k
777                  =>>gt-k =>gt-k
778                  =adobe-japan1-0 ==adobe-japan1-0 ===adobe-japan1-0
779                  =adobe-japan1-1 ==adobe-japan1-1 ===adobe-japan1-1
780                  =adobe-japan1-2 ==adobe-japan1-2 ===adobe-japan1-2
781                  =adobe-japan1-3 ==adobe-japan1-3 ===adobe-japan1-3
782                  =adobe-japan1-4 ==adobe-japan1-4 ===adobe-japan1-4
783                  =adobe-japan1-5 ==adobe-japan1-5 ===adobe-japan1-5
784                  =adobe-japan1-6 ==adobe-japan1-6 ===adobe-japan1-6
785                  =>>adobe-japan1-0 =+>adobe-japan1-0
786                  =>>adobe-japan1-1 =+>adobe-japan1-1
787                  =>>adobe-japan1-2 =+>adobe-japan1-2
788                  =>>adobe-japan1-3 =+>adobe-japan1-3
789                  =>>adobe-japan1-4 =+>adobe-japan1-4
790                  =>>adobe-japan1-5 =+>adobe-japan1-5
791                  =>>adobe-japan1-6 =+>adobe-japan1-6
792                  =>cbeta =cbeta =>>cbeta ==cbeta ===cbeta
793                  =zinbun-oracle =>zinbun-oracle
794                  =daijiten ==daijiten ===daijiten ==>daijiten
795                  ===hng-jou ===hng-keg ===hng-dng ===hng-mam
796                  ===hng-drt ===hng-kgk ===hng-myz ===hng-kda
797                  ===hng-khi ===hng-khm ===hng-fhs ===hng-hok
798                  ===hng-kyd ===hng-sok
799                  ===hng-yhk ===hng-kak ===hng-kar ===hng-kae
800                  ===hng-sys ===hng-tsu ===hng-tzj
801                  ===hng-hos ===hng-kkh ===hng-nak ===hng-jhk
802                  ===hng-hod ===hng-gok ===hng-ink ===hng-nto
803                  ===hng-nkm ===hng-k24 ===hng-ini ===hng-nkk
804                  ===hng-kcc ===hng-kcj ===hng-kbk ===hng-sik
805                  ===hng-skk ===hng-kyu ===hng-ksk ===hng-wan
806                  ===hng-okd ===hng-wad ===hng-kmi
807                  ===hng-zkd ===hng-doh ===hng-jyu ===hng-tzs
808                  ===hng-sai ===hng-kad ===hng-kss
809                  ===hng-kyo ===hng-ykk ===hng-saa
810                  ===hng-sab ===hng-wks ===hng-wke ===hng-smk
811                  ===hng-sgs ===hng-sts ===hng-kkd
812                  =>shuowen-jiguge =shuowen-jiguge
813                  ===shuowen-jiguge4 ===shuowen-jiguge5 ===shuowen-jiguge-A30))
814          ;; (string-match "^=adobe-" (symbol-name name))
815          )
816         "(%-18s . %05d)\t; %c")
817        ((memq name '(=hanyo-denshi/ks
818                      ==hanyo-denshi/ks ===hanyo-denshi/ks
819                      =>>hanyo-denshi/ks
820                      =koseki ==koseki
821                      =mj ==mj ===mj =>>mj =>mj
822                      =zihai mojikyo))
823         "(%-18s . %06d)\t; %c")
824        ((memq name '(=hanyo-denshi/tk =>hanyo-denshi/tk ==hanyo-denshi/tk))
825         "(%-18s . %08d)\t; %c")
826        ((>= (charset-dimension name) 2)
827         "(%-18s . #x%04X)\t; %c")
828        (t
829         "(%-18s . #x%02X)\t; %c"))
830       name
831       (if (= (charset-iso-graphic-plane name) 1)
832           (logior value
833                   (cond ((= (charset-dimension name) 1)
834                          #x80)
835                         ((= (charset-dimension name) 2)
836                          #x8080)
837                         ((= (charset-dimension name) 3)
838                          #x808080)
839                         (t 0)))
840         value)
841       (char-db-decode-isolated-char name value)))
842     (if (and (= (charset-chars name) 94)
843              (= (charset-dimension name) 2))
844         (insert (format " [%02d-%02d]"
845                         (- (lsh value -8) 32)
846                         (- (logand value 255) 32))))
847     )
848    (t
849     (insert (format "(%-18s . %s)" name value))
850     ))
851   (insert line-breaking))
852
853 (defun char-db-insert-relation-feature (char name value line-breaking
854                                              ccss readable)
855   (insert (format "(%-18s%s " name line-breaking))
856   (let ((lbs (concat "\n" (make-string (current-column) ?\ )))
857         separator cell sources required-features
858         ret)
859     (while (consp value)
860       (setq cell (car value))
861       (if (integerp cell)
862           (setq cell (decode-char '=ucs cell)))
863       (cond
864        ((eq name '->subsumptive)
865         (when (or (not (some (lambda (atr)
866                                (get-char-attribute cell atr))
867                              char-db-ignored-attributes))
868                   (some (lambda (ccs)
869                           (encode-char cell ccs 'defined-only))
870                         ccss))
871           (if separator
872               (insert lbs))
873           (let ((char-db-ignored-attributes
874                  (cons '<-subsumptive
875                        char-db-ignored-attributes)))
876             (insert-char-attributes cell readable nil nil 'for-sub-node))
877           (setq separator lbs))
878         )
879        ((characterp cell)
880         (setq sources
881               (get-char-attribute
882                char (intern (format "%s*sources" name))))
883         (setq required-features nil)
884         (dolist (source sources)
885           (cond
886            ((memq source '(JP
887                            JP/Jouyou
888                            shinjigen shinjigen@1ed shinjigen@rev))
889             (setq required-features
890                   (union required-features
891                          '(=jis-x0208
892                            =jis-x0208@1990
893                            =jis-x0213-1@2000
894                            =jis-x0213-1@2004
895                            =jis-x0213-2
896                            =jis-x0212
897                            =jis-x0208@1983
898                            =jis-x0208@1978
899                            =shinjigen
900                            =shinjigen@1ed
901                            =shinjigen@rev
902                            =shinjigen/+p@rev))))
903            ((eq source 'CN)
904             (setq required-features
905                   (union required-features
906                          '(=gb2312
907                            =gb12345
908                            =iso-ir165)))))
909           (cond
910            ((find-charset (setq ret (intern (format "=%s" source))))
911             (setq required-features
912                   (cons ret required-features)))
913            (t (setq required-features
914                     (cons source required-features)))))
915         (cond ((string-match "@JP" (symbol-name name))
916                (setq required-features
917                      (union required-features
918                             '(=jis-x0208
919                               =jis-x0208@1990
920                               =jis-x0213-1-2000
921                               =jis-x0213-2-2000
922                               =jis-x0212
923                               =jis-x0208@1983
924                               =jis-x0208@1978))))
925               ((string-match "@CN" (symbol-name name))
926                (setq required-features
927                      (union required-features
928                             '(=gb2312
929                               =gb12345
930                               =iso-ir165)))))
931         (if separator
932             (insert lbs))
933         (if readable
934             (insert (format "%S" cell))
935           (char-db-insert-char-spec cell readable
936                                     nil
937                                     required-features))
938         (setq separator lbs))
939        ((consp cell)
940         (if separator
941             (insert lbs))
942         (if (consp (car cell))
943             (char-db-insert-char-spec cell readable)
944           (char-db-insert-char-reference cell readable))
945         (setq separator lbs))
946        (t
947         (if separator
948             (insert separator))
949         (insert (prin1-to-string cell))
950         (setq separator " ")))
951       (setq value (cdr value)))
952     (insert ")")
953     (insert line-breaking)))
954
955 (defun insert-char-attributes (char &optional readable attributes column
956                                     for-sub-node)
957   (unless column
958     (setq column (current-column)))
959   (let (name value ; has-long-ccs-name
960         rest
961         radical strokes
962         (line-breaking
963          (concat "\n" (make-string (1+ column) ?\ )))
964         lbs cell separator ret
965         key al cal
966         dest-ccss ; sources required-features
967         ccss)
968     (let (atr-d)
969       (setq attributes
970             (sort (if attributes
971                       (if (consp attributes)
972                           (progn
973                             (dolist (name attributes)
974                               (unless (memq name char-db-ignored-attributes)
975                                 (if (find-charset name)
976                                     (push name ccss))
977                                 (push name atr-d)))
978                             atr-d))
979                     (dolist (name (char-attribute-list))
980                       (unless (memq name char-db-ignored-attributes)
981                         (if (find-charset name)
982                             (push name ccss))
983                         (push name atr-d)))
984                     atr-d)
985                   #'char-attribute-name<)))
986     (insert "(")
987     (when (memq '<-subsumptive attributes)
988       (when (or readable (not for-sub-node))
989         (when (setq value (get-char-attribute char '<-subsumptive))
990           (char-db-insert-relation-feature char '<-subsumptive value
991                                            line-breaking
992                                            ccss readable)))
993       (setq attributes (delq '<-subsumptive attributes)))
994     (when (and (memq '<-denotational attributes)
995                (setq value (get-char-attribute char '<-denotational)))
996       (char-db-insert-relation-feature char '<-denotational value
997                                        line-breaking
998                                        ccss readable)
999       (setq attributes (delq '<-denotational attributes)))
1000     (when (and (memq '<-denotational@component attributes)
1001                (setq value (get-char-attribute char '<-denotational@component)))
1002       (char-db-insert-relation-feature char '<-denotational@component value
1003                                        line-breaking
1004                                        ccss readable)
1005       (setq attributes (delq '<-denotational@component attributes)))
1006     (when (and (memq '<-denotational@usage attributes)
1007                (setq value (get-char-attribute char '<-denotational@usage)))
1008       (char-db-insert-relation-feature char '<-denotational@usage value
1009                                        line-breaking
1010                                        ccss readable)
1011       (setq attributes (delq '<-denotational@usage attributes)))
1012     (when (and (memq 'name attributes)
1013                (setq value (get-char-attribute char 'name)))
1014       (insert (format
1015                (if (> (+ (current-column) (length value)) 48)
1016                    "(name . %S)%s"
1017                  "(name               . %S)%s")
1018                value line-breaking))
1019       (setq attributes (delq 'name attributes))
1020       )
1021     (when (and (memq 'name* attributes)
1022                (setq value (get-char-attribute char 'name*)))
1023       (insert (format
1024                (if (> (+ (current-column) (length value)) 48)
1025                    "(name* . %S)%s"
1026                  "(name*              . %S)%s")
1027                value line-breaking))
1028       (setq attributes (delq 'name* attributes))
1029       )
1030     (when (and (memq 'script attributes)
1031                (setq value (get-char-attribute char 'script)))
1032       (insert (format "(script\t\t%s)%s"
1033                       (mapconcat (function prin1-to-string)
1034                                  value " ")
1035                       line-breaking))
1036       (setq attributes (delq 'script attributes))
1037       )
1038     (dolist (name '(=>ucs =>ucs*))
1039       (when (and (memq name attributes)
1040                  (setq value (get-char-attribute char name)))
1041         (insert (format "(%-18s . #x%04X)\t; %c%s"
1042                         name value (decode-char '=ucs value)
1043                         line-breaking))
1044         (setq attributes (delq name attributes))))
1045     (dolist (name '(=>ucs@gb =>ucs@big5))
1046       (when (and (memq name attributes)
1047                  (setq value (get-char-attribute char name)))
1048         (insert (format "(%-18s . #x%04X)\t; %c%s"
1049                         name value
1050                         (decode-char (intern
1051                                       (concat "="
1052                                               (substring
1053                                                (symbol-name name) 2)))
1054                                      value)
1055                         line-breaking))
1056         (setq attributes (delq name attributes))
1057         ))
1058     ;; (dolist (name '(=>daikanwa))
1059     ;;   (when (and (memq name attributes)
1060     ;;              (setq value (get-char-attribute char name)))
1061     ;;     (insert
1062     ;;      (if (integerp value)
1063     ;;          (format "(%-18s . %05d)\t; %c%s"
1064     ;;                  name value (decode-char '=daikanwa value)
1065     ;;                  line-breaking)
1066     ;;        (format "(%-18s %s)\t; %c%s"
1067     ;;                name
1068     ;;                (mapconcat (function prin1-to-string)
1069     ;;                           value " ")
1070     ;;                (char-representative-of-daikanwa char)
1071     ;;                line-breaking)))
1072     ;;     (setq attributes (delq name attributes))))
1073     (when (and (memq 'general-category attributes)
1074                (setq value (get-char-attribute char 'general-category)))
1075       (insert (format
1076                "(general-category\t%s) ; %s%s"
1077                (mapconcat (lambda (cell)
1078                             (format "%S" cell))
1079                           value " ")
1080                (cond ((rassoc value unidata-normative-category-alist)
1081                       "Normative Category")
1082                      ((rassoc value unidata-informative-category-alist)
1083                       "Informative Category")
1084                      (t
1085                       "Unknown Category"))
1086                line-breaking))
1087       (setq attributes (delq 'general-category attributes))
1088       )
1089     (when (and (memq 'bidi-category attributes)
1090                (setq value (get-char-attribute char 'bidi-category)))
1091       (insert (format "(bidi-category\t. %S)%s"
1092                       value
1093                       line-breaking))
1094       (setq attributes (delq 'bidi-category attributes))
1095       )
1096     (unless (or (not (memq 'mirrored attributes))
1097                 (eq (setq value (get-char-attribute char 'mirrored 'empty))
1098                     'empty))
1099       (insert (format "(mirrored\t\t. %S)%s"
1100                       value
1101                       line-breaking))
1102       (setq attributes (delq 'mirrored attributes))
1103       )
1104     (cond
1105      ((and (memq 'decimal-digit-value attributes)
1106            (setq value (get-char-attribute char 'decimal-digit-value)))
1107       (insert (format "(decimal-digit-value . %S)%s"
1108                       value
1109                       line-breaking))
1110       (setq attributes (delq 'decimal-digit-value attributes))
1111       (when (and (memq 'digit-value attributes)
1112                  (setq value (get-char-attribute char 'digit-value)))
1113         (insert (format "(digit-value\t . %S)%s"
1114                         value
1115                         line-breaking))
1116         (setq attributes (delq 'digit-value attributes))
1117         )
1118       (when (and (memq 'numeric-value attributes)
1119                  (setq value (get-char-attribute char 'numeric-value)))
1120         (insert (format "(numeric-value\t . %S)%s"
1121                         value
1122                         line-breaking))
1123         (setq attributes (delq 'numeric-value attributes))
1124         )
1125       )
1126      (t
1127       (when (and (memq 'digit-value attributes)
1128                  (setq value (get-char-attribute char 'digit-value)))
1129         (insert (format "(digit-value\t. %S)%s"
1130                         value
1131                         line-breaking))
1132         (setq attributes (delq 'digit-value attributes))
1133         )
1134       (when (and (memq 'numeric-value attributes)
1135                  (setq value (get-char-attribute char 'numeric-value)))
1136         (insert (format "(numeric-value\t. %S)%s"
1137                         value
1138                         line-breaking))
1139         (setq attributes (delq 'numeric-value attributes))
1140         )))
1141     (when (and (memq 'iso-10646-comment attributes)
1142                (setq value (get-char-attribute char 'iso-10646-comment)))
1143       (insert (format "(iso-10646-comment\t. %S)%s"
1144                       value
1145                       line-breaking))
1146       (setq attributes (delq 'iso-10646-comment attributes))
1147       )
1148     (when (and (memq 'morohashi-daikanwa attributes)
1149                (setq value (get-char-attribute char 'morohashi-daikanwa)))
1150       (insert (format "(morohashi-daikanwa\t%s)%s"
1151                       (mapconcat (function prin1-to-string) value " ")
1152                       line-breaking))
1153       (setq attributes (delq 'morohashi-daikanwa attributes))
1154       )
1155     (setq radical nil
1156           strokes nil)
1157     (when (and (memq 'ideographic-radical attributes)
1158                (setq value (get-char-attribute char 'ideographic-radical)))
1159       (setq radical value)
1160       (insert (format "(ideographic-radical . %S)\t; %c%s"
1161                       radical
1162                       (ideographic-radical radical)
1163                       line-breaking))
1164       (setq attributes (delq 'ideographic-radical attributes))
1165       )
1166     (when (and (memq 'shuowen-radical attributes)
1167                (setq value (get-char-attribute char 'shuowen-radical)))
1168       (insert (format "(shuowen-radical\t. %S)\t; %c%s"
1169                       value
1170                       (shuowen-radical value)
1171                       line-breaking))
1172       (setq attributes (delq 'shuowen-radical attributes))
1173       )
1174     (let (key)
1175       (dolist (domain
1176                (append
1177                 char-db-feature-domains
1178                 (let (dest domain)
1179                   (dolist (feature (char-attribute-list))
1180                     (setq feature (symbol-name feature))
1181                     (when (string-match
1182                            "\\(radical\\|strokes\\)@\\([^@*]+\\)\\(\\*\\|$\\)"
1183                            feature)
1184                       (setq domain (intern (match-string 2 feature)))
1185                      (unless (memq domain dest)
1186                        (setq dest (cons domain dest)))))
1187                   (sort dest #'string<))))
1188         (setq key (intern (format "%s@%s" 'ideographic-radical domain)))
1189         (when (and (memq key attributes)
1190                    (setq value (get-char-attribute char key)))
1191           (setq radical value)
1192           (insert (format "(%s . %S)\t; %c%s"
1193                           key
1194                           radical
1195                           (ideographic-radical radical)
1196                           line-breaking))
1197           (setq attributes (delq key attributes))
1198           )
1199         (setq key (intern (format "%s@%s" 'ideographic-strokes domain)))
1200         (when (and (memq key attributes)
1201                    (setq value (get-char-attribute char key)))
1202           (setq strokes value)
1203           (insert (format "(%s . %S)%s"
1204                           key
1205                           strokes
1206                           line-breaking))
1207           (setq attributes (delq key attributes))
1208           )
1209         (setq key (intern (format "%s@%s" 'total-strokes domain)))
1210         (when (and (memq key attributes)
1211                    (setq value (get-char-attribute char key)))
1212           (insert (format "(%s       . %S)%s"
1213                           key
1214                           value
1215                           line-breaking))
1216           (setq attributes (delq key attributes))
1217           )
1218         (dolist (feature '(ideographic-radical
1219                            ideographic-strokes
1220                            total-strokes))
1221           (setq key (intern (format "%s@%s*sources" feature domain)))
1222           (when (and (memq key attributes)
1223                      (setq value (get-char-attribute char key)))
1224             (insert (format "(%s%s" key line-breaking))
1225             (dolist (cell value)
1226               (insert (format " %s" cell)))
1227             (insert ")")
1228             (insert line-breaking)
1229             (setq attributes (delq key attributes))
1230             ))
1231         ))
1232     (when (and (memq 'ideographic-strokes attributes)
1233                (setq value (get-char-attribute char 'ideographic-strokes)))
1234       (setq strokes value)
1235       (insert (format "(ideographic-strokes . %S)%s"
1236                       strokes
1237                       line-breaking))
1238       (setq attributes (delq 'ideographic-strokes attributes))
1239       )
1240     (when (and (memq 'kangxi-radical attributes)
1241                (setq value (get-char-attribute char 'kangxi-radical)))
1242       (unless (eq value radical)
1243         (insert (format "(kangxi-radical\t . %S)\t; %c%s"
1244                         value
1245                         (ideographic-radical value)
1246                         line-breaking))
1247         (or radical
1248             (setq radical value)))
1249       (setq attributes (delq 'kangxi-radical attributes))
1250       )
1251     (when (and (memq 'kangxi-strokes attributes)
1252                (setq value (get-char-attribute char 'kangxi-strokes)))
1253       (unless (eq value strokes)
1254         (insert (format "(kangxi-strokes\t . %S)%s"
1255                         value
1256                         line-breaking))
1257         (or strokes
1258             (setq strokes value)))
1259       (setq attributes (delq 'kangxi-strokes attributes))
1260       )
1261     ;; (when (and (memq 'japanese-radical attributes)
1262     ;;            (setq value (get-char-attribute char 'japanese-radical)))
1263     ;;   (unless (eq value radical)
1264     ;;     (insert (format "(japanese-radical\t . %S)\t; %c%s"
1265     ;;                     value
1266     ;;                     (ideographic-radical value)
1267     ;;                     line-breaking))
1268     ;;     (or radical
1269     ;;         (setq radical value)))
1270     ;;   (setq attributes (delq 'japanese-radical attributes))
1271     ;;   )
1272     (when (and (memq 'japanese-strokes attributes)
1273                (setq value (get-char-attribute char 'japanese-strokes)))
1274       (unless (eq value strokes)
1275         (insert (format "(japanese-strokes\t . %S)%s"
1276                         value
1277                         line-breaking))
1278         (or strokes
1279             (setq strokes value)))
1280       (setq attributes (delq 'japanese-strokes attributes))
1281       )
1282     (when (and (memq 'cns-radical attributes)
1283                (setq value (get-char-attribute char 'cns-radical)))
1284       (insert (format "(cns-radical\t . %S)\t; %c%s"
1285                       value
1286                       (ideographic-radical value)
1287                       line-breaking))
1288       (setq attributes (delq 'cns-radical attributes))
1289       )
1290     (when (and (memq 'cns-strokes attributes)
1291                (setq value (get-char-attribute char 'cns-strokes)))
1292       (unless (eq value strokes)
1293         (insert (format "(cns-strokes\t . %S)%s"
1294                         value
1295                         line-breaking))
1296         (or strokes
1297             (setq strokes value)))
1298       (setq attributes (delq 'cns-strokes attributes))
1299       )
1300     ;; (when (and (memq 'ideographic- attributes)
1301     ;;            (setq value (get-char-attribute char 'ideographic-)))
1302     ;;   (insert "(ideographic-       ")
1303     ;;   (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1304     ;;         separator nil)
1305     ;;   (while (consp value)
1306     ;;     (setq cell (car value))
1307     ;;     (if (integerp cell)
1308     ;;         (setq cell (decode-char '=ucs cell)))
1309     ;;     (cond ((characterp cell)
1310     ;;            (if separator
1311     ;;                (insert lbs))
1312     ;;            (if readable
1313     ;;                (insert (format "%S" cell))
1314     ;;              (char-db-insert-char-spec cell readable))
1315     ;;            (setq separator lbs))
1316     ;;           ((consp cell)
1317     ;;            (if separator
1318     ;;                (insert lbs))
1319     ;;            (if (consp (car cell))
1320     ;;                (char-db-insert-char-spec cell readable)
1321     ;;              (char-db-insert-char-reference cell readable))
1322     ;;            (setq separator lbs))
1323     ;;           (t
1324     ;;            (if separator
1325     ;;                (insert separator))
1326     ;;            (insert (prin1-to-string cell))
1327     ;;            (setq separator " ")))
1328     ;;     (setq value (cdr value)))
1329     ;;   (insert ")")
1330     ;;   (insert line-breaking)
1331     ;;   (setq attributes (delq 'ideographic- attributes)))
1332     (when (and (memq 'total-strokes attributes)
1333                (setq value (get-char-attribute char 'total-strokes)))
1334       (insert (format "(total-strokes       . %S)%s"
1335                       value
1336                       line-breaking))
1337       (setq attributes (delq 'total-strokes attributes))
1338       )
1339     ;; (when (and (memq '->ideograph attributes)
1340     ;;            (setq value (get-char-attribute char '->ideograph)))
1341     ;;   (insert (format "(->ideograph\t%s)%s"
1342     ;;                   (mapconcat (lambda (code)
1343     ;;                                (cond ((symbolp code)
1344     ;;                                       (symbol-name code))
1345     ;;                                      ((integerp code)
1346     ;;                                       (format "#x%04X" code))
1347     ;;                                      (t
1348     ;;                                       (format "%s %S"
1349     ;;                                               line-breaking code))))
1350     ;;                              value " ")
1351     ;;                   line-breaking))
1352     ;;   (setq attributes (delq '->ideograph attributes))
1353     ;;   )
1354     ;; (when (and (memq '->decomposition attributes)
1355     ;;            (setq value (get-char-attribute char '->decomposition)))
1356     ;;   (insert (format "(->decomposition\t%s)%s"
1357     ;;                   (mapconcat (lambda (code)
1358     ;;                                (cond ((symbolp code)
1359     ;;                                       (symbol-name code))
1360     ;;                                      ((characterp code)
1361     ;;                                       (if readable
1362     ;;                                           (format "%S" code)
1363     ;;                                         (format "#x%04X"
1364     ;;                                                 (char-int code))
1365     ;;                                         ))
1366     ;;                                      ((integerp code)
1367     ;;                                       (format "#x%04X" code))
1368     ;;                                      (t
1369     ;;                                       (format "%s%S" line-breaking code))))
1370     ;;                              value " ")
1371     ;;                   line-breaking))
1372     ;;   (setq attributes (delq '->decomposition attributes))
1373     ;;   )
1374     (if (equal (get-char-attribute char '->titlecase)
1375                (get-char-attribute char '->uppercase))
1376         (setq attributes (delq '->titlecase attributes)))
1377     (when (and (memq '->mojikyo attributes)
1378                (setq value (get-char-attribute char '->mojikyo)))
1379       (insert (format "(->mojikyo\t\t. %06d)\t; %c%s"
1380                       value (decode-char 'mojikyo value)
1381                       line-breaking))
1382       (setq attributes (delq '->mojikyo attributes))
1383       )
1384     (unless readable
1385       (dolist (ignored '(composition
1386                          ->denotational <-subsumptive ->ucs-unified
1387                          ;; ->ideographic-component-forms
1388                          ))
1389         (setq attributes (delq ignored attributes))))
1390     (while attributes
1391       (setq name (car attributes))
1392       (unless (eq (setq value (get-char-attribute char name 'value-is-empty))
1393                   'value-is-empty)
1394         (cond ((setq ret (find-charset name))
1395                (setq name (charset-name ret))
1396                (when (not (memq name dest-ccss))
1397                  (setq dest-ccss (cons name dest-ccss))
1398                  (char-db-insert-ccs-feature name value line-breaking))
1399                )
1400               ((string-match "^=>ucs@" (symbol-name name))
1401                (insert (format "(%-18s . #x%04X)\t; %c%s"
1402                                name value (decode-char '=ucs value)
1403                                line-breaking))
1404                )
1405               ((eq name 'jisx0208-1978/4X)
1406                (insert (format "(%-18s . #x%04X)%s"
1407                                name value
1408                                line-breaking))
1409                )
1410               ((and
1411                 (not readable)
1412                 (not (eq name '->subsumptive))
1413                 (not (eq name '->uppercase))
1414                 (not (eq name '->lowercase))
1415                 (not (eq name '->titlecase))
1416                 (not (eq name '->canonical))
1417                 (not (eq name '->Bopomofo))
1418                 (not (eq name '->mistakable))
1419                 (not (eq name '->ideographic-variants))
1420                 (null (get-char-attribute
1421                        char (intern (format "%s*sources" name))))
1422                 (not (string-match "\\*sources$" (symbol-name name)))
1423                 (null (get-char-attribute
1424                        char (intern (format "%s*note" name))))
1425                 (not (string-match "\\*note$" (symbol-name name)))
1426                 (or (eq name '<-identical)
1427                     (eq name '<-uppercase)
1428                     (eq name '<-lowercase)
1429                     (eq name '<-titlecase)
1430                     (eq name '<-canonical)
1431                     (eq name '<-ideographic-variants)
1432                     ;; (eq name '<-synonyms)
1433                     (string-match "^<-synonyms" (symbol-name name))
1434                     (eq name '<-mistakable)
1435                     (when (string-match "^->" (symbol-name name))
1436                       (cond
1437                        ((string-match "^->fullwidth" (symbol-name name))
1438                         (not (and (consp value)
1439                                   (characterp (car value))
1440                                   (encode-char
1441                                    (car value) '=ucs 'defined-only)))
1442                         )
1443                        (t)))
1444                     ))
1445                )
1446               ((or (eq name 'ideographic-structure)
1447                    (eq name 'ideographic-structure@apparent)
1448                    (eq name 'ideographic-structure@apparent/leftmost)
1449                    (eq name 'ideographic-structure@apparent/rightmost)
1450                    (eq name 'ideographic-combination)
1451                    ;; (eq name 'ideographic-)
1452                    (eq name '=decomposition)
1453                    (char-feature-base-name= '=decomposition name)
1454                    (char-feature-base-name= '=>decomposition name)
1455                    ;; (string-match "^=>*decomposition\\(@[^*]+\\)?$"
1456                    ;;               (symbol-name name))
1457                    (string-match "^\\(->\\|<-\\)[^*]*$" (symbol-name name))
1458                    (string-match "^\\(->\\|<-\\)[^*]*\\*sources$"
1459                                  (symbol-name name))
1460                    )
1461                (char-db-insert-relation-feature char name value
1462                                                 line-breaking
1463                                                 ccss readable))
1464               ;; ((memq name '(ideograph=
1465               ;;               original-ideograph-of
1466               ;;               ancient-ideograph-of
1467               ;;               vulgar-ideograph-of
1468               ;;               wrong-ideograph-of
1469               ;;               ;; simplified-ideograph-of
1470               ;;               ideographic-variants
1471               ;;               ;; ideographic-different-form-of
1472               ;;               ))
1473               ;;  (insert (format "(%-18s%s " name line-breaking))
1474               ;;  (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1475               ;;        separator nil)
1476               ;;  (while (consp value)
1477               ;;    (setq cell (car value))
1478               ;;    (if (and (consp cell)
1479               ;;             (consp (car cell)))
1480               ;;        (progn
1481               ;;          (if separator
1482               ;;              (insert lbs))
1483               ;;          (char-db-insert-alist cell readable)
1484               ;;          (setq separator lbs))
1485               ;;      (if separator
1486               ;;          (insert separator))
1487               ;;      (insert (prin1-to-string cell))
1488               ;;      (setq separator " "))
1489               ;;    (setq value (cdr value)))
1490               ;;  (insert ")")
1491               ;;  (insert line-breaking))
1492               ((consp value)
1493                (insert (format "(%-18s " name))
1494                (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1495                      separator nil)
1496                (while (consp value)
1497                  (setq cell (car value))
1498                  (if (and (consp cell)
1499                           (consp (car cell))
1500                           (setq ret (condition-case nil
1501                                         (find-char cell)
1502                                       (error nil))))
1503                      (progn
1504                        (setq rest cell
1505                              al nil
1506                              cal nil)
1507                        (while rest
1508                          (setq key (car (car rest)))
1509                          (if (find-charset key)
1510                              (setq cal (cons key cal))
1511                            (setq al (cons key al)))
1512                          (setq rest (cdr rest)))
1513                        (if separator
1514                            (insert lbs))
1515                        (insert-char-attributes ret
1516                                                readable
1517                                                al ; cal
1518                                                nil 'for-sub-node)
1519                        (setq separator lbs))
1520                    (setq ret (prin1-to-string cell))
1521                    (if separator
1522                        (if (< (+ (current-column)
1523                                  (length ret)
1524                                  (length separator))
1525                               76)
1526                            (insert separator)
1527                          (insert lbs)))
1528                    (insert ret)
1529                    (setq separator " "))
1530                  (setq value (cdr value)))
1531                (insert ")")
1532                (insert line-breaking))
1533               (t
1534                (insert (format "(%-18s" name))
1535                (setq ret (prin1-to-string value))
1536                (unless (< (+ (current-column)
1537                              (length ret)
1538                              3)
1539                           76)
1540                  (insert line-breaking))
1541                (insert " . " ret ")" line-breaking)
1542                ;; (insert (format "(%-18s . %S)%s"
1543                ;;                 name value
1544                ;;                 line-breaking))
1545                )
1546               ))
1547       (setq attributes (cdr attributes)))
1548     (insert ")")))
1549
1550 (defun insert-char-data (char &optional readable
1551                               attributes)
1552   (save-restriction
1553     (narrow-to-region (point)(point))
1554     (insert "(define-char
1555   '")
1556     (insert-char-attributes char readable attributes)
1557     (insert ")\n")
1558     (goto-char (point-min))
1559     (while (re-search-forward "[ \t]+$" nil t)
1560       (replace-match ""))
1561     ;; from tabify.
1562     (goto-char (point-min))
1563     (while (re-search-forward "[ \t][ \t][ \t]*" nil t)
1564       (let ((column (current-column))
1565             (indent-tabs-mode t))
1566         (delete-region (match-beginning 0) (point))
1567         (indent-to column)))
1568     (goto-char (point-max))
1569     ;; (tabify (point-min)(point-max))
1570     ))
1571
1572 (defun insert-char-data-with-variant (char &optional printable
1573                                            no-ucs-unified
1574                                            script excluded-script)
1575   (insert-char-data char printable)
1576   (let ((variants (char-variants char))
1577         rest
1578         variant vs ret)
1579     (setq variants (sort variants #'<))
1580     (setq rest variants)
1581     (setq variants (cons char variants))
1582     (while rest
1583       (setq variant (car rest))
1584       (unless (get-char-attribute variant '<-subsumptive)
1585         (if (and (or (null script)
1586                      (null (setq vs (get-char-attribute variant 'script)))
1587                      (memq script vs))
1588                  (or (null excluded-script)
1589                      (null (setq vs (get-char-attribute variant 'script)))
1590                      (not (memq excluded-script vs))))
1591             (unless (and no-ucs-unified (get-char-attribute variant '=ucs))
1592               (insert-char-data variant printable)
1593               (if (setq ret (char-variants variant))
1594                   (while ret
1595                     (or (memq (car ret) variants)
1596                         ;; (get-char-attribute (car ret) '<-subsumptive)
1597                         (setq rest (nconc rest (list (car ret)))))
1598                     (setq ret (cdr ret)))))))
1599       (setq rest (cdr rest)))))
1600
1601 (defun insert-char-range-data (min max &optional script excluded-script)
1602   (let ((code min)
1603         char)
1604     (while (<= code max)
1605       (setq char (decode-char '=ucs code))
1606       (if (encode-char char '=ucs 'defined-only)
1607           (insert-char-data-with-variant char nil 'no-ucs-unified
1608                                          script excluded-script))
1609       (setq code (1+ code)))))
1610
1611 (defun write-char-range-data-to-file (min max file
1612                                           &optional script excluded-script)
1613   (let ((coding-system-for-write char-db-file-coding-system))
1614     (with-temp-buffer
1615       (insert (format ";; -*- coding: %s -*-\n"
1616                       char-db-file-coding-system))
1617       (insert-char-range-data min max script excluded-script)
1618       (write-region (point-min)(point-max) file))))
1619
1620 (defvar what-character-original-window-configuration)
1621
1622 ;;;###autoload
1623 (defun what-char-definition (char)
1624   (interactive (list (char-after)))
1625   (let ((buf (get-buffer-create "*Character Description*"))
1626         (the-buf (current-buffer))
1627         (win-conf (current-window-configuration)))
1628     (pop-to-buffer buf)
1629     (make-local-variable 'what-character-original-window-configuration)
1630     (setq what-character-original-window-configuration win-conf)
1631     (setq buffer-read-only nil)
1632     (erase-buffer)
1633     (condition-case err
1634         (progn
1635           (insert-char-data-with-variant char 'printable)
1636           (unless (char-attribute-alist char)
1637             (insert (format ";; = %c\n"
1638                             (let* ((rest (split-char char))
1639                                    (ccs (pop rest))
1640                                    (code (pop rest)))
1641                               (while rest
1642                                 (setq code (logior (lsh code 8)
1643                                                    (pop rest))))
1644                               (decode-char ccs code)))))
1645           ;; (char-db-update-comment)
1646           (set-buffer-modified-p nil)
1647           (view-mode the-buf (lambda (buf)
1648                                (set-window-configuration
1649                                 what-character-original-window-configuration)
1650                                ))
1651           (goto-char (point-min)))
1652       (error (progn
1653                (set-window-configuration
1654                 what-character-original-window-configuration)
1655                (signal (car err) (cdr err)))))))
1656
1657
1658 ;;; @ end
1659 ;;;
1660
1661 (provide 'char-db-util)
1662
1663 ;;; char-db-util.el ends here