c79d6300ecab0efe64ff604961c152ae45befc35
[chise/xemacs-chise.git] / lisp / utf-2000 / char-db-util.el
1 ;;; char-db-util.el --- Character Database utility -*- coding: utf-8-er; -*-
2
3 ;; Copyright (C) 1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,
4 ;;   2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 MORIOKA Tomohiko.
5
6 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
7 ;; Keywords: CHISE, Character Database, ISO/IEC 10646, UCS, Unicode, MULE.
8
9 ;; This file is part of XEmacs CHISE.
10
11 ;; XEmacs CHISE is free software; you can redistribute it and/or
12 ;; modify it under the terms of the GNU General Public License as
13 ;; published by the Free Software Foundation; either version 2, or (at
14 ;; your option) any later version.
15
16 ;; XEmacs CHISE is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 ;; General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with XEmacs CHISE; see the file COPYING.  If not, write to
23 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Code:
27
28 (require 'chise-subr)
29 (require 'ideograph-subr)
30
31 (defconst unidata-normative-category-alist
32   '(("Lu" letter        uppercase)
33     ("Ll" letter        lowercase)
34     ("Lt" letter        titlecase)
35     ("Mn" mark          non-spacing)
36     ("Mc" mark          spacing-combining)
37     ("Me" mark          enclosing)
38     ("Nd" number        decimal-digit)
39     ("Nl" number        letter)
40     ("No" number        other)
41     ("Zs" separator     space)
42     ("Zl" separator     line)
43     ("Zp" separator     paragraph)
44     ("Cc" other         control)
45     ("Cf" other         format)
46     ("Cs" other         surrogate)
47     ("Co" other         private-use)
48     ("Cn" other         not-assigned)))
49
50 (defconst unidata-informative-category-alist
51   '(("Lm" letter        modifier)
52     ("Lo" letter        other)
53     ("Pc" punctuation   connector)
54     ("Pd" punctuation   dash)
55     ("Ps" punctuation   open)
56     ("Pe" punctuation   close)
57     ("Pi" punctuation   initial-quote)
58     ("Pf" punctuation   final-quote)
59     ("Po" punctuation   other)
60     ("Sm" symbol        math)
61     ("Sc" symbol        currency)
62     ("Sk" symbol        modifier)
63     ("So" symbol        other)
64     ))
65
66 (defconst shuowen-radicals
67   [?一 ?上 ?示 ?三 ?王 ?玉 ?玨 ?气 ?士 ?丨 ; 010
68    ?屮 ?艸 ?蓐 ?茻 ?小 ?八 ?釆 ?半 ?牛 ?犛 ; 020
69    ?告 ?口 ?凵 ?吅 ?哭 ?走 ?止 ?癶 ?步 ?此 ; 030
70    ?正 ?是 ?辵 ?彳 ?廴 ?㢟 ?行 ?齒 ?牙 ?足 ; 040
71    ?疋 ?品 ?龠 ?冊 ?㗊 ?舌 ?干 ?𧮫 ?只 ?㕯 ; 050
72    ?句 ?丩 ?古 ?十 ?卅 ?言 ?誩 ?音 ?䇂 ?丵 ; 060
73    ?菐 ?廾 ?𠬜 ?共 ?異 ?舁 ?𦥑 ?䢅 ?爨 ?革 ; 070
74    ?鬲 ?䰜 ?爪 ?𠃨 ?鬥 ?又 ?𠂇 ?史 ?支 ?𦘒 ; 080
75    ?聿 ?畫 ?隶 ?臤 ?臣 ?殳 ?殺 ?𠘧 ?寸 ?皮 ; 090
76    ?㼱 ?攴 ?敎 ?卜 ?用 ?爻 ?㸚 ?𥄎 ?目 ?䀠 ; 100
77    ?眉 ?盾 ?自 ?白 ?鼻 ?皕 ?習 ?羽 ?隹 ?奞 ; 110
78    ?雈 ?𦫳 ?𥄕 ?羊 ?羴 ?瞿 ?雔 ?雥 ?鳥 ?烏 ; 120
79    ?𠦒 ?冓 ?幺 ?𢆶 ?叀 ?玄 ?予 ?放 ?𠬪 ?𣦼 ; 130
80    ?歺 ?死 ?冎 ?骨 ?肉 ?筋 ?刀 ?刃 ?㓞 ?丯 ; 140
81    ?耒 ?角 ?竹 ?箕 ?丌 ?左 ?工 ?㠭 ?巫 ?甘 ; 150
82    ?曰 ?乃 ?丂 ?可 ?兮 ?号 ?亏 ?旨 ?喜 ?壴 ; 160
83 ;  ?旨 ?曰 ?乃 ?丂 ?可 ?兮 ?号 ?亏 ?喜 ?壴 ; 160
84    ?鼓 ?豈 ?豆 ?豊 ?豐 ?䖒 ?虍 ?虎 ?虤 ?皿 ; 170
85    ?𠙴 ?去 ?血 ?丶 ?丹 ?青 ?井 ?皀 ?鬯 ?食 ; 180
86    ?亼 ?會 ?倉 ?入 ?缶 ?矢 ?高 ?冂 ?𩫏 ?京 ; 190
87    ?亯 ?𣆪 ?畗 ?㐭 ?嗇 ?來 ?麥 ?夊 ?舛 ?䑞 ; 200
88    ?韋 ?弟 ?夂 ?久 ?桀 ?木 ?東 ?林 ?才 ?叒 ; 210
89    ?之 ?帀 ?出 ?𣎵 ?生 ?乇 ?𠂹 ?𠌶 ?華 ?𥝌 ; 220
90    ?稽 ?巢 ?桼 ?束 ?㯻 ?囗 ?員 ?貝 ?邑 ?𨛜 ; 230
91    ?日 ?旦 ?倝 ?㫃 ?冥 ?晶 ?月 ?有 ?明 ?囧 ; 240
92    ?夕 ?多 ?毌 ?𢎘 ?𣐺 ?𠧪 ?齊 ?朿 ?片 ?鼎 ; 250
93    ?克 ?彔 ?禾 ?秝 ?黍 ?香 ?米 ?毇 ?臼 ?凶 ; 260
94    ?𣎳 ?𣏟 ?麻 ?尗 ?耑 ?韭 ?瓜 ?瓠 ?宀 ?宮 ; 270
95    ?呂 ?穴 ?㝱 ?疒 ?冖 ?𠔼 ?冃 ?㒳 ?网 ?襾 ; 280
96    ?巾 ?巿 ?帛 ?白 ?㡀 ?黹 ?人 ?𠤎 ?匕 ?从 ; 290
97    ?比 ?北 ?丘 ?㐺 ?𡈼 ?重 ?臥 ?身 ?㐆 ?衣 ; 300
98    ?裘 ?老 ?毛 ?毳 ?尸 ?尺 ?尾 ?履 ?舟 ?方 ; 310
99    ?儿 ?兄 ?兂 ?皃 ?𠑹 ?先 ?秃 ?見 ?覞 ?欠 ; 320
100    ?㱃 ?㳄 ?旡 ?頁 ?𦣻 ?面 ?丏 ?首 ?𥄉 ?須 ; 330
101    ?彡 ?彣 ?文 ?髟 ?后 ?司 ?卮 ?卩 ?印 ?色 ; 340
102    ?𠨍 ?辟 ?勹 ?包 ?茍 ?鬼 ?甶 ?厶 ?嵬 ?山 ; 350
103    ?屾 ?屵 ?广 ?厂 ?丸 ?危 ?石 ?長 ?勿 ?冄 ; 360
104    ?而 ?豕 ?㣇 ?彑 ?豚 ?豸 ?𤉡 ?易 ?象 ?馬 ; 370
105    ?𢊁 ?鹿 ?麤 ?㲋 ?兔 ?萈 ?犬 ?㹜 ?鼠 ?能 ; 380
106    ?熊 ?火 ?炎 ?黑 ?囪 ?焱 ?炙 ?赤 ?大 ?亦 ; 390
107    ?夨 ?夭 ?交 ?尣 ?壺 ?壹 ?㚔 ?奢 ?亢 ?夲 ; 400
108    ])
109
110 (defun shuowen-radical (number)
111   (aref shuowen-radicals (1- number)))
112
113 (defvar char-db-file-coding-system 'utf-8-mcs-er)
114
115 (defvar char-db-ignored-attributes '(ideographic-products))
116
117 (defvar char-db-coded-charset-priority-list
118   '(ascii
119     control-1
120     latin-iso8859-1
121     latin-iso8859-2
122     latin-iso8859-3
123     latin-iso8859-4
124     latin-iso8859-9
125     latin-jisx0201
126     cyrillic-iso8859-5
127     greek-iso8859-7
128     thai-tis620
129     ;; =mj
130     ;; =adobe-japan1-0
131     ;; =adobe-japan1-1
132     ;; =adobe-japan1-2
133     ;; =adobe-japan1-3
134     ;; =adobe-japan1-4
135     ;; =adobe-japan1-5
136     ;; =adobe-japan1-6
137     =jis-x0208
138     =jis-x0208@1978
139     =jis-x0208@1983
140     japanese-jisx0212
141     chinese-gb2312
142     =jis-x0208@1990
143     chinese-cns11643-1
144     chinese-cns11643-2
145     chinese-cns11643-3
146     chinese-cns11643-4
147     chinese-cns11643-5
148     chinese-cns11643-6
149     chinese-cns11643-7
150     =jis-x0213-1
151     =jis-x0213-1@2000
152     =jis-x0213-1@2004
153     =jis-x0213-2
154     korean-ksc5601
155     chinese-isoir165
156     katakana-jisx0201
157     hebrew-iso8859-8
158     chinese-gb12345
159     latin-viscii
160     ethiopic-ucs
161     =big5-cdp
162     =hanyo-denshi/ja
163     =hanyo-denshi/jb
164     =hanyo-denshi/jc
165     =hanyo-denshi/jd
166     =hanyo-denshi/ft
167     =hanyo-denshi/ia
168     =hanyo-denshi/ib
169     =hanyo-denshi/hg
170     =hanyo-denshi/jt
171     =hanyo-denshi/ks
172     =hanyo-denshi/tk
173     ;; ==mj
174     ;; ==adobe-japan1-0
175     ;; ==adobe-japan1-1
176     ;; ==adobe-japan1-2
177     ;; ==adobe-japan1-3
178     ;; ==adobe-japan1-4
179     ;; ==adobe-japan1-5
180     ;; ==adobe-japan1-6
181     ==jis-x0208
182     ==jis-x0213-1
183     ==jis-x0213-2
184     ==jis-x0212
185     ==hanyo-denshi/ja
186     ==hanyo-denshi/jb
187     ==hanyo-denshi/jc
188     ==hanyo-denshi/ft
189     ==hanyo-denshi/ia
190     ==hanyo-denshi/ib
191     ==hanyo-denshi/hg
192     ==hanyo-denshi/jt
193     ==hanyo-denshi/ks
194     ==hanyo-denshi/tk
195     =gt
196     =gt-k
197     =daikanwa
198     =daijiten
199     =daikanwa@rev2
200     =daikanwa@rev1
201     =daikanwa/+p
202     ==daikanwa
203     ==daijiten
204     =cbeta
205     ideograph-hanziku-1
206     ideograph-hanziku-2
207     ideograph-hanziku-3
208     ideograph-hanziku-4
209     ideograph-hanziku-5
210     ideograph-hanziku-6
211     ideograph-hanziku-7
212     ideograph-hanziku-8
213     ideograph-hanziku-9
214     ideograph-hanziku-10
215     ideograph-hanziku-11
216     ideograph-hanziku-12
217     ;; =>>>adobe-japan1-0
218     ;; =>>>adobe-japan1-1
219     ;; =>>>adobe-japan1-2
220     ;; =>>>adobe-japan1-3
221     ;; =>>>adobe-japan1-4
222     ;; =>>>adobe-japan1-5
223     ;; =>>>adobe-japan1-6
224     ;; =>>>jis-x0208
225     ;; =>>>jis-x0213-1
226     ;; =>>>jis-x0213-2
227     ;; =>>>hanyo-denshi/ja
228     ;; =>>>hanyo-denshi/jb
229     ;; =>>>hanyo-denshi/jc
230     ;; =>>>hanyo-denshi/ft
231     ;; =>>>hanyo-denshi/ib
232     ;; =>>>hanyo-denshi/hg
233     ;; =>>>hanyo-denshi/jt
234     ;; =>>>hanyo-denshi/ks
235     ;; =>>>gt
236     =>>adobe-japan1-0
237     =>>adobe-japan1-1
238     =>>adobe-japan1-2
239     =>>adobe-japan1-3
240     =>>adobe-japan1-4
241     =>>adobe-japan1-5
242     =>>adobe-japan1-6
243     =>>jis-x0208
244     =>>jis-x0213-1
245     =>>jis-x0213-1@2000
246     =>>jis-x0213-1@2004
247     =>>jis-x0213-2
248     =>>jis-x0208@1978
249     =>>hanyo-denshi/ft
250     =>>hanyo-denshi/jt
251     =>>hanyo-denshi/ks
252     =>>gt
253     =>>daikanwa
254     =>>cbeta
255     =+>jis-x0208
256     =+>jis-x0213-1
257     =+>jis-x0213-2
258     =+>adobe-japan1-0
259     =+>adobe-japan1-1
260     =+>adobe-japan1-2
261     =+>adobe-japan1-3
262     =+>adobe-japan1-4
263     =+>adobe-japan1-5
264     =+>adobe-japan1-6
265     =+>jis-x0208@1978
266     =>jis-x0208
267     =>jis-x0208@1997
268     =>jis-x0213-1
269     =>jis-x0213-1@2000
270     =>jis-x0213-1@2004
271     =>jis-x0213-2
272     ==>ucs@bucs
273     ==>daijiten
274     =>iwds-1
275     ;; =>ucs@hanyo-denshi
276     =>ucs@iso
277     =>ucs@unicode
278     =>ucs@jis
279     =>ucs@cns
280     =>ucs@ks
281     =+>ucs@iso
282     =+>ucs@unicode
283     =+>ucs@jis
284     =+>ucs@jis/1990
285     =+>ucs@cns
286     =+>ucs@ks
287     =>>ucs@iso
288     =>>ucs@unicode
289     =>>ucs@jis
290     =>>ucs@cns
291     =>>>ucs@iso
292     =>>>ucs@unicode
293     ==ucs@iso
294     ==ucs@unicode
295     ;; ==ucs@cns
296     ==gb2312
297     ==ks-x1001
298     ==cns11643-1
299     ==cns11643-2
300     ==cns11643-3
301     ==cns11643-4
302     ==cns11643-5
303     ==cns11643-6
304     ==cns11643-7
305     ==gt
306     ==jis-x0208@1990
307     ;; ==jis-x0208@1983
308     ==jis-x0208@1978
309     ==gt-k
310     =ucs@iso
311     =ucs@unicode
312     =ucs@cns
313     ==big5-cdp
314     ==cbeta
315     =>>big5-cdp
316     =>>gt-k
317     =+>gt
318     =+>big5-cdp
319     =>gt
320     =>mj
321     =>big5-cdp
322     =>daikanwa
323     =>daikanwa/ho
324     =>cns11643-5
325     =>cns11643-7
326     =big5
327     =big5-eten
328     =>gt-k
329     =zinbun-oracle
330     =>zinbun-oracle
331     =ruimoku-v6
332     =>>ruimoku-v6
333     ==ruimoku-v6
334     =jef-china3
335     =>cbeta
336     =shinjigen
337     =ucs-var-001
338     =ucs-var-002
339     =ucs-var-003
340     =ucs-var-004
341     =ucs-var-005
342     =ucs-var-006
343     =ucs-var-007
344     =ucs-var-008
345     =ucs-var-010
346     =ucs-itaiji-001
347     =ucs-itaiji-002
348     =ucs-itaiji-003
349     =ucs-itaiji-004
350     =ucs-itaiji-005
351     =ucs-itaiji-006
352     =ucs-itaiji-007
353     =ucs-itaiji-008
354     =ucs-itaiji-009
355     =ucs-itaiji-010
356     =ucs-itaiji-011
357     =ucs-itaiji-012
358     =>ucs-itaiji-001
359     =>ucs-itaiji-002
360     =>ucs-itaiji-003
361     =>ucs-itaiji-004
362     =>ucs-itaiji-005
363     =>ucs-itaiji-006
364     =>ucs-itaiji-007
365     =>ucs-itaiji-008
366     =>ucs-itaiji-009
367     =>ucs-itaiji-010
368     =>ucs-itaiji-011
369     =big5-cdp-var-001
370     =big5-cdp-var-002
371     =big5-cdp-var-003
372     =big5-cdp-var-004
373     =big5-cdp-var-005
374     =big5-cdp-var-010
375     =big5-cdp-itaiji-001
376     =big5-cdp-itaiji-002
377     =big5-cdp-itaiji-003
378     =>big5-cdp-itaiji-001
379     =>big5-cdp-itaiji-003
380     =>big5-cdp-var-001
381     =>ucs@iwds-1
382     =>ucs@cognate
383     =>ucs@component
384     =>ucs@iwds-1/normalized
385     =>ucs-itaiji-001@iwds-1
386     =>ucs-itaiji-002@iwds-1
387     =>big5-cdp@iwds-1
388     =>big5-cdp@component
389     =>big5-cdp@cognate
390     ==ucs@gb
391     =ucs@gb
392     ==ucs-var-002
393     =ucs@JP/hanazono))
394
395
396 ;;; @ char-db formatters
397 ;;;
398
399 (defun char-db-make-char-spec (char)
400   (let (ret char-spec)
401     (cond ((characterp char)
402            (cond ((and (setq ret (encode-char char '=ucs 'defined-only))
403                        (not (and (<= #xE000 ret)(<= ret #xF8FF))))
404                   (setq char-spec (list (cons '=ucs ret)))
405                   (cond ((setq ret (get-char-attribute char 'name))
406                          (setq char-spec (cons (cons 'name ret) char-spec))
407                          )
408                         ((setq ret (get-char-attribute char 'name*))
409                          (setq char-spec (cons (cons 'name* ret) char-spec))
410                          ))
411                   )
412                  ((encode-char char '=mj 'defined-only)
413                   (setq char-spec nil)
414                   (dolist (ccs (charset-list))
415                     (if (and (or (eq ccs '=mj)
416                                  ;; (eq (charset-property ccs 'iso-ir) 177)
417                                  (string-match "=ucs@" (symbol-name ccs))
418                                  )
419                              (setq ccs (charset-name ccs))
420                              (null (assq ccs char-spec))
421                              (setq ret (encode-char char ccs 'defined-only)))
422                         (setq char-spec (cons (cons ccs ret) char-spec))))
423                   )
424                  ((encode-char char '==mj 'defined-only)
425                   (setq char-spec nil)
426                   (dolist (ccs (charset-list))
427                     (if (and (or (eq ccs '==mj)
428                                  ;; (eq (charset-property ccs 'iso-ir) 177)
429                                  (string-match "=ucs@" (symbol-name ccs))
430                                  )
431                              (setq ccs (charset-name ccs))
432                              (null (assq ccs char-spec))
433                              (setq ret (encode-char char ccs 'defined-only)))
434                         (setq char-spec (cons (cons ccs ret) char-spec))))
435                   )
436                  ((encode-char char '=adobe-japan1 'defined-only)
437                   (setq char-spec nil)
438                   (dolist (ccs (charset-list))
439                     (if (and (or (memq ccs
440                                        '(=adobe-japan1-0
441                                          =adobe-japan1-1
442                                          =adobe-japan1-2
443                                          =adobe-japan1-3
444                                          =adobe-japan1-4
445                                          =adobe-japan1-5
446                                          =adobe-japan1-6
447                                          ))
448                                  ;; (eq (charset-property ccs 'iso-ir) 177)
449                                  (string-match "=ucs@" (symbol-name ccs))
450                                  )
451                              (setq ccs (charset-name ccs))
452                              (null (assq ccs char-spec))
453                              (setq ret (encode-char char ccs 'defined-only)))
454                         (setq char-spec (cons (cons ccs ret) char-spec))))
455                   )
456                  ((encode-char char '==adobe-japan1 'defined-only)
457                   (setq char-spec nil)
458                   (dolist (ccs (charset-list))
459                     (if (and (or (memq ccs
460                                        '(==adobe-japan1-0
461                                          ==adobe-japan1-1
462                                          ==adobe-japan1-2
463                                          ==adobe-japan1-3
464                                          ==adobe-japan1-4
465                                          ==adobe-japan1-5
466                                          ==adobe-japan1-6
467                                          ))
468                                  ;; (eq (charset-property ccs 'iso-ir) 177)
469                                  (string-match "=ucs@" (symbol-name ccs))
470                                  )
471                              (setq ccs (charset-name ccs))
472                              (null (assq ccs char-spec))
473                              (setq ret (encode-char char ccs 'defined-only)))
474                         (setq char-spec (cons (cons ccs ret) char-spec))))
475                   )
476                  ((setq ret
477                         (catch 'tag
478                           (let ((rest char-db-coded-charset-priority-list)
479                                 ccs)
480                             (while rest
481                               (setq ccs (charset-name
482                                          (find-charset (car rest))))
483                               (if (setq ret
484                                         (encode-char char ccs
485                                                      'defined-only))
486                                   (throw 'tag (cons ccs ret)))
487                               (setq rest (cdr rest))))))
488                   (setq char-spec (list ret))
489                   (dolist (ccs (delq (car ret) (charset-list)))
490                     (if (and (or (charset-iso-final-char ccs)
491                                  (memq ccs
492                                        '(=daikanwa
493                                          =daikanwa@rev2
494                                          ;; =gt-k
495                                          =jis-x0208@1997
496                                          ))
497                                  (and (eq (charset-property ccs 'iso-ir) 177)
498                                       (not (eq (charset-name ccs) '=ucs@big5)))
499                                  ;; (string-match "=ucs@" (symbol-name ccs))
500                                  )
501                              (setq ccs (charset-name ccs))
502                              (null (assq ccs char-spec))
503                              (setq ret (encode-char char ccs 'defined-only)))
504                         (setq char-spec (cons (cons ccs ret) char-spec))))
505                   (if (null char-spec)
506                       (setq char-spec (split-char char)))
507                   (cond ((setq ret (get-char-attribute char 'name))
508                          (setq char-spec (cons (cons 'name ret) char-spec))
509                          )
510                         ((setq ret (get-char-attribute char 'name*))
511                          (setq char-spec (cons (cons 'name* ret) char-spec))
512                          ))
513                   )
514                  ((setq ret (get-char-attribute
515                              char 'ideographic-combination))
516                   (setq char-spec
517                         (cons (cons 'ideographic-combination ret)
518                               char-spec))
519                   ))
520            char-spec)
521           ((consp char)
522            char))))
523     
524 (defun char-db-insert-char-spec (char &optional readable column
525                                       required-features)
526   (unless column
527     (setq column (current-column)))
528   (let (char-spec temp-char)
529     (setq char-spec (char-db-make-char-spec char))
530     (unless (or (characterp char) ; char
531                 (condition-case nil
532                     (setq char (find-char char-spec))
533                   (error nil)))
534       ;; define temporary character
535       ;;   Current implementation is dirty.
536       (setq temp-char (define-char (cons '(ideograph-daikanwa . 0)
537                                          char-spec)))
538       (remove-char-attribute temp-char 'ideograph-daikanwa)
539       (setq char temp-char))
540     (insert-char-attributes char
541                             readable
542                             (union (mapcar #'car char-spec)
543                                    required-features)
544                             nil 'for-sub-node)
545     (when temp-char
546       ;; undefine temporary character
547       ;;   Current implementation is dirty.
548       (setq char-spec (char-attribute-alist temp-char))
549       (while char-spec
550         (remove-char-attribute temp-char (car (car char-spec)))
551         (setq char-spec (cdr char-spec))))))
552
553 (defun char-db-insert-alist (alist &optional readable column)
554   (unless column
555     (setq column (current-column)))
556   (let ((line-breaking
557          (concat "\n" (make-string (1+ column) ?\ )))
558         name value
559         ret al ; cal
560         key
561         lbs cell rest separator)
562     (insert "(")
563     (while alist
564       (setq name (car (car alist))
565             value (cdr (car alist)))
566       (cond ((eq name 'char)
567              (insert "(char . ")
568              (if (setq ret (condition-case nil
569                                (find-char value)
570                              (error nil)))
571                  (progn
572                    (setq al nil
573                          ;; cal nil
574                          )
575                    (while value
576                      (setq key (car (car value)))
577                      ;; (if (find-charset key)
578                      ;;     (setq cal (cons key cal))
579                      (setq al (cons key al))
580                      ;; )
581                      (setq value (cdr value)))
582                    (insert-char-attributes ret
583                                            readable
584                                            (or al 'none) ; cal
585                                            nil 'for-sub-node))
586                (insert (prin1-to-string value)))
587              (insert ")")
588              (insert line-breaking))
589             ((consp value)
590              (insert (format "(%-18s " name))
591              (setq lbs (concat "\n" (make-string (current-column) ?\ )))
592              (while (consp value)
593                (setq cell (car value))
594                (if (and (consp cell)
595                         (consp (car cell))
596                         (setq ret (condition-case nil
597                                       (find-char cell)
598                                     (error nil)))
599                         )
600                    (progn
601                      (setq rest cell
602                            al nil
603                            ;; cal nil
604                            )
605                      (while rest
606                        (setq key (car (car rest)))
607                        ;; (if (find-charset key)
608                        ;;     (setq cal (cons key cal))
609                        (setq al (cons key al))
610                        ;; )
611                        (setq rest (cdr rest)))
612                      (if separator
613                          (insert lbs))
614                      (insert-char-attributes ret
615                                              readable
616                                              al ; cal
617                                              nil 'for-sub-node)
618                      (setq separator lbs))
619                  (if separator
620                      (insert separator))
621                  (insert (prin1-to-string cell))
622                  (setq separator " "))
623                (setq value (cdr value)))
624              (insert ")")
625              (insert line-breaking))
626             (t
627              (insert (format "(%-18s . %S)%s"
628                              name value
629                              line-breaking))))
630       (setq alist (cdr alist))))
631   (insert ")"))
632
633 (defun char-db-insert-char-reference (plist &optional readable column)
634   (unless column
635     (setq column (current-column)))
636   (let ((line-breaking
637          (concat "\n" (make-string (1+ column) ?\ )))
638         (separator "")
639         name value)
640     (insert "(")
641     (while plist
642       (setq name (pop plist))
643       (setq value (pop plist))
644       (cond ((eq name :char)
645              (insert separator)
646              (insert ":char\t")
647              (cond ((numberp value)
648                     (setq value (decode-char '=ucs value)))
649                    ;; ((consp value)
650                    ;;  (setq value (or (find-char value)
651                    ;;                  value)))
652                    )
653              (char-db-insert-char-spec value readable)
654              (insert line-breaking)
655              (setq separator ""))
656             ((eq name :radical)
657              (insert (format "%s%s\t%d ; %c%s"
658                              separator
659                              name value
660                              (ideographic-radical value)
661                              line-breaking))
662              (setq separator ""))
663             (t
664              (insert (format "%s%s\t%S" separator name value))
665              (setq separator line-breaking)))
666       ))
667   (insert ")"))
668
669 (defun char-db-decode-isolated-char (ccs code-point)
670   (let (ret)
671     (setq ret
672           (cond ((eq ccs 'arabic-iso8859-6)
673                  (decode-char ccs code-point))
674                 ;; ((eq ccs '=gt)
675                 ;;  (decode-builtin-char '==gt code-point))
676                 ((and (memq ccs '(=gt-pj-1
677                                   =gt-pj-2
678                                   =gt-pj-3
679                                   =gt-pj-4
680                                   =gt-pj-5
681                                   =gt-pj-6
682                                   =gt-pj-7
683                                   =gt-pj-8
684                                   =gt-pj-9
685                                   =gt-pj-10
686                                   =gt-pj-11))
687                       (setq ret (decode-char ccs code-point))
688                       (setq ret (encode-char ret '=gt 'defined-only)))
689                  (decode-builtin-char '=gt ret))
690                 (t
691                  (decode-builtin-char ccs code-point))))
692     (cond ((null ret)
693            (or (decode-char ccs code-point)
694                (define-char (list (cons ccs code-point)))))
695           ((and (<= 0 (char-int ret))
696                 (<= (char-int ret) #x1F))
697            (decode-char '=ucs (+ #x2400 (char-int ret))))
698           ((= (char-int ret) #x7F)
699            ?\u2421)
700           (t ret))))
701
702 (defvar char-db-convert-obsolete-format t)
703
704 (defun char-db-insert-ccs-feature (name value line-breaking)
705   (cond
706    ((integerp value)
707     (insert
708      (format
709       (cond
710        ((memq name '(=>iwds-1
711                      ==shinjigen
712                      =shinjigen
713                      =shinjigen@1ed ==shinjigen@1ed
714                      =shinjigen@rev ==shinjigen@rev
715                      =shinjigen/+p@rev ==shinjigen/+p@rev
716                      ===daikanwa/ho ==daikanwa/ho
717                      =daikanwa/ho =>>daikanwa/ho =>daikanwa/ho))
718         "(%-18s .  %04d)\t; %c")
719        ((eq name '=shinjigen@1ed/24pr)
720         "(%-18s . %04d)\t; %c")
721        ((or
722          (memq name
723                '(===daikanwa
724                  ==daikanwa =daikanwa =>>daikanwa =>daikanwa
725                  =daikanwa@rev1 =daikanwa@rev2
726                  =daikanwa/+p ==daikanwa/+p ===daikanwa/+p
727                  =>>daikanwa/+p
728                  =daikanwa/+2p =>>daikanwa/+2p
729                  =gt ==gt ===gt
730                  =>>gt =+>gt =>gt
731                  =gt-k ==gt-k ===gt-k
732                  =>>gt-k =>gt-k
733                  =adobe-japan1-0 ==adobe-japan1-0 ===adobe-japan1-0
734                  =adobe-japan1-1 ==adobe-japan1-1 ===adobe-japan1-1
735                  =adobe-japan1-2 ==adobe-japan1-2 ===adobe-japan1-2
736                  =adobe-japan1-3 ==adobe-japan1-3 ===adobe-japan1-3
737                  =adobe-japan1-4 ==adobe-japan1-4 ===adobe-japan1-4
738                  =adobe-japan1-5 ==adobe-japan1-5 ===adobe-japan1-5
739                  =adobe-japan1-6 ==adobe-japan1-6 ===adobe-japan1-6
740                  =>>adobe-japan1-0 =+>adobe-japan1-0
741                  =>>adobe-japan1-1 =+>adobe-japan1-1
742                  =>>adobe-japan1-2 =+>adobe-japan1-2
743                  =>>adobe-japan1-3 =+>adobe-japan1-3
744                  =>>adobe-japan1-4 =+>adobe-japan1-4
745                  =>>adobe-japan1-5 =+>adobe-japan1-5
746                  =>>adobe-japan1-6 =+>adobe-japan1-6
747                  =>cbeta =cbeta =>>cbeta ==cbeta ===cbeta
748                  =zinbun-oracle =>zinbun-oracle
749                  =daijiten ==daijiten ===daijiten ==>daijiten
750                  ===hng-jou ===hng-keg ===hng-dng ===hng-mam
751                  ===hng-drt ===hng-kgk ===hng-myz ===hng-kda
752                  ===hng-khi ===hng-khm ===hng-fhs ===hng-hok
753                  ===hng-kyd ===hng-sok
754                  ===hng-yhk ===hng-kak ===hng-kar ===hng-kae
755                  ===hng-sys ===hng-tsu ===hng-tzj
756                  ===hng-hos ===hng-kkh ===hng-nak ===hng-jhk
757                  ===hng-hod ===hng-gok ===hng-ink ===hng-nto
758                  ===hng-nkm ===hng-k24 ===hng-ini ===hng-nkk
759                  ===hng-kcc ===hng-kcj ===hng-kbk ===hng-sik
760                  ===hng-skk ===hng-kyu ===hng-ksk ===hng-wan
761                  ===hng-okd ===hng-wad ===hng-kmi
762                  ===hng-zkd ===hng-doh ===hng-jyu ===hng-tzs
763                  ===hng-sai ===hng-kad ===hng-kss
764                  ===hng-kyo ===hng-ykk ===hng-saa
765                  ===hng-sab ===hng-wks ===hng-wke ===hng-smk
766                  ===hng-sgs ===hng-sts
767                  =shuowen-jiguge ===shuowen-jiguge4 ===shuowen-jiguge5))
768          ;; (string-match "^=adobe-" (symbol-name name))
769          )
770         "(%-18s . %05d)\t; %c")
771        ((memq name '(=hanyo-denshi/ks
772                      ==hanyo-denshi/ks ===hanyo-denshi/ks
773                      =>>hanyo-denshi/ks
774                      =koseki ==koseki
775                      =mj ==mj ===mj =>>mj =>mj
776                      =zihai mojikyo))
777         "(%-18s . %06d)\t; %c")
778        ((memq name '(=hanyo-denshi/tk ==hanyo-denshi/tk))
779         "(%-18s . %08d)\t; %c")
780        ((>= (charset-dimension name) 2)
781         "(%-18s . #x%04X)\t; %c")
782        (t
783         "(%-18s . #x%02X)\t; %c"))
784       name
785       (if (= (charset-iso-graphic-plane name) 1)
786           (logior value
787                   (cond ((= (charset-dimension name) 1)
788                          #x80)
789                         ((= (charset-dimension name) 2)
790                          #x8080)
791                         ((= (charset-dimension name) 3)
792                          #x808080)
793                         (t 0)))
794         value)
795       (char-db-decode-isolated-char name value)))
796     (if (and (= (charset-chars name) 94)
797              (= (charset-dimension name) 2))
798         (insert (format " [%02d-%02d]"
799                         (- (lsh value -8) 32)
800                         (- (logand value 255) 32))))
801     )
802    (t
803     (insert (format "(%-18s . %s)" name value))
804     ))
805   (insert line-breaking))
806
807 (defun char-db-insert-relation-feature (char name value line-breaking
808                                              ccss readable)
809   (insert (format "(%-18s%s " name line-breaking))
810   (let ((lbs (concat "\n" (make-string (current-column) ?\ )))
811         separator cell sources required-features
812         ret)
813     (while (consp value)
814       (setq cell (car value))
815       (if (integerp cell)
816           (setq cell (decode-char '=ucs cell)))
817       (cond
818        ((eq name '->subsumptive)
819         (when (or (not (some (lambda (atr)
820                                (get-char-attribute cell atr))
821                              char-db-ignored-attributes))
822                   (some (lambda (ccs)
823                           (encode-char cell ccs 'defined-only))
824                         ccss))
825           (if separator
826               (insert lbs))
827           (let ((char-db-ignored-attributes
828                  (cons '<-subsumptive
829                        char-db-ignored-attributes)))
830             (insert-char-attributes cell readable nil nil 'for-sub-node))
831           (setq separator lbs))
832         )
833        ((characterp cell)
834         (setq sources
835               (get-char-attribute
836                char (intern (format "%s*sources" name))))
837         (setq required-features nil)
838         (dolist (source sources)
839           (cond
840            ((memq source '(JP
841                            JP/Jouyou
842                            shinjigen shinjigen@1ed shinjigen@rev))
843             (setq required-features
844                   (union required-features
845                          '(=jis-x0208
846                            =jis-x0208@1990
847                            =jis-x0213-1@2000
848                            =jis-x0213-1@2004
849                            =jis-x0213-2
850                            =jis-x0212
851                            =jis-x0208@1983
852                            =jis-x0208@1978
853                            =shinjigen
854                            =shinjigen@1ed
855                            =shinjigen@rev
856                            =shinjigen/+p@rev))))
857            ((eq source 'CN)
858             (setq required-features
859                   (union required-features
860                          '(=gb2312
861                            =gb12345
862                            =iso-ir165)))))
863           (cond
864            ((find-charset (setq ret (intern (format "=%s" source))))
865             (setq required-features
866                   (cons ret required-features)))
867            (t (setq required-features
868                     (cons source required-features)))))
869         (cond ((string-match "@JP" (symbol-name name))
870                (setq required-features
871                      (union required-features
872                             '(=jis-x0208
873                               =jis-x0208@1990
874                               =jis-x0213-1-2000
875                               =jis-x0213-2-2000
876                               =jis-x0212
877                               =jis-x0208@1983
878                               =jis-x0208@1978))))
879               ((string-match "@CN" (symbol-name name))
880                (setq required-features
881                      (union required-features
882                             '(=gb2312
883                               =gb12345
884                               =iso-ir165)))))
885         (if separator
886             (insert lbs))
887         (if readable
888             (insert (format "%S" cell))
889           (char-db-insert-char-spec cell readable
890                                     nil
891                                     required-features))
892         (setq separator lbs))
893        ((consp cell)
894         (if separator
895             (insert lbs))
896         (if (consp (car cell))
897             (char-db-insert-char-spec cell readable)
898           (char-db-insert-char-reference cell readable))
899         (setq separator lbs))
900        (t
901         (if separator
902             (insert separator))
903         (insert (prin1-to-string cell))
904         (setq separator " ")))
905       (setq value (cdr value)))
906     (insert ")")
907     (insert line-breaking)))
908
909 (defun insert-char-attributes (char &optional readable attributes column
910                                     for-sub-node)
911   (unless column
912     (setq column (current-column)))
913   (let (name value ; has-long-ccs-name
914         rest
915         radical strokes
916         (line-breaking
917          (concat "\n" (make-string (1+ column) ?\ )))
918         lbs cell separator ret
919         key al cal
920         dest-ccss ; sources required-features
921         ccss)
922     (let (atr-d)
923       (setq attributes
924             (sort (if attributes
925                       (if (consp attributes)
926                           (progn
927                             (dolist (name attributes)
928                               (unless (memq name char-db-ignored-attributes)
929                                 (if (find-charset name)
930                                     (push name ccss))
931                                 (push name atr-d)))
932                             atr-d))
933                     (dolist (name (char-attribute-list))
934                       (unless (memq name char-db-ignored-attributes)
935                         (if (find-charset name)
936                             (push name ccss))
937                         (push name atr-d)))
938                     atr-d)
939                   #'char-attribute-name<)))
940     (insert "(")
941     (when (memq '<-subsumptive attributes)
942       (when (or readable (not for-sub-node))
943         (when (setq value (get-char-attribute char '<-subsumptive))
944           (char-db-insert-relation-feature char '<-subsumptive value
945                                            line-breaking
946                                            ccss readable)))
947       (setq attributes (delq '<-subsumptive attributes)))
948     (when (and (memq '<-denotational attributes)
949                (setq value (get-char-attribute char '<-denotational)))
950       (char-db-insert-relation-feature char '<-denotational value
951                                        line-breaking
952                                        ccss readable)
953       (setq attributes (delq '<-denotational attributes)))
954     (when (and (memq '<-denotational@component attributes)
955                (setq value (get-char-attribute char '<-denotational@component)))
956       (char-db-insert-relation-feature char '<-denotational@component value
957                                        line-breaking
958                                        ccss readable)
959       (setq attributes (delq '<-denotational@component attributes)))
960     (when (and (memq '<-denotational@usage attributes)
961                (setq value (get-char-attribute char '<-denotational@usage)))
962       (char-db-insert-relation-feature char '<-denotational@usage value
963                                        line-breaking
964                                        ccss readable)
965       (setq attributes (delq '<-denotational@usage attributes)))
966     (when (and (memq 'name attributes)
967                (setq value (get-char-attribute char 'name)))
968       (insert (format
969                (if (> (+ (current-column) (length value)) 48)
970                    "(name . %S)%s"
971                  "(name               . %S)%s")
972                value line-breaking))
973       (setq attributes (delq 'name attributes))
974       )
975     (when (and (memq 'name* attributes)
976                (setq value (get-char-attribute char 'name*)))
977       (insert (format
978                (if (> (+ (current-column) (length value)) 48)
979                    "(name* . %S)%s"
980                  "(name*              . %S)%s")
981                value line-breaking))
982       (setq attributes (delq 'name* attributes))
983       )
984     (when (and (memq 'script attributes)
985                (setq value (get-char-attribute char 'script)))
986       (insert (format "(script\t\t%s)%s"
987                       (mapconcat (function prin1-to-string)
988                                  value " ")
989                       line-breaking))
990       (setq attributes (delq 'script attributes))
991       )
992     (dolist (name '(=>ucs =>ucs*))
993       (when (and (memq name attributes)
994                  (setq value (get-char-attribute char name)))
995         (insert (format "(%-18s . #x%04X)\t; %c%s"
996                         name value (decode-char '=ucs value)
997                         line-breaking))
998         (setq attributes (delq name attributes))))
999     (dolist (name '(=>ucs@gb =>ucs@big5))
1000       (when (and (memq name attributes)
1001                  (setq value (get-char-attribute char name)))
1002         (insert (format "(%-18s . #x%04X)\t; %c%s"
1003                         name value
1004                         (decode-char (intern
1005                                       (concat "="
1006                                               (substring
1007                                                (symbol-name name) 2)))
1008                                      value)
1009                         line-breaking))
1010         (setq attributes (delq name attributes))
1011         ))
1012     ;; (dolist (name '(=>daikanwa))
1013     ;;   (when (and (memq name attributes)
1014     ;;              (setq value (get-char-attribute char name)))
1015     ;;     (insert
1016     ;;      (if (integerp value)
1017     ;;          (format "(%-18s . %05d)\t; %c%s"
1018     ;;                  name value (decode-char '=daikanwa value)
1019     ;;                  line-breaking)
1020     ;;        (format "(%-18s %s)\t; %c%s"
1021     ;;                name
1022     ;;                (mapconcat (function prin1-to-string)
1023     ;;                           value " ")
1024     ;;                (char-representative-of-daikanwa char)
1025     ;;                line-breaking)))
1026     ;;     (setq attributes (delq name attributes))))
1027     (when (and (memq 'general-category attributes)
1028                (setq value (get-char-attribute char 'general-category)))
1029       (insert (format
1030                "(general-category\t%s) ; %s%s"
1031                (mapconcat (lambda (cell)
1032                             (format "%S" cell))
1033                           value " ")
1034                (cond ((rassoc value unidata-normative-category-alist)
1035                       "Normative Category")
1036                      ((rassoc value unidata-informative-category-alist)
1037                       "Informative Category")
1038                      (t
1039                       "Unknown Category"))
1040                line-breaking))
1041       (setq attributes (delq 'general-category attributes))
1042       )
1043     (when (and (memq 'bidi-category attributes)
1044                (setq value (get-char-attribute char 'bidi-category)))
1045       (insert (format "(bidi-category\t. %S)%s"
1046                       value
1047                       line-breaking))
1048       (setq attributes (delq 'bidi-category attributes))
1049       )
1050     (unless (or (not (memq 'mirrored attributes))
1051                 (eq (setq value (get-char-attribute char 'mirrored 'empty))
1052                     'empty))
1053       (insert (format "(mirrored\t\t. %S)%s"
1054                       value
1055                       line-breaking))
1056       (setq attributes (delq 'mirrored attributes))
1057       )
1058     (cond
1059      ((and (memq 'decimal-digit-value attributes)
1060            (setq value (get-char-attribute char 'decimal-digit-value)))
1061       (insert (format "(decimal-digit-value . %S)%s"
1062                       value
1063                       line-breaking))
1064       (setq attributes (delq 'decimal-digit-value attributes))
1065       (when (and (memq 'digit-value attributes)
1066                  (setq value (get-char-attribute char 'digit-value)))
1067         (insert (format "(digit-value\t . %S)%s"
1068                         value
1069                         line-breaking))
1070         (setq attributes (delq 'digit-value attributes))
1071         )
1072       (when (and (memq 'numeric-value attributes)
1073                  (setq value (get-char-attribute char 'numeric-value)))
1074         (insert (format "(numeric-value\t . %S)%s"
1075                         value
1076                         line-breaking))
1077         (setq attributes (delq 'numeric-value attributes))
1078         )
1079       )
1080      (t
1081       (when (and (memq 'digit-value attributes)
1082                  (setq value (get-char-attribute char 'digit-value)))
1083         (insert (format "(digit-value\t. %S)%s"
1084                         value
1085                         line-breaking))
1086         (setq attributes (delq 'digit-value attributes))
1087         )
1088       (when (and (memq 'numeric-value attributes)
1089                  (setq value (get-char-attribute char 'numeric-value)))
1090         (insert (format "(numeric-value\t. %S)%s"
1091                         value
1092                         line-breaking))
1093         (setq attributes (delq 'numeric-value attributes))
1094         )))
1095     (when (and (memq 'iso-10646-comment attributes)
1096                (setq value (get-char-attribute char 'iso-10646-comment)))
1097       (insert (format "(iso-10646-comment\t. %S)%s"
1098                       value
1099                       line-breaking))
1100       (setq attributes (delq 'iso-10646-comment attributes))
1101       )
1102     (when (and (memq 'morohashi-daikanwa attributes)
1103                (setq value (get-char-attribute char 'morohashi-daikanwa)))
1104       (insert (format "(morohashi-daikanwa\t%s)%s"
1105                       (mapconcat (function prin1-to-string) value " ")
1106                       line-breaking))
1107       (setq attributes (delq 'morohashi-daikanwa attributes))
1108       )
1109     (setq radical nil
1110           strokes nil)
1111     (when (and (memq 'ideographic-radical attributes)
1112                (setq value (get-char-attribute char 'ideographic-radical)))
1113       (setq radical value)
1114       (insert (format "(ideographic-radical . %S)\t; %c%s"
1115                       radical
1116                       (ideographic-radical radical)
1117                       line-breaking))
1118       (setq attributes (delq 'ideographic-radical attributes))
1119       )
1120     (when (and (memq 'shuowen-radical attributes)
1121                (setq value (get-char-attribute char 'shuowen-radical)))
1122       (insert (format "(shuowen-radical\t. %S)\t; %c%s"
1123                       value
1124                       (shuowen-radical value)
1125                       line-breaking))
1126       (setq attributes (delq 'shuowen-radical attributes))
1127       )
1128     (let (key)
1129       (dolist (domain
1130                (append
1131                 char-db-feature-domains
1132                 (let (dest domain)
1133                   (dolist (feature (char-attribute-list))
1134                     (setq feature (symbol-name feature))
1135                     (when (string-match
1136                            "\\(radical\\|strokes\\)@\\([^@*]+\\)\\(\\*\\|$\\)"
1137                            feature)
1138                       (setq domain (intern (match-string 2 feature)))
1139                      (unless (memq domain dest)
1140                        (setq dest (cons domain dest)))))
1141                   (sort dest #'string<))))
1142         (setq key (intern (format "%s@%s" 'ideographic-radical domain)))
1143         (when (and (memq key attributes)
1144                    (setq value (get-char-attribute char key)))
1145           (setq radical value)
1146           (insert (format "(%s . %S)\t; %c%s"
1147                           key
1148                           radical
1149                           (ideographic-radical radical)
1150                           line-breaking))
1151           (setq attributes (delq key attributes))
1152           )
1153         (setq key (intern (format "%s@%s" 'ideographic-strokes domain)))
1154         (when (and (memq key attributes)
1155                    (setq value (get-char-attribute char key)))
1156           (setq strokes value)
1157           (insert (format "(%s . %S)%s"
1158                           key
1159                           strokes
1160                           line-breaking))
1161           (setq attributes (delq key attributes))
1162           )
1163         (setq key (intern (format "%s@%s" 'total-strokes domain)))
1164         (when (and (memq key attributes)
1165                    (setq value (get-char-attribute char key)))
1166           (insert (format "(%s       . %S)%s"
1167                           key
1168                           value
1169                           line-breaking))
1170           (setq attributes (delq key attributes))
1171           )
1172         (dolist (feature '(ideographic-radical
1173                            ideographic-strokes
1174                            total-strokes))
1175           (setq key (intern (format "%s@%s*sources" feature domain)))
1176           (when (and (memq key attributes)
1177                      (setq value (get-char-attribute char key)))
1178             (insert (format "(%s%s" key line-breaking))
1179             (dolist (cell value)
1180               (insert (format " %s" cell)))
1181             (insert ")")
1182             (insert line-breaking)
1183             (setq attributes (delq key attributes))
1184             ))
1185         ))
1186     (when (and (memq 'ideographic-strokes attributes)
1187                (setq value (get-char-attribute char 'ideographic-strokes)))
1188       (setq strokes value)
1189       (insert (format "(ideographic-strokes . %S)%s"
1190                       strokes
1191                       line-breaking))
1192       (setq attributes (delq 'ideographic-strokes attributes))
1193       )
1194     (when (and (memq 'kangxi-radical attributes)
1195                (setq value (get-char-attribute char 'kangxi-radical)))
1196       (unless (eq value radical)
1197         (insert (format "(kangxi-radical\t . %S)\t; %c%s"
1198                         value
1199                         (ideographic-radical value)
1200                         line-breaking))
1201         (or radical
1202             (setq radical value)))
1203       (setq attributes (delq 'kangxi-radical attributes))
1204       )
1205     (when (and (memq 'kangxi-strokes attributes)
1206                (setq value (get-char-attribute char 'kangxi-strokes)))
1207       (unless (eq value strokes)
1208         (insert (format "(kangxi-strokes\t . %S)%s"
1209                         value
1210                         line-breaking))
1211         (or strokes
1212             (setq strokes value)))
1213       (setq attributes (delq 'kangxi-strokes attributes))
1214       )
1215     ;; (when (and (memq 'japanese-radical attributes)
1216     ;;            (setq value (get-char-attribute char 'japanese-radical)))
1217     ;;   (unless (eq value radical)
1218     ;;     (insert (format "(japanese-radical\t . %S)\t; %c%s"
1219     ;;                     value
1220     ;;                     (ideographic-radical value)
1221     ;;                     line-breaking))
1222     ;;     (or radical
1223     ;;         (setq radical value)))
1224     ;;   (setq attributes (delq 'japanese-radical attributes))
1225     ;;   )
1226     (when (and (memq 'japanese-strokes attributes)
1227                (setq value (get-char-attribute char 'japanese-strokes)))
1228       (unless (eq value strokes)
1229         (insert (format "(japanese-strokes\t . %S)%s"
1230                         value
1231                         line-breaking))
1232         (or strokes
1233             (setq strokes value)))
1234       (setq attributes (delq 'japanese-strokes attributes))
1235       )
1236     (when (and (memq 'cns-radical attributes)
1237                (setq value (get-char-attribute char 'cns-radical)))
1238       (insert (format "(cns-radical\t . %S)\t; %c%s"
1239                       value
1240                       (ideographic-radical value)
1241                       line-breaking))
1242       (setq attributes (delq 'cns-radical attributes))
1243       )
1244     (when (and (memq 'cns-strokes attributes)
1245                (setq value (get-char-attribute char 'cns-strokes)))
1246       (unless (eq value strokes)
1247         (insert (format "(cns-strokes\t . %S)%s"
1248                         value
1249                         line-breaking))
1250         (or strokes
1251             (setq strokes value)))
1252       (setq attributes (delq 'cns-strokes attributes))
1253       )
1254     ;; (when (and (memq 'ideographic- attributes)
1255     ;;            (setq value (get-char-attribute char 'ideographic-)))
1256     ;;   (insert "(ideographic-       ")
1257     ;;   (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1258     ;;         separator nil)
1259     ;;   (while (consp value)
1260     ;;     (setq cell (car value))
1261     ;;     (if (integerp cell)
1262     ;;         (setq cell (decode-char '=ucs cell)))
1263     ;;     (cond ((characterp cell)
1264     ;;            (if separator
1265     ;;                (insert lbs))
1266     ;;            (if readable
1267     ;;                (insert (format "%S" cell))
1268     ;;              (char-db-insert-char-spec cell readable))
1269     ;;            (setq separator lbs))
1270     ;;           ((consp cell)
1271     ;;            (if separator
1272     ;;                (insert lbs))
1273     ;;            (if (consp (car cell))
1274     ;;                (char-db-insert-char-spec cell readable)
1275     ;;              (char-db-insert-char-reference cell readable))
1276     ;;            (setq separator lbs))
1277     ;;           (t
1278     ;;            (if separator
1279     ;;                (insert separator))
1280     ;;            (insert (prin1-to-string cell))
1281     ;;            (setq separator " ")))
1282     ;;     (setq value (cdr value)))
1283     ;;   (insert ")")
1284     ;;   (insert line-breaking)
1285     ;;   (setq attributes (delq 'ideographic- attributes)))
1286     (when (and (memq 'total-strokes attributes)
1287                (setq value (get-char-attribute char 'total-strokes)))
1288       (insert (format "(total-strokes       . %S)%s"
1289                       value
1290                       line-breaking))
1291       (setq attributes (delq 'total-strokes attributes))
1292       )
1293     ;; (when (and (memq '->ideograph attributes)
1294     ;;            (setq value (get-char-attribute char '->ideograph)))
1295     ;;   (insert (format "(->ideograph\t%s)%s"
1296     ;;                   (mapconcat (lambda (code)
1297     ;;                                (cond ((symbolp code)
1298     ;;                                       (symbol-name code))
1299     ;;                                      ((integerp code)
1300     ;;                                       (format "#x%04X" code))
1301     ;;                                      (t
1302     ;;                                       (format "%s %S"
1303     ;;                                               line-breaking code))))
1304     ;;                              value " ")
1305     ;;                   line-breaking))
1306     ;;   (setq attributes (delq '->ideograph attributes))
1307     ;;   )
1308     ;; (when (and (memq '->decomposition attributes)
1309     ;;            (setq value (get-char-attribute char '->decomposition)))
1310     ;;   (insert (format "(->decomposition\t%s)%s"
1311     ;;                   (mapconcat (lambda (code)
1312     ;;                                (cond ((symbolp code)
1313     ;;                                       (symbol-name code))
1314     ;;                                      ((characterp code)
1315     ;;                                       (if readable
1316     ;;                                           (format "%S" code)
1317     ;;                                         (format "#x%04X"
1318     ;;                                                 (char-int code))
1319     ;;                                         ))
1320     ;;                                      ((integerp code)
1321     ;;                                       (format "#x%04X" code))
1322     ;;                                      (t
1323     ;;                                       (format "%s%S" line-breaking code))))
1324     ;;                              value " ")
1325     ;;                   line-breaking))
1326     ;;   (setq attributes (delq '->decomposition attributes))
1327     ;;   )
1328     (if (equal (get-char-attribute char '->titlecase)
1329                (get-char-attribute char '->uppercase))
1330         (setq attributes (delq '->titlecase attributes)))
1331     (when (and (memq '->mojikyo attributes)
1332                (setq value (get-char-attribute char '->mojikyo)))
1333       (insert (format "(->mojikyo\t\t. %06d)\t; %c%s"
1334                       value (decode-char 'mojikyo value)
1335                       line-breaking))
1336       (setq attributes (delq '->mojikyo attributes))
1337       )
1338     (unless readable
1339       (dolist (ignored '(composition
1340                          ->denotational <-subsumptive ->ucs-unified
1341                          ;; ->ideographic-component-forms
1342                          ))
1343         (setq attributes (delq ignored attributes))))
1344     (while attributes
1345       (setq name (car attributes))
1346       (unless (eq (setq value (get-char-attribute char name 'value-is-empty))
1347                   'value-is-empty)
1348         (cond ((setq ret (find-charset name))
1349                (setq name (charset-name ret))
1350                (when (not (memq name dest-ccss))
1351                  (setq dest-ccss (cons name dest-ccss))
1352                  (char-db-insert-ccs-feature name value line-breaking))
1353                )
1354               ((string-match "^=>ucs@" (symbol-name name))
1355                (insert (format "(%-18s . #x%04X)\t; %c%s"
1356                                name value (decode-char '=ucs value)
1357                                line-breaking))
1358                )
1359               ((eq name 'jisx0208-1978/4X)
1360                (insert (format "(%-18s . #x%04X)%s"
1361                                name value
1362                                line-breaking))
1363                )
1364               ((and
1365                 (not readable)
1366                 (not (eq name '->subsumptive))
1367                 (not (eq name '->uppercase))
1368                 (not (eq name '->lowercase))
1369                 (not (eq name '->titlecase))
1370                 (not (eq name '->canonical))
1371                 (not (eq name '->Bopomofo))
1372                 (not (eq name '->mistakable))
1373                 (not (eq name '->ideographic-variants))
1374                 (null (get-char-attribute
1375                        char (intern (format "%s*sources" name))))
1376                 (not (string-match "\\*sources$" (symbol-name name)))
1377                 (null (get-char-attribute
1378                        char (intern (format "%s*note" name))))
1379                 (not (string-match "\\*note$" (symbol-name name)))
1380                 (or (eq name '<-identical)
1381                     (eq name '<-uppercase)
1382                     (eq name '<-lowercase)
1383                     (eq name '<-titlecase)
1384                     (eq name '<-canonical)
1385                     (eq name '<-ideographic-variants)
1386                     ;; (eq name '<-synonyms)
1387                     (string-match "^<-synonyms" (symbol-name name))
1388                     (eq name '<-mistakable)
1389                     (when (string-match "^->" (symbol-name name))
1390                       (cond
1391                        ((string-match "^->fullwidth" (symbol-name name))
1392                         (not (and (consp value)
1393                                   (characterp (car value))
1394                                   (encode-char
1395                                    (car value) '=ucs 'defined-only)))
1396                         )
1397                        (t)))
1398                     ))
1399                )
1400               ((or (eq name 'ideographic-structure)
1401                    (eq name 'ideographic-combination)
1402                    ;; (eq name 'ideographic-)
1403                    (eq name '=decomposition)
1404                    (char-feature-base-name= '=decomposition name)
1405                    (char-feature-base-name= '=>decomposition name)
1406                    ;; (string-match "^=>*decomposition\\(@[^*]+\\)?$"
1407                    ;;               (symbol-name name))
1408                    (string-match "^\\(->\\|<-\\)[^*]*$" (symbol-name name))
1409                    (string-match "^\\(->\\|<-\\)[^*]*\\*sources$"
1410                                  (symbol-name name))
1411                    )
1412                (char-db-insert-relation-feature char name value
1413                                                 line-breaking
1414                                                 ccss readable))
1415               ;; ((memq name '(ideograph=
1416               ;;               original-ideograph-of
1417               ;;               ancient-ideograph-of
1418               ;;               vulgar-ideograph-of
1419               ;;               wrong-ideograph-of
1420               ;;               ;; simplified-ideograph-of
1421               ;;               ideographic-variants
1422               ;;               ;; ideographic-different-form-of
1423               ;;               ))
1424               ;;  (insert (format "(%-18s%s " name line-breaking))
1425               ;;  (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1426               ;;        separator nil)
1427               ;;  (while (consp value)
1428               ;;    (setq cell (car value))
1429               ;;    (if (and (consp cell)
1430               ;;             (consp (car cell)))
1431               ;;        (progn
1432               ;;          (if separator
1433               ;;              (insert lbs))
1434               ;;          (char-db-insert-alist cell readable)
1435               ;;          (setq separator lbs))
1436               ;;      (if separator
1437               ;;          (insert separator))
1438               ;;      (insert (prin1-to-string cell))
1439               ;;      (setq separator " "))
1440               ;;    (setq value (cdr value)))
1441               ;;  (insert ")")
1442               ;;  (insert line-breaking))
1443               ((consp value)
1444                (insert (format "(%-18s " name))
1445                (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1446                      separator nil)
1447                (while (consp value)
1448                  (setq cell (car value))
1449                  (if (and (consp cell)
1450                           (consp (car cell))
1451                           (setq ret (condition-case nil
1452                                         (find-char cell)
1453                                       (error nil))))
1454                      (progn
1455                        (setq rest cell
1456                              al nil
1457                              cal nil)
1458                        (while rest
1459                          (setq key (car (car rest)))
1460                          (if (find-charset key)
1461                              (setq cal (cons key cal))
1462                            (setq al (cons key al)))
1463                          (setq rest (cdr rest)))
1464                        (if separator
1465                            (insert lbs))
1466                        (insert-char-attributes ret
1467                                                readable
1468                                                al ; cal
1469                                                nil 'for-sub-node)
1470                        (setq separator lbs))
1471                    (setq ret (prin1-to-string cell))
1472                    (if separator
1473                        (if (< (+ (current-column)
1474                                  (length ret)
1475                                  (length separator))
1476                               76)
1477                            (insert separator)
1478                          (insert lbs)))
1479                    (insert ret)
1480                    (setq separator " "))
1481                  (setq value (cdr value)))
1482                (insert ")")
1483                (insert line-breaking))
1484               (t
1485                (insert (format "(%-18s" name))
1486                (setq ret (prin1-to-string value))
1487                (unless (< (+ (current-column)
1488                              (length ret)
1489                              3)
1490                           76)
1491                  (insert line-breaking))
1492                (insert " . " ret ")" line-breaking)
1493                ;; (insert (format "(%-18s . %S)%s"
1494                ;;                 name value
1495                ;;                 line-breaking))
1496                )
1497               ))
1498       (setq attributes (cdr attributes)))
1499     (insert ")")))
1500
1501 (defun insert-char-data (char &optional readable
1502                               attributes)
1503   (save-restriction
1504     (narrow-to-region (point)(point))
1505     (insert "(define-char
1506   '")
1507     (insert-char-attributes char readable attributes)
1508     (insert ")\n")
1509     (goto-char (point-min))
1510     (while (re-search-forward "[ \t]+$" nil t)
1511       (replace-match ""))
1512     ;; from tabify.
1513     (goto-char (point-min))
1514     (while (re-search-forward "[ \t][ \t][ \t]*" nil t)
1515       (let ((column (current-column))
1516             (indent-tabs-mode t))
1517         (delete-region (match-beginning 0) (point))
1518         (indent-to column)))
1519     (goto-char (point-max))
1520     ;; (tabify (point-min)(point-max))
1521     ))
1522
1523 (defun insert-char-data-with-variant (char &optional printable
1524                                            no-ucs-unified
1525                                            script excluded-script)
1526   (insert-char-data char printable)
1527   (let ((variants (char-variants char))
1528         rest
1529         variant vs ret)
1530     (setq variants (sort variants #'<))
1531     (setq rest variants)
1532     (setq variants (cons char variants))
1533     (while rest
1534       (setq variant (car rest))
1535       (unless (get-char-attribute variant '<-subsumptive)
1536         (if (and (or (null script)
1537                      (null (setq vs (get-char-attribute variant 'script)))
1538                      (memq script vs))
1539                  (or (null excluded-script)
1540                      (null (setq vs (get-char-attribute variant 'script)))
1541                      (not (memq excluded-script vs))))
1542             (unless (and no-ucs-unified (get-char-attribute variant '=ucs))
1543               (insert-char-data variant printable)
1544               (if (setq ret (char-variants variant))
1545                   (while ret
1546                     (or (memq (car ret) variants)
1547                         ;; (get-char-attribute (car ret) '<-subsumptive)
1548                         (setq rest (nconc rest (list (car ret)))))
1549                     (setq ret (cdr ret)))))))
1550       (setq rest (cdr rest)))))
1551
1552 (defun insert-char-range-data (min max &optional script excluded-script)
1553   (let ((code min)
1554         char)
1555     (while (<= code max)
1556       (setq char (decode-char '=ucs code))
1557       (if (encode-char char '=ucs 'defined-only)
1558           (insert-char-data-with-variant char nil 'no-ucs-unified
1559                                          script excluded-script))
1560       (setq code (1+ code)))))
1561
1562 (defun write-char-range-data-to-file (min max file
1563                                           &optional script excluded-script)
1564   (let ((coding-system-for-write char-db-file-coding-system))
1565     (with-temp-buffer
1566       (insert (format ";; -*- coding: %s -*-\n"
1567                       char-db-file-coding-system))
1568       (insert-char-range-data min max script excluded-script)
1569       (write-region (point-min)(point-max) file))))
1570
1571 (defvar what-character-original-window-configuration)
1572
1573 ;;;###autoload
1574 (defun what-char-definition (char)
1575   (interactive (list (char-after)))
1576   (let ((buf (get-buffer-create "*Character Description*"))
1577         (the-buf (current-buffer))
1578         (win-conf (current-window-configuration)))
1579     (pop-to-buffer buf)
1580     (make-local-variable 'what-character-original-window-configuration)
1581     (setq what-character-original-window-configuration win-conf)
1582     (setq buffer-read-only nil)
1583     (erase-buffer)
1584     (condition-case err
1585         (progn
1586           (insert-char-data-with-variant char 'printable)
1587           (unless (char-attribute-alist char)
1588             (insert (format ";; = %c\n"
1589                             (let* ((rest (split-char char))
1590                                    (ccs (pop rest))
1591                                    (code (pop rest)))
1592                               (while rest
1593                                 (setq code (logior (lsh code 8)
1594                                                    (pop rest))))
1595                               (decode-char ccs code)))))
1596           ;; (char-db-update-comment)
1597           (set-buffer-modified-p nil)
1598           (view-mode the-buf (lambda (buf)
1599                                (set-window-configuration
1600                                 what-character-original-window-configuration)
1601                                ))
1602           (goto-char (point-min)))
1603       (error (progn
1604                (set-window-configuration
1605                 what-character-original-window-configuration)
1606                (signal (car err) (cdr err)))))))
1607
1608
1609 ;;; @ end
1610 ;;;
1611
1612 (provide 'char-db-util)
1613
1614 ;;; char-db-util.el ends here