(char-db-coded-charset-priority-list): Add `=>mj'.
[chise/xemacs-chise.git.1] / lisp / utf-2000 / char-db-util.el
1 ;;; char-db-util.el --- Character Database utility -*- coding: utf-8-er; -*-
2
3 ;; Copyright (C) 1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,
4 ;;   2008,2009,2010,2011,2012,2013,2014,2015,2016 MORIOKA Tomohiko.
5
6 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
7 ;; Keywords: CHISE, Character Database, ISO/IEC 10646, UCS, Unicode, MULE.
8
9 ;; This file is part of XEmacs CHISE.
10
11 ;; XEmacs CHISE is free software; you can redistribute it and/or
12 ;; modify it under the terms of the GNU General Public License as
13 ;; published by the Free Software Foundation; either version 2, or (at
14 ;; your option) any later version.
15
16 ;; XEmacs CHISE is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 ;; General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with XEmacs CHISE; see the file COPYING.  If not, write to
23 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Code:
27
28 (require 'chise-subr)
29 (require 'ideograph-subr)
30
31 (defconst unidata-normative-category-alist
32   '(("Lu" letter        uppercase)
33     ("Ll" letter        lowercase)
34     ("Lt" letter        titlecase)
35     ("Mn" mark          non-spacing)
36     ("Mc" mark          spacing-combining)
37     ("Me" mark          enclosing)
38     ("Nd" number        decimal-digit)
39     ("Nl" number        letter)
40     ("No" number        other)
41     ("Zs" separator     space)
42     ("Zl" separator     line)
43     ("Zp" separator     paragraph)
44     ("Cc" other         control)
45     ("Cf" other         format)
46     ("Cs" other         surrogate)
47     ("Co" other         private-use)
48     ("Cn" other         not-assigned)))
49
50 (defconst unidata-informative-category-alist
51   '(("Lm" letter        modifier)
52     ("Lo" letter        other)
53     ("Pc" punctuation   connector)
54     ("Pd" punctuation   dash)
55     ("Ps" punctuation   open)
56     ("Pe" punctuation   close)
57     ("Pi" punctuation   initial-quote)
58     ("Pf" punctuation   final-quote)
59     ("Po" punctuation   other)
60     ("Sm" symbol        math)
61     ("Sc" symbol        currency)
62     ("Sk" symbol        modifier)
63     ("So" symbol        other)
64     ))
65
66 (defconst shuowen-radicals
67   [?一 ?上 ?示 ?三 ?王 ?玉 ?玨 ?气 ?士 ?丨 ; 010
68    ?屮 ?艸 ?蓐 ?茻 ?小 ?八 ?釆 ?半 ?牛 ?犛 ; 020
69    ?告 ?口 ?凵 ?吅 ?哭 ?走 ?止 ?癶 ?步 ?此 ; 030
70    ?正 ?是 ?辵 ?彳 ?廴 ?㢟 ?行 ?齒 ?牙 ?足 ; 040
71    ?疋 ?品 ?龠 ?冊 ?㗊 ?舌 ?干 ?谷 ?只 ?㕯 ; 050
72    ?句 ?丩 ?古 ?十 ?卅 ?言 ?誩 ?音 ?䇂 ?丵 ; 060
73    ?菐 ?𠬞 ?𠬜 ?共 ?異 ?舁 ?𦥑 ?䢅 ?爨 ?革 ; 070
74    ?鬲 ?䰜 ?爪 ?𠃨 ?鬥 ?又 ?𠂇 ?㕜 ?支 ?𦘒 ; 080
75    ?聿 ?畫 ?隶 ?臤 ?臣 ?殳 ?殺 ?𠘧 ?寸 ?皮 ; 090
76    ?㼱 ?攴 ?敎 ?卜 ?用 ?爻 ?㸚 ?𥄎 ?目 ?䀠 ; 100
77    ?眉 ?盾 ?自 ?白 ?鼻 ?皕 ?習 ?羽 ?隹 ?奞 ; 110
78    ?萑 ?𦫳 ?苜 ?羊 ?羴 ?瞿 ?雔 ?雥 ?鳥 ?烏 ; 120
79    ?𠦒 ?冓 ?幺 ?𢆶 ?叀 ?玄 ?予 ?放 ?𠬪 ?𣦼 ; 130
80    ?歺 ?死 ?冎 ?骨 ?肉 ?筋 ?刀 ?刃 ?㓞 ?丰 ; 140
81    ?耒 ?𧢲 ?竹 ?箕 ?丌 ?左 ?工 ?㠭 ?巫 ?甘 ; 150
82    ?曰 ?乃 ?丂 ?可 ?兮 ?号 ?亏 ?旨 ?喜 ?壴 ; 160
83 ;  ?旨 ?曰 ?乃 ?丂 ?可 ?兮 ?号 ?亏 ?喜 ?壴 ; 160
84    ?鼓 ?豈 ?豆 ?豊 ?豐 ?䖒 ?虍 ?虎 ?虤 ?皿 ; 170
85    ?𠙴 ?去 ?血 ?丶 ?丹 ?青 ?井 ?皀 ?鬯 ?食 ; 180
86    ?亼 ?會 ?倉 ?入 ?缶 ?矢 ?高 ?冂 ?𩫏 ?京 ; 190
87    ?亯 ?𣆪 ?畗 ?㐭 ?嗇 ?來 ?麥 ?夊 ?舛 ?䑞 ; 200
88    ?韋 ?弟 ?夂 ?久 ?桀 ?木 ?東 ?林 ?才 ?叒 ; 210
89    ?之 ?帀 ?出 ?𣎵 ?生 ?乇 ?𠂹 ?𠌶 ?華 ?𥝌 ; 220
90    ?稽 ?巢 ?桼 ?束 ?㯻 ?囗 ?員 ?貝 ?邑 ?𨛜 ; 230
91    ?日 ?旦 ?倝 ?㫃 ?冥 ?晶 ?月 ?有 ?明 ?囧 ; 240
92    ?夕 ?多 ?毌 ?𢎘 ?𣐺 ?卣 ?齊 ?朿 ?片 ?鼎 ; 250
93    ?克 ?彔 ?禾 ?秝 ?黍 ?香 ?米 ?毇 ?臼 ?凶 ; 260
94    ?𣎳 ?林 ?麻 ?尗 ?耑 ?韭 ?瓜 ?瓠 ?宀 ?宮 ; 270
95    ?呂 ?穴 ?㝱 ?𤕫 ?冖 ?𠔼 ?冃 ?㒳 ?网 ?襾 ; 280
96    ?巾 ?巿 ?帛 ?白 ?㡀 ?黹 ?人 ?𠤎 ?匕 ?从 ; 290
97    ?比 ?北 ?丘 ?㐺 ?𡈼 ?重 ?臥 ?身 ?㐆 ?衣 ; 300
98    ?裘 ?老 ?毛 ?毳 ?尸 ?尺 ?尾 ?履 ?舟 ?方 ; 310
99    ?儿 ?兄 ?兂 ?皃 ?𠑹 ?先 ?秃 ?見 ?覞 ?欠 ; 320
100    ?㱃 ?㳄 ?旡 ?頁 ?𦣻 ?面 ?丏 ?首 ?𥄉 ?須 ; 330
101    ?彡 ?彣 ?文 ?髟 ?后 ?司 ?卮 ?卩 ?印 ?色 ; 340
102    ?𠨍 ?辟 ?勹 ?包 ?茍 ?鬼 ?甶 ?厶 ?嵬 ?山 ; 350
103    ?屾 ?屵 ?广 ?厂 ?丸 ?危 ?石 ?長 ?勿 ?冄 ; 360
104    ?而 ?豕 ?㣇 ?彑 ?豚 ?豸 ?𤉡 ?易 ?象 ?馬 ; 370
105    ?𢊁 ?鹿 ?麤 ?㲋 ?兔 ?萈 ?犬 ?㹜 ?鼠 ?能 ; 380
106    ?熊 ?火 ?炎 ?黑 ?囪 ?焱 ?炙 ?赤 ?大 ?亦 ; 390
107    ])
108
109 (defun shuowen-radical (number)
110   (aref shuowen-radicals (1- number)))
111
112 (defvar char-db-file-coding-system 'utf-8-mcs-er)
113
114 (defvar char-db-ignored-attributes '(ideographic-products))
115
116 (defvar char-db-coded-charset-priority-list
117   '(ascii
118     control-1
119     latin-iso8859-1
120     latin-iso8859-2
121     latin-iso8859-3
122     latin-iso8859-4
123     latin-iso8859-9
124     latin-jisx0201
125     cyrillic-iso8859-5
126     greek-iso8859-7
127     thai-tis620
128     ;; =mj
129     ;; =adobe-japan1-0
130     ;; =adobe-japan1-1
131     ;; =adobe-japan1-2
132     ;; =adobe-japan1-3
133     ;; =adobe-japan1-4
134     ;; =adobe-japan1-5
135     ;; =adobe-japan1-6
136     =jis-x0208
137     =jis-x0208@1978
138     =jis-x0208@1983
139     japanese-jisx0212
140     chinese-gb2312
141     =jis-x0208@1990
142     chinese-cns11643-1
143     chinese-cns11643-2
144     chinese-cns11643-3
145     chinese-cns11643-4
146     chinese-cns11643-5
147     chinese-cns11643-6
148     chinese-cns11643-7
149     =jis-x0213-1
150     =jis-x0213-1@2000
151     =jis-x0213-1@2004
152     =jis-x0213-2
153     korean-ksc5601
154     chinese-isoir165
155     katakana-jisx0201
156     hebrew-iso8859-8
157     chinese-gb12345
158     latin-viscii
159     ethiopic-ucs
160     =big5-cdp
161     =hanyo-denshi/ja
162     =hanyo-denshi/jb
163     =hanyo-denshi/jc
164     =hanyo-denshi/jd
165     =hanyo-denshi/ft
166     =hanyo-denshi/ia
167     =hanyo-denshi/ib
168     =hanyo-denshi/hg
169     =hanyo-denshi/jt
170     =hanyo-denshi/ks
171     =hanyo-denshi/tk
172     ;; ==mj
173     ;; ==adobe-japan1-0
174     ;; ==adobe-japan1-1
175     ;; ==adobe-japan1-2
176     ;; ==adobe-japan1-3
177     ;; ==adobe-japan1-4
178     ;; ==adobe-japan1-5
179     ;; ==adobe-japan1-6
180     ==jis-x0208
181     ==jis-x0213-1
182     ==jis-x0213-2
183     ==hanyo-denshi/ja
184     ==hanyo-denshi/jb
185     ==hanyo-denshi/jc
186     ==hanyo-denshi/ft
187     ==hanyo-denshi/ib
188     ==hanyo-denshi/hg
189     ==hanyo-denshi/jt
190     ==hanyo-denshi/ks
191     =gt
192     =gt-k
193     =daikanwa
194     =daikanwa@rev2
195     =daikanwa@rev1
196     =daikanwa/+p
197     ==daikanwa
198     =cbeta
199     ideograph-hanziku-1
200     ideograph-hanziku-2
201     ideograph-hanziku-3
202     ideograph-hanziku-4
203     ideograph-hanziku-5
204     ideograph-hanziku-6
205     ideograph-hanziku-7
206     ideograph-hanziku-8
207     ideograph-hanziku-9
208     ideograph-hanziku-10
209     ideograph-hanziku-11
210     ideograph-hanziku-12
211     ;; =>>>adobe-japan1-0
212     ;; =>>>adobe-japan1-1
213     ;; =>>>adobe-japan1-2
214     ;; =>>>adobe-japan1-3
215     ;; =>>>adobe-japan1-4
216     ;; =>>>adobe-japan1-5
217     ;; =>>>adobe-japan1-6
218     ;; =>>>jis-x0208
219     ;; =>>>jis-x0213-1
220     ;; =>>>jis-x0213-2
221     ;; =>>>hanyo-denshi/ja
222     ;; =>>>hanyo-denshi/jb
223     ;; =>>>hanyo-denshi/jc
224     ;; =>>>hanyo-denshi/ft
225     ;; =>>>hanyo-denshi/ib
226     ;; =>>>hanyo-denshi/hg
227     ;; =>>>hanyo-denshi/jt
228     ;; =>>>hanyo-denshi/ks
229     ;; =>>>gt
230     =>>adobe-japan1-0
231     =>>adobe-japan1-1
232     =>>adobe-japan1-2
233     =>>adobe-japan1-3
234     =>>adobe-japan1-4
235     =>>adobe-japan1-5
236     =>>adobe-japan1-6
237     =>>jis-x0208
238     =>>jis-x0213-1
239     =>>jis-x0213-1@2000
240     =>>jis-x0213-1@2004
241     =>>jis-x0213-2
242     =>>jis-x0208@1978
243     =>>hanyo-denshi/ft
244     =>>hanyo-denshi/jt
245     =>>hanyo-denshi/ks
246     =>>gt
247     =>>daikanwa
248     =>>cbeta
249     =+>jis-x0208
250     =+>jis-x0213-1
251     =+>jis-x0213-2
252     =+>adobe-japan1-0
253     =+>adobe-japan1-1
254     =+>adobe-japan1-2
255     =+>adobe-japan1-3
256     =+>adobe-japan1-4
257     =+>adobe-japan1-5
258     =+>adobe-japan1-6
259     =+>jis-x0208@1978
260     =>jis-x0208
261     =>jis-x0208@1997
262     =>jis-x0213-1
263     =>jis-x0213-1@2000
264     =>jis-x0213-1@2004
265     =>jis-x0213-2
266     ==>ucs@bucs
267     =>iwds-1
268     =>ucs@hanyo-denshi
269     =>ucs@iso
270     =>ucs@unicode
271     =>ucs@jis
272     =>ucs@cns
273     =>ucs@ks
274     =+>ucs@iso
275     =+>ucs@unicode
276     =+>ucs@jis
277     =+>ucs@jis/1990
278     =+>ucs@cns
279     =+>ucs@ks
280     =>>ucs@iso
281     =>>ucs@unicode
282     =>>ucs@jis
283     =>>ucs@cns
284     =>>>ucs@iso
285     =>>>ucs@unicode
286     ==ucs@iso
287     ==ucs@unicode
288     ;; ==ucs@cns
289     ==gb2312
290     ==ks-x1001
291     ==cns11643-1
292     ==cns11643-2
293     ==cns11643-3
294     ==cns11643-4
295     ==cns11643-5
296     ==cns11643-6
297     ==cns11643-7
298     ==gt
299     ==jis-x0208@1990
300     ;; ==jis-x0208@1983
301     ==jis-x0208@1978
302     ==gt-k
303     =ucs@iso
304     =ucs@unicode
305     =ucs@cns
306     ==big5-cdp
307     ==cbeta
308     =>>big5-cdp
309     =>>gt-k
310     =+>gt
311     =>gt
312     =>mj
313     =>big5-cdp
314     =>daikanwa
315     =>daikanwa/ho
316     =>cns11643-5
317     =>cns11643-7
318     =big5
319     =big5-eten
320     =>gt-k
321     =zinbun-oracle
322     =>zinbun-oracle
323     =ruimoku-v6
324     =>>ruimoku-v6
325     ==ruimoku-v6
326     =jef-china3
327     =>cbeta
328     =shinjigen
329     =ucs-var-001
330     =ucs-var-002
331     =ucs-var-003
332     =ucs-itaiji-001
333     =ucs-itaiji-002
334     =ucs-itaiji-003
335     =ucs-itaiji-004
336     =ucs-itaiji-005
337     =ucs-itaiji-008
338     =big5-cdp-var-3
339     =big5-cdp-var-5
340     =>ucs@iwds-1))
341
342
343 ;;; @ char-db formatters
344 ;;;
345
346 (defun char-db-make-char-spec (char)
347   (let (ret char-spec)
348     (cond ((characterp char)
349            (cond ((and (setq ret (encode-char char '=ucs 'defined-only))
350                        (not (and (<= #xE000 ret)(<= ret #xF8FF))))
351                   (setq char-spec (list (cons '=ucs ret)))
352                   (cond ((setq ret (get-char-attribute char 'name))
353                          (setq char-spec (cons (cons 'name ret) char-spec))
354                          )
355                         ((setq ret (get-char-attribute char 'name*))
356                          (setq char-spec (cons (cons 'name* ret) char-spec))
357                          ))
358                   )
359                  ((encode-char char '=mj 'defined-only)
360                   (setq char-spec nil)
361                   (dolist (ccs (charset-list))
362                     (if (and (or (eq ccs '=mj)
363                                  ;; (eq (charset-property ccs 'iso-ir) 177)
364                                  (string-match "=ucs@" (symbol-name ccs))
365                                  )
366                              (setq ccs (charset-name ccs))
367                              (null (assq ccs char-spec))
368                              (setq ret (encode-char char ccs 'defined-only)))
369                         (setq char-spec (cons (cons ccs ret) char-spec))))
370                   )
371                  ((encode-char char '==mj 'defined-only)
372                   (setq char-spec nil)
373                   (dolist (ccs (charset-list))
374                     (if (and (or (eq ccs '==mj)
375                                  ;; (eq (charset-property ccs 'iso-ir) 177)
376                                  (string-match "=ucs@" (symbol-name ccs))
377                                  )
378                              (setq ccs (charset-name ccs))
379                              (null (assq ccs char-spec))
380                              (setq ret (encode-char char ccs 'defined-only)))
381                         (setq char-spec (cons (cons ccs ret) char-spec))))
382                   )
383                  ((encode-char char '=adobe-japan1 'defined-only)
384                   (setq char-spec nil)
385                   (dolist (ccs (charset-list))
386                     (if (and (or (memq ccs
387                                        '(=adobe-japan1-0
388                                          =adobe-japan1-1
389                                          =adobe-japan1-2
390                                          =adobe-japan1-3
391                                          =adobe-japan1-4
392                                          =adobe-japan1-5
393                                          =adobe-japan1-6
394                                          ))
395                                  ;; (eq (charset-property ccs 'iso-ir) 177)
396                                  (string-match "=ucs@" (symbol-name ccs))
397                                  )
398                              (setq ccs (charset-name ccs))
399                              (null (assq ccs char-spec))
400                              (setq ret (encode-char char ccs 'defined-only)))
401                         (setq char-spec (cons (cons ccs ret) char-spec))))
402                   )
403                  ((encode-char char '==adobe-japan1 'defined-only)
404                   (setq char-spec nil)
405                   (dolist (ccs (charset-list))
406                     (if (and (or (memq ccs
407                                        '(==adobe-japan1-0
408                                          ==adobe-japan1-1
409                                          ==adobe-japan1-2
410                                          ==adobe-japan1-3
411                                          ==adobe-japan1-4
412                                          ==adobe-japan1-5
413                                          ==adobe-japan1-6
414                                          ))
415                                  ;; (eq (charset-property ccs 'iso-ir) 177)
416                                  (string-match "=ucs@" (symbol-name ccs))
417                                  )
418                              (setq ccs (charset-name ccs))
419                              (null (assq ccs char-spec))
420                              (setq ret (encode-char char ccs 'defined-only)))
421                         (setq char-spec (cons (cons ccs ret) char-spec))))
422                   )
423                  ((setq ret
424                         (catch 'tag
425                           (let ((rest char-db-coded-charset-priority-list)
426                                 ccs)
427                             (while rest
428                               (setq ccs (charset-name
429                                          (find-charset (car rest))))
430                               (if (setq ret
431                                         (encode-char char ccs
432                                                      'defined-only))
433                                   (throw 'tag (cons ccs ret)))
434                               (setq rest (cdr rest))))))
435                   (setq char-spec (list ret))
436                   (dolist (ccs (delq (car ret) (charset-list)))
437                     (if (and (or (charset-iso-final-char ccs)
438                                  (memq ccs
439                                        '(=daikanwa
440                                          =daikanwa@rev2
441                                          ;; =gt-k
442                                          =jis-x0208@1997
443                                          ))
444                                  (eq (charset-property ccs 'iso-ir) 177)
445                                  ;; (string-match "=ucs@" (symbol-name ccs))
446                                  )
447                              (setq ccs (charset-name ccs))
448                              (null (assq ccs char-spec))
449                              (setq ret (encode-char char ccs 'defined-only)))
450                         (setq char-spec (cons (cons ccs ret) char-spec))))
451                   (if (null char-spec)
452                       (setq char-spec (split-char char)))
453                   (cond ((setq ret (get-char-attribute char 'name))
454                          (setq char-spec (cons (cons 'name ret) char-spec))
455                          )
456                         ((setq ret (get-char-attribute char 'name*))
457                          (setq char-spec (cons (cons 'name* ret) char-spec))
458                          ))
459                   )
460                  ((setq ret (get-char-attribute
461                              char 'ideographic-combination))
462                   (setq char-spec
463                         (cons (cons 'ideographic-combination ret)
464                               char-spec))
465                   ))
466            char-spec)
467           ((consp char)
468            char))))
469     
470 (defun char-db-insert-char-spec (char &optional readable column
471                                       required-features)
472   (unless column
473     (setq column (current-column)))
474   (let (char-spec temp-char)
475     (setq char-spec (char-db-make-char-spec char))
476     (unless (or (characterp char) ; char
477                 (condition-case nil
478                     (setq char (find-char char-spec))
479                   (error nil)))
480       ;; define temporary character
481       ;;   Current implementation is dirty.
482       (setq temp-char (define-char (cons '(ideograph-daikanwa . 0)
483                                          char-spec)))
484       (remove-char-attribute temp-char 'ideograph-daikanwa)
485       (setq char temp-char))
486     (insert-char-attributes char
487                             readable
488                             (union (mapcar #'car char-spec)
489                                    required-features)
490                             nil 'for-sub-node)
491     (when temp-char
492       ;; undefine temporary character
493       ;;   Current implementation is dirty.
494       (setq char-spec (char-attribute-alist temp-char))
495       (while char-spec
496         (remove-char-attribute temp-char (car (car char-spec)))
497         (setq char-spec (cdr char-spec))))))
498
499 (defun char-db-insert-alist (alist &optional readable column)
500   (unless column
501     (setq column (current-column)))
502   (let ((line-breaking
503          (concat "\n" (make-string (1+ column) ?\ )))
504         name value
505         ret al ; cal
506         key
507         lbs cell rest separator)
508     (insert "(")
509     (while alist
510       (setq name (car (car alist))
511             value (cdr (car alist)))
512       (cond ((eq name 'char)
513              (insert "(char . ")
514              (if (setq ret (condition-case nil
515                                (find-char value)
516                              (error nil)))
517                  (progn
518                    (setq al nil
519                          ;; cal nil
520                          )
521                    (while value
522                      (setq key (car (car value)))
523                      ;; (if (find-charset key)
524                      ;;     (setq cal (cons key cal))
525                      (setq al (cons key al))
526                      ;; )
527                      (setq value (cdr value)))
528                    (insert-char-attributes ret
529                                            readable
530                                            (or al 'none) ; cal
531                                            nil 'for-sub-node))
532                (insert (prin1-to-string value)))
533              (insert ")")
534              (insert line-breaking))
535             ((consp value)
536              (insert (format "(%-18s " name))
537              (setq lbs (concat "\n" (make-string (current-column) ?\ )))
538              (while (consp value)
539                (setq cell (car value))
540                (if (and (consp cell)
541                         (consp (car cell))
542                         (setq ret (condition-case nil
543                                       (find-char cell)
544                                     (error nil)))
545                         )
546                    (progn
547                      (setq rest cell
548                            al nil
549                            ;; cal nil
550                            )
551                      (while rest
552                        (setq key (car (car rest)))
553                        ;; (if (find-charset key)
554                        ;;     (setq cal (cons key cal))
555                        (setq al (cons key al))
556                        ;; )
557                        (setq rest (cdr rest)))
558                      (if separator
559                          (insert lbs))
560                      (insert-char-attributes ret
561                                              readable
562                                              al ; cal
563                                              nil 'for-sub-node)
564                      (setq separator lbs))
565                  (if separator
566                      (insert separator))
567                  (insert (prin1-to-string cell))
568                  (setq separator " "))
569                (setq value (cdr value)))
570              (insert ")")
571              (insert line-breaking))
572             (t
573              (insert (format "(%-18s . %S)%s"
574                              name value
575                              line-breaking))))
576       (setq alist (cdr alist))))
577   (insert ")"))
578
579 (defun char-db-insert-char-reference (plist &optional readable column)
580   (unless column
581     (setq column (current-column)))
582   (let ((line-breaking
583          (concat "\n" (make-string (1+ column) ?\ )))
584         (separator "")
585         name value)
586     (insert "(")
587     (while plist
588       (setq name (pop plist))
589       (setq value (pop plist))
590       (cond ((eq name :char)
591              (insert separator)
592              (insert ":char\t")
593              (cond ((numberp value)
594                     (setq value (decode-char '=ucs value)))
595                    ;; ((consp value)
596                    ;;  (setq value (or (find-char value)
597                    ;;                  value)))
598                    )
599              (char-db-insert-char-spec value readable)
600              (insert line-breaking)
601              (setq separator ""))
602             ((eq name :radical)
603              (insert (format "%s%s\t%d ; %c%s"
604                              separator
605                              name value
606                              (ideographic-radical value)
607                              line-breaking))
608              (setq separator ""))
609             (t
610              (insert (format "%s%s\t%S" separator name value))
611              (setq separator line-breaking)))
612       ))
613   (insert ")"))
614
615 (defun char-db-decode-isolated-char (ccs code-point)
616   (let (ret)
617     (setq ret
618           (cond ((eq ccs 'arabic-iso8859-6)
619                  (decode-char ccs code-point))
620                 ;; ((eq ccs '=gt)
621                 ;;  (decode-builtin-char '==gt code-point))
622                 ((and (memq ccs '(=gt-pj-1
623                                   =gt-pj-2
624                                   =gt-pj-3
625                                   =gt-pj-4
626                                   =gt-pj-5
627                                   =gt-pj-6
628                                   =gt-pj-7
629                                   =gt-pj-8
630                                   =gt-pj-9
631                                   =gt-pj-10
632                                   =gt-pj-11))
633                       (setq ret (decode-char ccs code-point))
634                       (setq ret (encode-char ret '=gt 'defined-only)))
635                  (decode-builtin-char '=gt ret))
636                 (t
637                  (decode-builtin-char ccs code-point))))
638     (cond ((null ret)
639            (or (decode-char ccs code-point)
640                (define-char (list (cons ccs code-point)))))
641           ((and (<= 0 (char-int ret))
642                 (<= (char-int ret) #x1F))
643            (decode-char '=ucs (+ #x2400 (char-int ret))))
644           ((= (char-int ret) #x7F)
645            ?\u2421)
646           (t ret))))
647
648 (defvar char-db-convert-obsolete-format t)
649
650 (defun char-db-insert-ccs-feature (name value line-breaking)
651   (cond
652    ((integerp value)
653     (insert
654      (format
655       (cond
656        ((memq name '(=>iwds-1
657                      ==shinjigen
658                      =shinjigen
659                      =shinjigen@1ed ==shinjigen@1ed
660                      =shinjigen@rev ==shinjigen@rev
661                      =shinjigen/+p@rev ==shinjigen/+p@rev
662                      ===daikanwa/ho ==daikanwa/ho
663                      =daikanwa/ho =>>daikanwa/ho =>daikanwa/ho))
664         "(%-18s .  %04d)\t; %c")
665        ((eq name '=shinjigen@1ed/24pr)
666         "(%-18s . %04d)\t; %c")
667        ((or
668          (memq name
669                '(===daikanwa
670                  ==daikanwa =daikanwa =>>daikanwa =>daikanwa
671                  =daikanwa@rev1 =daikanwa@rev2
672                  =daikanwa/+p ==daikanwa/+p ===daikanwa/+p
673                  =>>daikanwa/+p
674                  =daikanwa/+2p =>>daikanwa/+2p
675                  =gt ==gt ===gt
676                  =>>gt =+>gt =>gt
677                  =gt-k ==gt-k ===gt-k
678                  =>>gt-k =>gt-k
679                  =adobe-japan1-0 ==adobe-japan1-0 ===adobe-japan1-0
680                  =adobe-japan1-1 ==adobe-japan1-1 ===adobe-japan1-1
681                  =adobe-japan1-2 ==adobe-japan1-2 ===adobe-japan1-2
682                  =adobe-japan1-3 ==adobe-japan1-3 ===adobe-japan1-3
683                  =adobe-japan1-4 ==adobe-japan1-4 ===adobe-japan1-4
684                  =adobe-japan1-5 ==adobe-japan1-5 ===adobe-japan1-5
685                  =adobe-japan1-6 ==adobe-japan1-6 ===adobe-japan1-6
686                  =>>adobe-japan1-0 =+>adobe-japan1-0
687                  =>>adobe-japan1-1 =+>adobe-japan1-1
688                  =>>adobe-japan1-2 =+>adobe-japan1-2
689                  =>>adobe-japan1-3 =+>adobe-japan1-3
690                  =>>adobe-japan1-4 =+>adobe-japan1-4
691                  =>>adobe-japan1-5 =+>adobe-japan1-5
692                  =>>adobe-japan1-6 =+>adobe-japan1-6
693                  =>cbeta =cbeta =>>cbeta ==cbeta ===cbeta
694                  =zinbun-oracle =>zinbun-oracle
695                  ===hng-jou ===hng-keg ===hng-dng ===hng-mam
696                  ===hng-drt ===hng-kgk ===hng-myz ===hng-kda
697                  ===hng-khi ===hng-khm ===hng-hok ===hng-kyd ===hng-sok
698                  ===hng-yhk ===hng-kak ===hng-kar ===hng-kae
699                  ===hng-sys ===hng-tsu ===hng-tzj
700                  ===hng-hos ===hng-nak ===hng-jhk
701                  ===hng-hod ===hng-gok ===hng-ink ===hng-nto
702                  ===hng-nkm ===hng-k24 ===hng-nkk
703                  ===hng-kcc ===hng-kcj ===hng-kbk ===hng-sik
704                  ===hng-skk ===hng-kyu ===hng-ksk ===hng-wan
705                  ===hng-okd ===hng-wad ===hng-kmi
706                  ===hng-zkd ===hng-doh ===hng-jyu
707                  ===hng-tzs ===hng-kss ===hng-kyo
708                  ===hng-smk))
709          ;; (string-match "^=adobe-" (symbol-name name))
710          )
711         "(%-18s . %05d)\t; %c")
712        ((memq name '(=hanyo-denshi/ks
713                      ==hanyo-denshi/ks ===hanyo-denshi/ks
714                      =>>hanyo-denshi/ks
715                      =koseki ==koseki
716                      =mj ==mj ===mj =>>mj =>mj
717                      =zihai mojikyo))
718         "(%-18s . %06d)\t; %c")
719        ((memq name '(=hanyo-denshi/tk))
720         "(%-18s . %08d)\t; %c")
721        ((>= (charset-dimension name) 2)
722         "(%-18s . #x%04X)\t; %c")
723        (t
724         "(%-18s . #x%02X)\t; %c"))
725       name
726       (if (= (charset-iso-graphic-plane name) 1)
727           (logior value
728                   (cond ((= (charset-dimension name) 1)
729                          #x80)
730                         ((= (charset-dimension name) 2)
731                          #x8080)
732                         ((= (charset-dimension name) 3)
733                          #x808080)
734                         (t 0)))
735         value)
736       (char-db-decode-isolated-char name value)))
737     (if (and (= (charset-chars name) 94)
738              (= (charset-dimension name) 2))
739         (insert (format " [%02d-%02d]"
740                         (- (lsh value -8) 32)
741                         (- (logand value 255) 32))))
742     )
743    (t
744     (insert (format "(%-18s . %s)" name value))
745     ))
746   (insert line-breaking))
747
748 (defun char-db-insert-relation-feature (char name value line-breaking
749                                              ccss readable)
750   (insert (format "(%-18s%s " name line-breaking))
751   (let ((lbs (concat "\n" (make-string (current-column) ?\ )))
752         separator cell sources required-features
753         ret)
754     (while (consp value)
755       (setq cell (car value))
756       (if (integerp cell)
757           (setq cell (decode-char '=ucs cell)))
758       (cond
759        ((eq name '->subsumptive)
760         (when (or (not (some (lambda (atr)
761                                (get-char-attribute cell atr))
762                              char-db-ignored-attributes))
763                   (some (lambda (ccs)
764                           (encode-char cell ccs 'defined-only))
765                         ccss))
766           (if separator
767               (insert lbs))
768           (let ((char-db-ignored-attributes
769                  (cons '<-subsumptive
770                        char-db-ignored-attributes)))
771             (insert-char-attributes cell readable nil nil 'for-sub-node))
772           (setq separator lbs))
773         )
774        ((characterp cell)
775         (setq sources
776               (get-char-attribute
777                char (intern (format "%s*sources" name))))
778         (setq required-features nil)
779         (dolist (source sources)
780           (cond
781            ((memq source '(JP
782                            JP/Jouyou
783                            shinjigen shinjigen@1ed shinjigen@rev))
784             (setq required-features
785                   (union required-features
786                          '(=jis-x0208
787                            =jis-x0208@1990
788                            =jis-x0213-1@2000
789                            =jis-x0213-1@2004
790                            =jis-x0213-2
791                            =jis-x0212
792                            =jis-x0208@1983
793                            =jis-x0208@1978
794                            =shinjigen
795                            =shinjigen@1ed
796                            =shinjigen@rev
797                            =shinjigen/+p@rev))))
798            ((eq source 'CN)
799             (setq required-features
800                   (union required-features
801                          '(=gb2312
802                            =gb12345
803                            =iso-ir165)))))
804           (cond
805            ((find-charset (setq ret (intern (format "=%s" source))))
806             (setq required-features
807                   (cons ret required-features)))
808            (t (setq required-features
809                     (cons source required-features)))))
810         (cond ((string-match "@JP" (symbol-name name))
811                (setq required-features
812                      (union required-features
813                             '(=jis-x0208
814                               =jis-x0208@1990
815                               =jis-x0213-1-2000
816                               =jis-x0213-2-2000
817                               =jis-x0212
818                               =jis-x0208@1983
819                               =jis-x0208@1978))))
820               ((string-match "@CN" (symbol-name name))
821                (setq required-features
822                      (union required-features
823                             '(=gb2312
824                               =gb12345
825                               =iso-ir165)))))
826         (if separator
827             (insert lbs))
828         (if readable
829             (insert (format "%S" cell))
830           (char-db-insert-char-spec cell readable
831                                     nil
832                                     required-features))
833         (setq separator lbs))
834        ((consp cell)
835         (if separator
836             (insert lbs))
837         (if (consp (car cell))
838             (char-db-insert-char-spec cell readable)
839           (char-db-insert-char-reference cell readable))
840         (setq separator lbs))
841        (t
842         (if separator
843             (insert separator))
844         (insert (prin1-to-string cell))
845         (setq separator " ")))
846       (setq value (cdr value)))
847     (insert ")")
848     (insert line-breaking)))
849
850 (defun insert-char-attributes (char &optional readable attributes column
851                                     for-sub-node)
852   (unless column
853     (setq column (current-column)))
854   (let (name value ; has-long-ccs-name
855         rest
856         radical strokes
857         (line-breaking
858          (concat "\n" (make-string (1+ column) ?\ )))
859         lbs cell separator ret
860         key al cal
861         dest-ccss ; sources required-features
862         ccss)
863     (let (atr-d)
864       (setq attributes
865             (sort (if attributes
866                       (if (consp attributes)
867                           (progn
868                             (dolist (name attributes)
869                               (unless (memq name char-db-ignored-attributes)
870                                 (if (find-charset name)
871                                     (push name ccss))
872                                 (push name atr-d)))
873                             atr-d))
874                     (dolist (name (char-attribute-list))
875                       (unless (memq name char-db-ignored-attributes)
876                         (if (find-charset name)
877                             (push name ccss))
878                         (push name atr-d)))
879                     atr-d)
880                   #'char-attribute-name<)))
881     (insert "(")
882     (when (memq '<-subsumptive attributes)
883       (when (or readable (not for-sub-node))
884         (when (setq value (get-char-attribute char '<-subsumptive))
885           (char-db-insert-relation-feature char '<-subsumptive value
886                                            line-breaking
887                                            ccss readable)))
888       (setq attributes (delq '<-subsumptive attributes)))
889     (when (and (memq '<-denotational attributes)
890                (setq value (get-char-attribute char '<-denotational)))
891       (char-db-insert-relation-feature char '<-denotational value
892                                        line-breaking
893                                        ccss readable)
894       (setq attributes (delq '<-denotational attributes)))
895     (when (and (memq '<-denotational@component attributes)
896                (setq value (get-char-attribute char '<-denotational@component)))
897       (char-db-insert-relation-feature char '<-denotational@component value
898                                        line-breaking
899                                        ccss readable)
900       (setq attributes (delq '<-denotational@component attributes)))
901     (when (and (memq 'name attributes)
902                (setq value (get-char-attribute char 'name)))
903       (insert (format
904                (if (> (+ (current-column) (length value)) 48)
905                    "(name . %S)%s"
906                  "(name               . %S)%s")
907                value line-breaking))
908       (setq attributes (delq 'name attributes))
909       )
910     (when (and (memq 'name* attributes)
911                (setq value (get-char-attribute char 'name*)))
912       (insert (format
913                (if (> (+ (current-column) (length value)) 48)
914                    "(name* . %S)%s"
915                  "(name*              . %S)%s")
916                value line-breaking))
917       (setq attributes (delq 'name* attributes))
918       )
919     (when (and (memq 'script attributes)
920                (setq value (get-char-attribute char 'script)))
921       (insert (format "(script\t\t%s)%s"
922                       (mapconcat (function prin1-to-string)
923                                  value " ")
924                       line-breaking))
925       (setq attributes (delq 'script attributes))
926       )
927     (dolist (name '(=>ucs =>ucs*))
928       (when (and (memq name attributes)
929                  (setq value (get-char-attribute char name)))
930         (insert (format "(%-18s . #x%04X)\t; %c%s"
931                         name value (decode-char '=ucs value)
932                         line-breaking))
933         (setq attributes (delq name attributes))))
934     (dolist (name '(=>ucs@gb =>ucs@big5))
935       (when (and (memq name attributes)
936                  (setq value (get-char-attribute char name)))
937         (insert (format "(%-18s . #x%04X)\t; %c%s"
938                         name value
939                         (decode-char (intern
940                                       (concat "="
941                                               (substring
942                                                (symbol-name name) 2)))
943                                      value)
944                         line-breaking))
945         (setq attributes (delq name attributes))
946         ))
947     ;; (dolist (name '(=>daikanwa))
948     ;;   (when (and (memq name attributes)
949     ;;              (setq value (get-char-attribute char name)))
950     ;;     (insert
951     ;;      (if (integerp value)
952     ;;          (format "(%-18s . %05d)\t; %c%s"
953     ;;                  name value (decode-char '=daikanwa value)
954     ;;                  line-breaking)
955     ;;        (format "(%-18s %s)\t; %c%s"
956     ;;                name
957     ;;                (mapconcat (function prin1-to-string)
958     ;;                           value " ")
959     ;;                (char-representative-of-daikanwa char)
960     ;;                line-breaking)))
961     ;;     (setq attributes (delq name attributes))))
962     (when (and (memq 'general-category attributes)
963                (setq value (get-char-attribute char 'general-category)))
964       (insert (format
965                "(general-category\t%s) ; %s%s"
966                (mapconcat (lambda (cell)
967                             (format "%S" cell))
968                           value " ")
969                (cond ((rassoc value unidata-normative-category-alist)
970                       "Normative Category")
971                      ((rassoc value unidata-informative-category-alist)
972                       "Informative Category")
973                      (t
974                       "Unknown Category"))
975                line-breaking))
976       (setq attributes (delq 'general-category attributes))
977       )
978     (when (and (memq 'bidi-category attributes)
979                (setq value (get-char-attribute char 'bidi-category)))
980       (insert (format "(bidi-category\t. %S)%s"
981                       value
982                       line-breaking))
983       (setq attributes (delq 'bidi-category attributes))
984       )
985     (unless (or (not (memq 'mirrored attributes))
986                 (eq (setq value (get-char-attribute char 'mirrored 'empty))
987                     'empty))
988       (insert (format "(mirrored\t\t. %S)%s"
989                       value
990                       line-breaking))
991       (setq attributes (delq 'mirrored attributes))
992       )
993     (cond
994      ((and (memq 'decimal-digit-value attributes)
995            (setq value (get-char-attribute char 'decimal-digit-value)))
996       (insert (format "(decimal-digit-value . %S)%s"
997                       value
998                       line-breaking))
999       (setq attributes (delq 'decimal-digit-value attributes))
1000       (when (and (memq 'digit-value attributes)
1001                  (setq value (get-char-attribute char 'digit-value)))
1002         (insert (format "(digit-value\t . %S)%s"
1003                         value
1004                         line-breaking))
1005         (setq attributes (delq 'digit-value attributes))
1006         )
1007       (when (and (memq 'numeric-value attributes)
1008                  (setq value (get-char-attribute char 'numeric-value)))
1009         (insert (format "(numeric-value\t . %S)%s"
1010                         value
1011                         line-breaking))
1012         (setq attributes (delq 'numeric-value attributes))
1013         )
1014       )
1015      (t
1016       (when (and (memq 'digit-value attributes)
1017                  (setq value (get-char-attribute char 'digit-value)))
1018         (insert (format "(digit-value\t. %S)%s"
1019                         value
1020                         line-breaking))
1021         (setq attributes (delq 'digit-value attributes))
1022         )
1023       (when (and (memq 'numeric-value attributes)
1024                  (setq value (get-char-attribute char 'numeric-value)))
1025         (insert (format "(numeric-value\t. %S)%s"
1026                         value
1027                         line-breaking))
1028         (setq attributes (delq 'numeric-value attributes))
1029         )))
1030     (when (and (memq 'iso-10646-comment attributes)
1031                (setq value (get-char-attribute char 'iso-10646-comment)))
1032       (insert (format "(iso-10646-comment\t. %S)%s"
1033                       value
1034                       line-breaking))
1035       (setq attributes (delq 'iso-10646-comment attributes))
1036       )
1037     (when (and (memq 'morohashi-daikanwa attributes)
1038                (setq value (get-char-attribute char 'morohashi-daikanwa)))
1039       (insert (format "(morohashi-daikanwa\t%s)%s"
1040                       (mapconcat (function prin1-to-string) value " ")
1041                       line-breaking))
1042       (setq attributes (delq 'morohashi-daikanwa attributes))
1043       )
1044     (setq radical nil
1045           strokes nil)
1046     (when (and (memq 'ideographic-radical attributes)
1047                (setq value (get-char-attribute char 'ideographic-radical)))
1048       (setq radical value)
1049       (insert (format "(ideographic-radical . %S)\t; %c%s"
1050                       radical
1051                       (ideographic-radical radical)
1052                       line-breaking))
1053       (setq attributes (delq 'ideographic-radical attributes))
1054       )
1055     (when (and (memq 'shuowen-radical attributes)
1056                (setq value (get-char-attribute char 'shuowen-radical)))
1057       (insert (format "(shuowen-radical\t. %S)\t; %c%s"
1058                       value
1059                       (shuowen-radical value)
1060                       line-breaking))
1061       (setq attributes (delq 'shuowen-radical attributes))
1062       )
1063     (let (key)
1064       (dolist (domain
1065                (append
1066                 char-db-feature-domains
1067                 (let (dest domain)
1068                   (dolist (feature (char-attribute-list))
1069                     (setq feature (symbol-name feature))
1070                     (when (string-match
1071                            "\\(radical\\|strokes\\)@\\([^@*]+\\)\\(\\*\\|$\\)"
1072                            feature)
1073                       (setq domain (intern (match-string 2 feature)))
1074                      (unless (memq domain dest)
1075                        (setq dest (cons domain dest)))))
1076                   (sort dest #'string<))))
1077         (setq key (intern (format "%s@%s" 'ideographic-radical domain)))
1078         (when (and (memq key attributes)
1079                    (setq value (get-char-attribute char key)))
1080           (setq radical value)
1081           (insert (format "(%s . %S)\t; %c%s"
1082                           key
1083                           radical
1084                           (ideographic-radical radical)
1085                           line-breaking))
1086           (setq attributes (delq key attributes))
1087           )
1088         (setq key (intern (format "%s@%s" 'ideographic-strokes domain)))
1089         (when (and (memq key attributes)
1090                    (setq value (get-char-attribute char key)))
1091           (setq strokes value)
1092           (insert (format "(%s . %S)%s"
1093                           key
1094                           strokes
1095                           line-breaking))
1096           (setq attributes (delq key attributes))
1097           )
1098         (setq key (intern (format "%s@%s" 'total-strokes domain)))
1099         (when (and (memq key attributes)
1100                    (setq value (get-char-attribute char key)))
1101           (insert (format "(%s       . %S)%s"
1102                           key
1103                           value
1104                           line-breaking))
1105           (setq attributes (delq key attributes))
1106           )
1107         (dolist (feature '(ideographic-radical
1108                            ideographic-strokes
1109                            total-strokes))
1110           (setq key (intern (format "%s@%s*sources" feature domain)))
1111           (when (and (memq key attributes)
1112                      (setq value (get-char-attribute char key)))
1113             (insert (format "(%s%s" key line-breaking))
1114             (dolist (cell value)
1115               (insert (format " %s" cell)))
1116             (insert ")")
1117             (insert line-breaking)
1118             (setq attributes (delq key attributes))
1119             ))
1120         ))
1121     (when (and (memq 'ideographic-strokes attributes)
1122                (setq value (get-char-attribute char 'ideographic-strokes)))
1123       (setq strokes value)
1124       (insert (format "(ideographic-strokes . %S)%s"
1125                       strokes
1126                       line-breaking))
1127       (setq attributes (delq 'ideographic-strokes attributes))
1128       )
1129     (when (and (memq 'kangxi-radical attributes)
1130                (setq value (get-char-attribute char 'kangxi-radical)))
1131       (unless (eq value radical)
1132         (insert (format "(kangxi-radical\t . %S)\t; %c%s"
1133                         value
1134                         (ideographic-radical value)
1135                         line-breaking))
1136         (or radical
1137             (setq radical value)))
1138       (setq attributes (delq 'kangxi-radical attributes))
1139       )
1140     (when (and (memq 'kangxi-strokes attributes)
1141                (setq value (get-char-attribute char 'kangxi-strokes)))
1142       (unless (eq value strokes)
1143         (insert (format "(kangxi-strokes\t . %S)%s"
1144                         value
1145                         line-breaking))
1146         (or strokes
1147             (setq strokes value)))
1148       (setq attributes (delq 'kangxi-strokes attributes))
1149       )
1150     (when (and (memq 'japanese-radical attributes)
1151                (setq value (get-char-attribute char 'japanese-radical)))
1152       (unless (eq value radical)
1153         (insert (format "(japanese-radical\t . %S)\t; %c%s"
1154                         value
1155                         (ideographic-radical value)
1156                         line-breaking))
1157         (or radical
1158             (setq radical value)))
1159       (setq attributes (delq 'japanese-radical attributes))
1160       )
1161     (when (and (memq 'japanese-strokes attributes)
1162                (setq value (get-char-attribute char 'japanese-strokes)))
1163       (unless (eq value strokes)
1164         (insert (format "(japanese-strokes\t . %S)%s"
1165                         value
1166                         line-breaking))
1167         (or strokes
1168             (setq strokes value)))
1169       (setq attributes (delq 'japanese-strokes attributes))
1170       )
1171     (when (and (memq 'cns-radical attributes)
1172                (setq value (get-char-attribute char 'cns-radical)))
1173       (insert (format "(cns-radical\t . %S)\t; %c%s"
1174                       value
1175                       (ideographic-radical value)
1176                       line-breaking))
1177       (setq attributes (delq 'cns-radical attributes))
1178       )
1179     (when (and (memq 'cns-strokes attributes)
1180                (setq value (get-char-attribute char 'cns-strokes)))
1181       (unless (eq value strokes)
1182         (insert (format "(cns-strokes\t . %S)%s"
1183                         value
1184                         line-breaking))
1185         (or strokes
1186             (setq strokes value)))
1187       (setq attributes (delq 'cns-strokes attributes))
1188       )
1189     ;; (when (and (memq 'shinjigen-1-radical attributes)
1190     ;;            (setq value (get-char-attribute char 'shinjigen-1-radical)))
1191     ;;   (unless (eq value radical)
1192     ;;     (insert (format "(shinjigen-1-radical . %S)\t; %c%s"
1193     ;;                     value
1194     ;;                     (ideographic-radical value)
1195     ;;                     line-breaking))
1196     ;;     (or radical
1197     ;;         (setq radical value)))
1198     ;;   (setq attributes (delq 'shinjigen-1-radical attributes))
1199     ;;   )
1200     (when (and (memq 'ideographic- attributes)
1201                (setq value (get-char-attribute char 'ideographic-)))
1202       (insert "(ideographic-       ")
1203       (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1204             separator nil)
1205       (while (consp value)
1206         (setq cell (car value))
1207         (if (integerp cell)
1208             (setq cell (decode-char '=ucs cell)))
1209         (cond ((characterp cell)
1210                (if separator
1211                    (insert lbs))
1212                (if readable
1213                    (insert (format "%S" cell))
1214                  (char-db-insert-char-spec cell readable))
1215                (setq separator lbs))
1216               ((consp cell)
1217                (if separator
1218                    (insert lbs))
1219                (if (consp (car cell))
1220                    (char-db-insert-char-spec cell readable)
1221                  (char-db-insert-char-reference cell readable))
1222                (setq separator lbs))
1223               (t
1224                (if separator
1225                    (insert separator))
1226                (insert (prin1-to-string cell))
1227                (setq separator " ")))
1228         (setq value (cdr value)))
1229       (insert ")")
1230       (insert line-breaking)
1231       (setq attributes (delq 'ideographic- attributes)))
1232     (when (and (memq 'total-strokes attributes)
1233                (setq value (get-char-attribute char 'total-strokes)))
1234       (insert (format "(total-strokes       . %S)%s"
1235                       value
1236                       line-breaking))
1237       (setq attributes (delq 'total-strokes attributes))
1238       )
1239     (when (and (memq '->ideograph attributes)
1240                (setq value (get-char-attribute char '->ideograph)))
1241       (insert (format "(->ideograph\t%s)%s"
1242                       (mapconcat (lambda (code)
1243                                    (cond ((symbolp code)
1244                                           (symbol-name code))
1245                                          ((integerp code)
1246                                           (format "#x%04X" code))
1247                                          (t
1248                                           (format "%s %S"
1249                                                   line-breaking code))))
1250                                  value " ")
1251                       line-breaking))
1252       (setq attributes (delq '->ideograph attributes))
1253       )
1254     ;; (when (and (memq '->decomposition attributes)
1255     ;;            (setq value (get-char-attribute char '->decomposition)))
1256     ;;   (insert (format "(->decomposition\t%s)%s"
1257     ;;                   (mapconcat (lambda (code)
1258     ;;                                (cond ((symbolp code)
1259     ;;                                       (symbol-name code))
1260     ;;                                      ((characterp code)
1261     ;;                                       (if readable
1262     ;;                                           (format "%S" code)
1263     ;;                                         (format "#x%04X"
1264     ;;                                                 (char-int code))
1265     ;;                                         ))
1266     ;;                                      ((integerp code)
1267     ;;                                       (format "#x%04X" code))
1268     ;;                                      (t
1269     ;;                                       (format "%s%S" line-breaking code))))
1270     ;;                              value " ")
1271     ;;                   line-breaking))
1272     ;;   (setq attributes (delq '->decomposition attributes))
1273     ;;   )
1274     (if (equal (get-char-attribute char '->titlecase)
1275                (get-char-attribute char '->uppercase))
1276         (setq attributes (delq '->titlecase attributes)))
1277     (when (and (memq '->mojikyo attributes)
1278                (setq value (get-char-attribute char '->mojikyo)))
1279       (insert (format "(->mojikyo\t\t. %06d)\t; %c%s"
1280                       value (decode-char 'mojikyo value)
1281                       line-breaking))
1282       (setq attributes (delq '->mojikyo attributes))
1283       )
1284     (when (and (memq 'hanyu-dazidian-vol attributes)
1285                (setq value (get-char-attribute char 'hanyu-dazidian-vol)))
1286       (insert (format "(hanyu-dazidian-vol  . %d)%s"
1287                       value line-breaking))
1288       (setq attributes (delq 'hanyu-dazidian-vol attributes))
1289       )
1290     (when (and (memq 'hanyu-dazidian-page attributes)
1291                (setq value (get-char-attribute char 'hanyu-dazidian-page)))
1292       (insert (format "(hanyu-dazidian-page . %d)%s"
1293                       value line-breaking))
1294       (setq attributes (delq 'hanyu-dazidian-page attributes))
1295       )
1296     (when (and (memq 'hanyu-dazidian-char attributes)
1297                (setq value (get-char-attribute char 'hanyu-dazidian-char)))
1298       (insert (format "(hanyu-dazidian-char . %d)%s"
1299                       value line-breaking))
1300       (setq attributes (delq 'hanyu-dazidian-char attributes))
1301       )
1302     (unless readable
1303       (dolist (ignored '(composition
1304                          ->denotational <-subsumptive ->ucs-unified
1305                          ->ideographic-component-forms))
1306         (setq attributes (delq ignored attributes))))
1307     (while attributes
1308       (setq name (car attributes))
1309       (unless (eq (setq value (get-char-attribute char name 'value-is-empty))
1310                   'value-is-empty)
1311         (cond ((setq ret (find-charset name))
1312                (setq name (charset-name ret))
1313                (when (not (memq name dest-ccss))
1314                  (setq dest-ccss (cons name dest-ccss))
1315                  (char-db-insert-ccs-feature name value line-breaking))
1316                )
1317               ((string-match "^=>ucs@" (symbol-name name))
1318                (insert (format "(%-18s . #x%04X)\t; %c%s"
1319                                name value (decode-char '=ucs value)
1320                                line-breaking))
1321                )
1322               ((eq name 'jisx0208-1978/4X)
1323                (insert (format "(%-18s . #x%04X)%s"
1324                                name value
1325                                line-breaking))
1326                )
1327               ((and
1328                 (not readable)
1329                 (not (eq name '->subsumptive))
1330                 (not (eq name '->uppercase))
1331                 (not (eq name '->lowercase))
1332                 (not (eq name '->titlecase))
1333                 (not (eq name '->canonical))
1334                 (not (eq name '->Bopomofo))
1335                 (not (eq name '->mistakable))
1336                 (not (eq name '->ideographic-variants))
1337                 (null (get-char-attribute
1338                        char (intern (format "%s*sources" name))))
1339                 (not (string-match "\\*sources$" (symbol-name name)))
1340                 (null (get-char-attribute
1341                        char (intern (format "%s*note" name))))
1342                 (not (string-match "\\*note$" (symbol-name name)))
1343                 (or (eq name '<-identical)
1344                     (eq name '<-uppercase)
1345                     (eq name '<-lowercase)
1346                     (eq name '<-titlecase)
1347                     (eq name '<-canonical)
1348                     (eq name '<-ideographic-variants)
1349                     ;; (eq name '<-synonyms)
1350                     (string-match "^<-synonyms" (symbol-name name))
1351                     (eq name '<-mistakable)
1352                     (when (string-match "^->" (symbol-name name))
1353                       (cond
1354                        ((string-match "^->fullwidth" (symbol-name name))
1355                         (not (and (consp value)
1356                                   (characterp (car value))
1357                                   (encode-char
1358                                    (car value) '=ucs 'defined-only)))
1359                         )
1360                        (t)))
1361                     ))
1362                )
1363               ((or (eq name 'ideographic-structure)
1364                    (eq name 'ideographic-combination)
1365                    (eq name 'ideographic-)
1366                    (eq name '=decomposition)
1367                    (char-feature-base-name= '=decomposition name)
1368                    (char-feature-base-name= '=>decomposition name)
1369                    ;; (string-match "^=>*decomposition\\(@[^*]+\\)?$"
1370                    ;;               (symbol-name name))
1371                    (string-match "^\\(->\\|<-\\)[^*]*$" (symbol-name name))
1372                    (string-match "^\\(->\\|<-\\)[^*]*\\*sources$"
1373                                  (symbol-name name))
1374                    )
1375                (char-db-insert-relation-feature char name value
1376                                                 line-breaking
1377                                                 ccss readable))
1378               ((memq name '(ideograph=
1379                             original-ideograph-of
1380                             ancient-ideograph-of
1381                             vulgar-ideograph-of
1382                             wrong-ideograph-of
1383                             ;; simplified-ideograph-of
1384                             ideographic-variants
1385                             ;; ideographic-different-form-of
1386                             ))
1387                (insert (format "(%-18s%s " name line-breaking))
1388                (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1389                      separator nil)
1390                (while (consp value)
1391                  (setq cell (car value))
1392                  (if (and (consp cell)
1393                           (consp (car cell)))
1394                      (progn
1395                        (if separator
1396                            (insert lbs))
1397                        (char-db-insert-alist cell readable)
1398                        (setq separator lbs))
1399                    (if separator
1400                        (insert separator))
1401                    (insert (prin1-to-string cell))
1402                    (setq separator " "))
1403                  (setq value (cdr value)))
1404                (insert ")")
1405                (insert line-breaking))
1406               ((consp value)
1407                (insert (format "(%-18s " name))
1408                (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1409                      separator nil)
1410                (while (consp value)
1411                  (setq cell (car value))
1412                  (if (and (consp cell)
1413                           (consp (car cell))
1414                           (setq ret (condition-case nil
1415                                         (find-char cell)
1416                                       (error nil))))
1417                      (progn
1418                        (setq rest cell
1419                              al nil
1420                              cal nil)
1421                        (while rest
1422                          (setq key (car (car rest)))
1423                          (if (find-charset key)
1424                              (setq cal (cons key cal))
1425                            (setq al (cons key al)))
1426                          (setq rest (cdr rest)))
1427                        (if separator
1428                            (insert lbs))
1429                        (insert-char-attributes ret
1430                                                readable
1431                                                al ; cal
1432                                                nil 'for-sub-node)
1433                        (setq separator lbs))
1434                    (setq ret (prin1-to-string cell))
1435                    (if separator
1436                        (if (< (+ (current-column)
1437                                  (length ret)
1438                                  (length separator))
1439                               76)
1440                            (insert separator)
1441                          (insert lbs)))
1442                    (insert ret)
1443                    (setq separator " "))
1444                  (setq value (cdr value)))
1445                (insert ")")
1446                (insert line-breaking))
1447               (t
1448                (insert (format "(%-18s" name))
1449                (setq ret (prin1-to-string value))
1450                (unless (< (+ (current-column)
1451                              (length ret)
1452                              3)
1453                           76)
1454                  (insert line-breaking))
1455                (insert " . " ret ")" line-breaking)
1456                ;; (insert (format "(%-18s . %S)%s"
1457                ;;                 name value
1458                ;;                 line-breaking))
1459                )
1460               ))
1461       (setq attributes (cdr attributes)))
1462     (insert ")")))
1463
1464 (defun insert-char-data (char &optional readable
1465                               attributes)
1466   (save-restriction
1467     (narrow-to-region (point)(point))
1468     (insert "(define-char
1469   '")
1470     (insert-char-attributes char readable attributes)
1471     (insert ")\n")
1472     (goto-char (point-min))
1473     (while (re-search-forward "[ \t]+$" nil t)
1474       (replace-match ""))
1475     ;; from tabify.
1476     (goto-char (point-min))
1477     (while (re-search-forward "[ \t][ \t][ \t]*" nil t)
1478       (let ((column (current-column))
1479             (indent-tabs-mode t))
1480         (delete-region (match-beginning 0) (point))
1481         (indent-to column)))
1482     (goto-char (point-max))
1483     ;; (tabify (point-min)(point-max))
1484     ))
1485
1486 (defun insert-char-data-with-variant (char &optional printable
1487                                            no-ucs-unified
1488                                            script excluded-script)
1489   (insert-char-data char printable)
1490   (let ((variants (char-variants char))
1491         rest
1492         variant vs ret)
1493     (setq variants (sort variants #'<))
1494     (setq rest variants)
1495     (setq variants (cons char variants))
1496     (while rest
1497       (setq variant (car rest))
1498       (unless (get-char-attribute variant '<-subsumptive)
1499         (if (and (or (null script)
1500                      (null (setq vs (get-char-attribute variant 'script)))
1501                      (memq script vs))
1502                  (or (null excluded-script)
1503                      (null (setq vs (get-char-attribute variant 'script)))
1504                      (not (memq excluded-script vs))))
1505             (unless (and no-ucs-unified (get-char-attribute variant '=ucs))
1506               (insert-char-data variant printable)
1507               (if (setq ret (char-variants variant))
1508                   (while ret
1509                     (or (memq (car ret) variants)
1510                         ;; (get-char-attribute (car ret) '<-subsumptive)
1511                         (setq rest (nconc rest (list (car ret)))))
1512                     (setq ret (cdr ret)))))))
1513       (setq rest (cdr rest)))))
1514
1515 (defun insert-char-range-data (min max &optional script excluded-script)
1516   (let ((code min)
1517         char)
1518     (while (<= code max)
1519       (setq char (decode-char '=ucs code))
1520       (if (encode-char char '=ucs 'defined-only)
1521           (insert-char-data-with-variant char nil 'no-ucs-unified
1522                                          script excluded-script))
1523       (setq code (1+ code)))))
1524
1525 (defun write-char-range-data-to-file (min max file
1526                                           &optional script excluded-script)
1527   (let ((coding-system-for-write char-db-file-coding-system))
1528     (with-temp-buffer
1529       (insert (format ";; -*- coding: %s -*-\n"
1530                       char-db-file-coding-system))
1531       (insert-char-range-data min max script excluded-script)
1532       (write-region (point-min)(point-max) file))))
1533
1534 (defvar what-character-original-window-configuration)
1535
1536 ;;;###autoload
1537 (defun what-char-definition (char)
1538   (interactive (list (char-after)))
1539   (let ((buf (get-buffer-create "*Character Description*"))
1540         (the-buf (current-buffer))
1541         (win-conf (current-window-configuration)))
1542     (pop-to-buffer buf)
1543     (make-local-variable 'what-character-original-window-configuration)
1544     (setq what-character-original-window-configuration win-conf)
1545     (setq buffer-read-only nil)
1546     (erase-buffer)
1547     (condition-case err
1548         (progn
1549           (insert-char-data-with-variant char 'printable)
1550           (unless (char-attribute-alist char)
1551             (insert (format ";; = %c\n"
1552                             (let* ((rest (split-char char))
1553                                    (ccs (pop rest))
1554                                    (code (pop rest)))
1555                               (while rest
1556                                 (setq code (logior (lsh code 8)
1557                                                    (pop rest))))
1558                               (decode-char ccs code)))))
1559           ;; (char-db-update-comment)
1560           (set-buffer-modified-p nil)
1561           (view-mode the-buf (lambda (buf)
1562                                (set-window-configuration
1563                                 what-character-original-window-configuration)
1564                                ))
1565           (goto-char (point-min)))
1566       (error (progn
1567                (set-window-configuration
1568                 what-character-original-window-configuration)
1569                (signal (car err) (cdr err)))))))
1570
1571
1572 ;;; @ end
1573 ;;;
1574
1575 (provide 'char-db-util)
1576
1577 ;;; char-db-util.el ends here