(U+5396): Apply new conventions for glyph granularity.
[chise/xemacs-chise.git.1] / lisp / utf-2000 / char-db-util.el
1 ;;; char-db-util.el --- Character Database utility -*- coding: utf-8-er; -*-
2
3 ;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
4 ;;   2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 MORIOKA Tomohiko.
5
6 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
7 ;; Keywords: CHISE, Character Database, ISO/IEC 10646, UCS, Unicode, MULE.
8
9 ;; This file is part of XEmacs CHISE.
10
11 ;; XEmacs CHISE is free software; you can redistribute it and/or
12 ;; modify it under the terms of the GNU General Public License as
13 ;; published by the Free Software Foundation; either version 2, or (at
14 ;; your option) any later version.
15
16 ;; XEmacs CHISE is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 ;; General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with XEmacs CHISE; see the file COPYING.  If not, write to
23 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Code:
27
28 (require 'chise-subr)
29 (require 'ideograph-subr)
30
31 (defconst unidata-normative-category-alist
32   '(("Lu" letter        uppercase)
33     ("Ll" letter        lowercase)
34     ("Lt" letter        titlecase)
35     ("Mn" mark          non-spacing)
36     ("Mc" mark          spacing-combining)
37     ("Me" mark          enclosing)
38     ("Nd" number        decimal-digit)
39     ("Nl" number        letter)
40     ("No" number        other)
41     ("Zs" separator     space)
42     ("Zl" separator     line)
43     ("Zp" separator     paragraph)
44     ("Cc" other         control)
45     ("Cf" other         format)
46     ("Cs" other         surrogate)
47     ("Co" other         private-use)
48     ("Cn" other         not-assigned)))
49
50 (defconst unidata-informative-category-alist
51   '(("Lm" letter        modifier)
52     ("Lo" letter        other)
53     ("Pc" punctuation   connector)
54     ("Pd" punctuation   dash)
55     ("Ps" punctuation   open)
56     ("Pe" punctuation   close)
57     ("Pi" punctuation   initial-quote)
58     ("Pf" punctuation   final-quote)
59     ("Po" punctuation   other)
60     ("Sm" symbol        math)
61     ("Sc" symbol        currency)
62     ("Sk" symbol        modifier)
63     ("So" symbol        other)
64     ))
65
66 (defconst shuowen-radicals
67   [?一 ?上 ?示 ?三 ?王 ?玉 ?玨 ?气 ?士 ?丨 ; 010
68    ?屮 ?艸 ?蓐 ?茻 ?小 ?八 ?釆 ?半 ?牛 ?犛 ; 020
69    ?告 ?口 ?凵 ?吅 ?哭 ?走 ?止 ?癶 ?步 ?此 ; 030
70    ?正 ?是 ?辵 ?彳 ?廴 ?㢟 ?行 ?齒 ?牙 ?足 ; 040
71    ?疋 ?品 ?龠 ?冊 ?㗊 ?舌 ?干 ?谷 ?只 ?㕯 ; 050
72    ?句 ?丩 ?古 ?十 ?卅 ?言 ?誩 ?音 ?䇂 ?丵 ; 060
73    ?菐 ?𠬞 ?𠬜 ?共 ?異 ?舁 ?𦥑 ?䢅 ?爨 ?革 ; 070
74    ?鬲 ?䰜 ?爪 ?𠃨 ?鬥 ?又 ?𠂇 ?㕜 ?支 ?𦘒 ; 080
75    ?聿 ?畫 ?隶 ?臤 ?臣 ?殳 ?殺 ?𠘧 ?寸 ?皮 ; 090
76    ?㼱 ?攴 ?敎 ?卜 ?用 ?爻 ?㸚 ?𥄎 ?目 ?䀠 ; 100
77    ?眉 ?盾 ?自 ?白 ?鼻 ?皕 ?習 ?羽 ?隹 ?奞 ; 110
78    ?萑 ?𦫳 ?苜 ?羊 ?羴 ?瞿 ?雔 ?雥 ?鳥 ?烏 ; 120
79    ?𠦒 ?冓 ?幺 ?𢆶 ?叀 ?玄 ?予 ?放 ?𠬪 ?𣦼 ; 130
80    ?歺 ?死 ?冎 ?骨 ?肉 ?筋 ?刀 ?刃 ?㓞 ?丰 ; 140
81    ?耒 ?𧢲 ?竹 ?箕 ?丌 ?左 ?工 ?㠭 ?巫 ?甘 ; 150
82    ?曰 ?乃 ?丂 ?可 ?兮 ?号 ?亏 ?旨 ?喜 ?壴 ; 160
83 ;  ?旨 ?曰 ?乃 ?丂 ?可 ?兮 ?号 ?亏 ?喜 ?壴 ; 160
84    ?鼓 ?豈 ?豆 ?豊 ?豐 ?䖒 ?虍 ?虎 ?虤 ?皿 ; 170
85    ?𠙴 ?去 ?血 ?丶 ?丹 ?青 ?井 ?皀 ?鬯 ?食 ; 180
86    ?亼 ?會 ?倉 ?入 ?缶 ?矢 ?高 ?冂 ?𩫏 ?京 ; 190
87    ?亯 ?𣆪 ?畗 ?㐭 ?嗇 ?來 ?麥 ?夊 ?舛 ?䑞 ; 200
88    ?韋 ?弟 ?夂 ?久 ?桀 ?木 ?東 ?林 ?才 ?叒 ; 210
89    ?之 ?帀 ?出 ?𣎵 ?生 ?乇 ?𠂹 ?𠌶 ?華 ?𥝌 ; 220
90    ?稽 ?巢 ?桼 ?束 ?㯻 ?囗 ?員 ?貝 ?邑 ?𨛜 ; 230
91    ?日 ?旦 ?倝 ?㫃 ?冥 ?晶 ?月 ?有 ?明 ?囧 ; 240
92    ?夕 ?多 ?毌 ?𢎘 ?𣐺 ?卣 ?齊 ?朿 ?片 ?鼎 ; 250
93    ?克 ?彔 ?禾 ?秝 ?黍 ?香 ?米 ?毇 ?臼 ?凶 ; 260
94    ?𣎳 ?林 ?麻 ?尗 ?耑 ?韭 ?瓜 ?瓠 ?宀 ?宮 ; 270
95    ?呂 ?穴 ?㝱 ?𤕫 ?冖 ?𠔼 ?冃 ?㒳 ?网 ?襾 ; 280
96    ?巾 ?巿 ?帛 ?白 ?㡀 ?黹 ?人 ?𠤎 ?匕 ?从 ; 290
97    ?比 ?北 ?丘 ?㐺 ?𡈼 ?重 ?臥 ?身 ?㐆 ?衣 ; 300
98    ?裘 ?老 ?毛 ?毳 ?尸 ?尺 ?尾 ?履 ?舟 ?方 ; 310
99    ?儿 ?兄 ?兂 ?皃 ?𠑹 ?先 ?秃 ?見 ?覞 ?欠 ; 320
100    ?㱃 ?㳄 ?旡 ?頁 ?𦣻 ?面 ?丏 ?首 ?𥄉 ?須 ; 330
101    ?彡 ?彣 ?文 ?髟 ?后 ?司 ?卮 ?卩 ?印 ?色 ; 340
102    ])
103
104 (defun shuowen-radical (number)
105   (aref shuowen-radicals (1- number)))
106
107 (defvar char-db-file-coding-system 'utf-8-mcs-er)
108
109 (defvar char-db-ignored-attributes '(ideographic-products))
110
111 (defvar char-db-coded-charset-priority-list
112   '(ascii
113     control-1
114     latin-iso8859-1
115     latin-iso8859-2
116     latin-iso8859-3
117     latin-iso8859-4
118     latin-iso8859-9
119     latin-jisx0201
120     cyrillic-iso8859-5
121     greek-iso8859-7
122     thai-tis620
123     ;; =mj
124     ;; =adobe-japan1-0
125     ;; =adobe-japan1-1
126     ;; =adobe-japan1-2
127     ;; =adobe-japan1-3
128     ;; =adobe-japan1-4
129     ;; =adobe-japan1-5
130     ;; =adobe-japan1-6
131     =jis-x0208
132     =jis-x0208@1978
133     =jis-x0208@1983
134     japanese-jisx0212
135     chinese-gb2312
136     =jis-x0208@1990
137     chinese-cns11643-1
138     chinese-cns11643-2
139     chinese-cns11643-3
140     chinese-cns11643-4
141     chinese-cns11643-5
142     chinese-cns11643-6
143     chinese-cns11643-7
144     =jis-x0213-1
145     =jis-x0213-1@2000
146     =jis-x0213-1@2004
147     =jis-x0213-2
148     korean-ksc5601
149     chinese-isoir165
150     katakana-jisx0201
151     hebrew-iso8859-8
152     chinese-gb12345
153     latin-viscii
154     ethiopic-ucs
155     =big5-cdp
156     =hanyo-denshi/ja
157     =hanyo-denshi/jb
158     =hanyo-denshi/jc
159     =hanyo-denshi/jd
160     =hanyo-denshi/ft
161     =hanyo-denshi/ia
162     =hanyo-denshi/ib
163     =hanyo-denshi/hg
164     =hanyo-denshi/jt
165     =hanyo-denshi/ks
166     ;; ==mj
167     ;; ==adobe-japan1-0
168     ;; ==adobe-japan1-1
169     ;; ==adobe-japan1-2
170     ;; ==adobe-japan1-3
171     ;; ==adobe-japan1-4
172     ;; ==adobe-japan1-5
173     ;; ==adobe-japan1-6
174     ==jis-x0208
175     ==jis-x0213-1
176     ==jis-x0213-2
177     ==hanyo-denshi/ja
178     ==hanyo-denshi/jb
179     ==hanyo-denshi/jc
180     ==hanyo-denshi/ft
181     ==hanyo-denshi/ib
182     ==hanyo-denshi/hg
183     ==hanyo-denshi/jt
184     ==hanyo-denshi/ks
185     =gt
186     =gt-k
187     =daikanwa
188     =daikanwa@rev2
189     =daikanwa@rev1
190     ==daikanwa
191     =cbeta
192     ideograph-hanziku-1
193     ideograph-hanziku-2
194     ideograph-hanziku-3
195     ideograph-hanziku-4
196     ideograph-hanziku-5
197     ideograph-hanziku-6
198     ideograph-hanziku-7
199     ideograph-hanziku-8
200     ideograph-hanziku-9
201     ideograph-hanziku-10
202     ideograph-hanziku-11
203     ideograph-hanziku-12
204     ;; =>>>adobe-japan1-0
205     ;; =>>>adobe-japan1-1
206     ;; =>>>adobe-japan1-2
207     ;; =>>>adobe-japan1-3
208     ;; =>>>adobe-japan1-4
209     ;; =>>>adobe-japan1-5
210     ;; =>>>adobe-japan1-6
211     ;; =>>>jis-x0208
212     ;; =>>>jis-x0213-1
213     ;; =>>>jis-x0213-2
214     ;; =>>>hanyo-denshi/ja
215     ;; =>>>hanyo-denshi/jb
216     ;; =>>>hanyo-denshi/jc
217     ;; =>>>hanyo-denshi/ft
218     ;; =>>>hanyo-denshi/ib
219     ;; =>>>hanyo-denshi/hg
220     ;; =>>>hanyo-denshi/jt
221     ;; =>>>hanyo-denshi/ks
222     ;; =>>>gt
223     =>>adobe-japan1-0
224     =>>adobe-japan1-1
225     =>>adobe-japan1-2
226     =>>adobe-japan1-3
227     =>>adobe-japan1-4
228     =>>adobe-japan1-5
229     =>>adobe-japan1-6
230     =>>jis-x0208
231     =>>jis-x0213-1
232     =>>jis-x0213-1@2000
233     =>>jis-x0213-1@2004
234     =>>jis-x0213-2
235     =>>jis-x0208@1978
236     =>>hanyo-denshi/ft
237     =>>hanyo-denshi/jt
238     =>>hanyo-denshi/ks
239     =>>gt
240     =>>daikanwa
241     =>>cbeta
242     =+>jis-x0208
243     =+>jis-x0213-1
244     =+>jis-x0213-2
245     =+>adobe-japan1-0
246     =+>adobe-japan1-1
247     =+>adobe-japan1-2
248     =+>adobe-japan1-3
249     =+>adobe-japan1-4
250     =+>adobe-japan1-5
251     =+>adobe-japan1-6
252     =+>jis-x0208@1978
253     =>jis-x0208
254     =>jis-x0208@1997
255     =>jis-x0213-1
256     =>jis-x0213-1@2000
257     =>jis-x0213-1@2004
258     =>jis-x0213-2
259     ==>ucs@bucs
260     =>ucs@hanyo-denshi
261     =>ucs@iso
262     =>ucs@unicode
263     =>ucs@jis
264     =>ucs@cns
265     =>ucs@ks
266     =+>ucs@iso
267     =+>ucs@unicode
268     =+>ucs@jis
269     =+>ucs@jis/1990
270     =+>ucs@cns
271     =+>ucs@ks
272     =>>ucs@iso
273     =>>ucs@unicode
274     =>>ucs@jis
275     =>>ucs@cns
276     =>>>ucs@iso
277     =>>>ucs@unicode
278     ==ucs@iso
279     ==ucs@unicode
280     ;; ==ucs@cns
281     ==gb2312
282     ==ks-x1001
283     ==cns11643-1
284     ==cns11643-2
285     ==cns11643-3
286     ==cns11643-4
287     ==cns11643-5
288     ==cns11643-6
289     ==cns11643-7
290     ==gt
291     ==jis-x0208@1990
292     ;; ==jis-x0208@1983
293     ;; ==jis-x0208@1978
294     ==gt-k
295     =ucs@iso
296     =ucs@unicode
297     =ucs@cns
298     ==big5-cdp
299     ==cbeta
300     =>>big5-cdp
301     =>>gt-k
302     =+>gt
303     =>gt
304     =>big5-cdp
305     =>daikanwa
306     =>daikanwa/ho
307     =>cns11643-7
308     =big5
309     =big5-eten
310     =>gt-k
311     =zinbun-oracle
312     =>zinbun-oracle
313     =ruimoku-v6
314     =>>ruimoku-v6
315     =jef-china3
316     =shinjigen
317     =big5-cdp-var-3
318     =big5-cdp-var-5))
319
320
321 ;;; @ char-db formatters
322 ;;;
323
324 (defun char-db-make-char-spec (char)
325   (let (ret char-spec)
326     (cond ((characterp char)
327            (cond ((and (setq ret (encode-char char '=ucs 'defined-only))
328                        (not (and (<= #xE000 ret)(<= ret #xF8FF))))
329                   (setq char-spec (list (cons '=ucs ret)))
330                   (cond ((setq ret (get-char-attribute char 'name))
331                          (setq char-spec (cons (cons 'name ret) char-spec))
332                          )
333                         ((setq ret (get-char-attribute char 'name*))
334                          (setq char-spec (cons (cons 'name* ret) char-spec))
335                          ))
336                   )
337                  ((encode-char char '=mj 'defined-only)
338                   (setq char-spec nil)
339                   (dolist (ccs (charset-list))
340                     (if (and (or (eq ccs '=mj)
341                                  ;; (eq (charset-property ccs 'iso-ir) 177)
342                                  (string-match "=ucs@" (symbol-name ccs))
343                                  )
344                              (setq ccs (charset-name ccs))
345                              (null (assq ccs char-spec))
346                              (setq ret (encode-char char ccs 'defined-only)))
347                         (setq char-spec (cons (cons ccs ret) char-spec))))
348                   )
349                  ((encode-char char '==mj 'defined-only)
350                   (setq char-spec nil)
351                   (dolist (ccs (charset-list))
352                     (if (and (or (eq ccs '==mj)
353                                  ;; (eq (charset-property ccs 'iso-ir) 177)
354                                  (string-match "=ucs@" (symbol-name ccs))
355                                  )
356                              (setq ccs (charset-name ccs))
357                              (null (assq ccs char-spec))
358                              (setq ret (encode-char char ccs 'defined-only)))
359                         (setq char-spec (cons (cons ccs ret) char-spec))))
360                   )
361                  ((encode-char char '=adobe-japan1 'defined-only)
362                   (setq char-spec nil)
363                   (dolist (ccs (charset-list))
364                     (if (and (or (memq ccs
365                                        '(=adobe-japan1-0
366                                          =adobe-japan1-1
367                                          =adobe-japan1-2
368                                          =adobe-japan1-3
369                                          =adobe-japan1-4
370                                          =adobe-japan1-5
371                                          =adobe-japan1-6
372                                          ))
373                                  ;; (eq (charset-property ccs 'iso-ir) 177)
374                                  (string-match "=ucs@" (symbol-name ccs))
375                                  )
376                              (setq ccs (charset-name ccs))
377                              (null (assq ccs char-spec))
378                              (setq ret (encode-char char ccs 'defined-only)))
379                         (setq char-spec (cons (cons ccs ret) char-spec))))
380                   )
381                  ((encode-char char '==adobe-japan1 'defined-only)
382                   (setq char-spec nil)
383                   (dolist (ccs (charset-list))
384                     (if (and (or (memq ccs
385                                        '(==adobe-japan1-0
386                                          ==adobe-japan1-1
387                                          ==adobe-japan1-2
388                                          ==adobe-japan1-3
389                                          ==adobe-japan1-4
390                                          ==adobe-japan1-5
391                                          ==adobe-japan1-6
392                                          ))
393                                  ;; (eq (charset-property ccs 'iso-ir) 177)
394                                  (string-match "=ucs@" (symbol-name ccs))
395                                  )
396                              (setq ccs (charset-name ccs))
397                              (null (assq ccs char-spec))
398                              (setq ret (encode-char char ccs 'defined-only)))
399                         (setq char-spec (cons (cons ccs ret) char-spec))))
400                   )
401                  ((setq ret
402                         (catch 'tag
403                           (let ((rest char-db-coded-charset-priority-list)
404                                 ccs)
405                             (while rest
406                               (setq ccs (charset-name
407                                          (find-charset (car rest))))
408                               (if (setq ret
409                                         (encode-char char ccs
410                                                      'defined-only))
411                                   (throw 'tag (cons ccs ret)))
412                               (setq rest (cdr rest))))))
413                   (setq char-spec (list ret))
414                   (dolist (ccs (delq (car ret) (charset-list)))
415                     (if (and (or (charset-iso-final-char ccs)
416                                  (memq ccs
417                                        '(=daikanwa
418                                          =daikanwa@rev2
419                                          ;; =gt-k
420                                          =jis-x0208@1997
421                                          ))
422                                  (eq (charset-property ccs 'iso-ir) 177)
423                                  ;; (string-match "=ucs@" (symbol-name ccs))
424                                  )
425                              (setq ccs (charset-name ccs))
426                              (null (assq ccs char-spec))
427                              (setq ret (encode-char char ccs 'defined-only)))
428                         (setq char-spec (cons (cons ccs ret) char-spec))))
429                   (if (null char-spec)
430                       (setq char-spec (split-char char)))
431                   (cond ((setq ret (get-char-attribute char 'name))
432                          (setq char-spec (cons (cons 'name ret) char-spec))
433                          )
434                         ((setq ret (get-char-attribute char 'name*))
435                          (setq char-spec (cons (cons 'name* ret) char-spec))
436                          ))
437                   )
438                  ((setq ret (get-char-attribute
439                              char 'ideographic-combination))
440                   (setq char-spec
441                         (cons (cons 'ideographic-combination ret)
442                               char-spec))
443                   ))
444            char-spec)
445           ((consp char)
446            char))))
447     
448 (defun char-db-insert-char-spec (char &optional readable column
449                                       required-features)
450   (unless column
451     (setq column (current-column)))
452   (let (char-spec temp-char)
453     (setq char-spec (char-db-make-char-spec char))
454     (unless (or (characterp char) ; char
455                 (condition-case nil
456                     (setq char (find-char char-spec))
457                   (error nil)))
458       ;; define temporary character
459       ;;   Current implementation is dirty.
460       (setq temp-char (define-char (cons '(ideograph-daikanwa . 0)
461                                          char-spec)))
462       (remove-char-attribute temp-char 'ideograph-daikanwa)
463       (setq char temp-char))
464     (insert-char-attributes char
465                             readable
466                             (union (mapcar #'car char-spec)
467                                    required-features)
468                             nil 'for-sub-node)
469     (when temp-char
470       ;; undefine temporary character
471       ;;   Current implementation is dirty.
472       (setq char-spec (char-attribute-alist temp-char))
473       (while char-spec
474         (remove-char-attribute temp-char (car (car char-spec)))
475         (setq char-spec (cdr char-spec))))))
476
477 (defun char-db-insert-alist (alist &optional readable column)
478   (unless column
479     (setq column (current-column)))
480   (let ((line-breaking
481          (concat "\n" (make-string (1+ column) ?\ )))
482         name value
483         ret al ; cal
484         key
485         lbs cell rest separator)
486     (insert "(")
487     (while alist
488       (setq name (car (car alist))
489             value (cdr (car alist)))
490       (cond ((eq name 'char)
491              (insert "(char . ")
492              (if (setq ret (condition-case nil
493                                (find-char value)
494                              (error nil)))
495                  (progn
496                    (setq al nil
497                          ;; cal nil
498                          )
499                    (while value
500                      (setq key (car (car value)))
501                      ;; (if (find-charset key)
502                      ;;     (setq cal (cons key cal))
503                      (setq al (cons key al))
504                      ;; )
505                      (setq value (cdr value)))
506                    (insert-char-attributes ret
507                                            readable
508                                            (or al 'none) ; cal
509                                            nil 'for-sub-node))
510                (insert (prin1-to-string value)))
511              (insert ")")
512              (insert line-breaking))
513             ((consp value)
514              (insert (format "(%-18s " name))
515              (setq lbs (concat "\n" (make-string (current-column) ?\ )))
516              (while (consp value)
517                (setq cell (car value))
518                (if (and (consp cell)
519                         (consp (car cell))
520                         (setq ret (condition-case nil
521                                       (find-char cell)
522                                     (error nil)))
523                         )
524                    (progn
525                      (setq rest cell
526                            al nil
527                            ;; cal nil
528                            )
529                      (while rest
530                        (setq key (car (car rest)))
531                        ;; (if (find-charset key)
532                        ;;     (setq cal (cons key cal))
533                        (setq al (cons key al))
534                        ;; )
535                        (setq rest (cdr rest)))
536                      (if separator
537                          (insert lbs))
538                      (insert-char-attributes ret
539                                              readable
540                                              al ; cal
541                                              nil 'for-sub-node)
542                      (setq separator lbs))
543                  (if separator
544                      (insert separator))
545                  (insert (prin1-to-string cell))
546                  (setq separator " "))
547                (setq value (cdr value)))
548              (insert ")")
549              (insert line-breaking))
550             (t
551              (insert (format "(%-18s . %S)%s"
552                              name value
553                              line-breaking))))
554       (setq alist (cdr alist))))
555   (insert ")"))
556
557 (defun char-db-insert-char-reference (plist &optional readable column)
558   (unless column
559     (setq column (current-column)))
560   (let ((line-breaking
561          (concat "\n" (make-string (1+ column) ?\ )))
562         (separator "")
563         name value)
564     (insert "(")
565     (while plist
566       (setq name (pop plist))
567       (setq value (pop plist))
568       (cond ((eq name :char)
569              (insert separator)
570              (insert ":char\t")
571              (cond ((numberp value)
572                     (setq value (decode-char '=ucs value)))
573                    ;; ((consp value)
574                    ;;  (setq value (or (find-char value)
575                    ;;                  value)))
576                    )
577              (char-db-insert-char-spec value readable)
578              (insert line-breaking)
579              (setq separator ""))
580             ((eq name :radical)
581              (insert (format "%s%s\t%d ; %c%s"
582                              separator
583                              name value
584                              (ideographic-radical value)
585                              line-breaking))
586              (setq separator ""))
587             (t
588              (insert (format "%s%s\t%S" separator name value))
589              (setq separator line-breaking)))
590       ))
591   (insert ")"))
592
593 (defun char-db-decode-isolated-char (ccs code-point)
594   (let (ret)
595     (setq ret
596           (cond ((eq ccs 'arabic-iso8859-6)
597                  (decode-char ccs code-point))
598                 ;; ((eq ccs '=gt)
599                 ;;  (decode-builtin-char '==gt code-point))
600                 ((and (memq ccs '(=gt-pj-1
601                                   =gt-pj-2
602                                   =gt-pj-3
603                                   =gt-pj-4
604                                   =gt-pj-5
605                                   =gt-pj-6
606                                   =gt-pj-7
607                                   =gt-pj-8
608                                   =gt-pj-9
609                                   =gt-pj-10
610                                   =gt-pj-11))
611                       (setq ret (decode-char ccs code-point))
612                       (setq ret (encode-char ret '=gt 'defined-only)))
613                  (decode-builtin-char '=gt ret))
614                 (t
615                  (decode-builtin-char ccs code-point))))
616     (cond ((and (<= 0 (char-int ret))
617                 (<= (char-int ret) #x1F))
618            (decode-char '=ucs (+ #x2400 (char-int ret))))
619           ((= (char-int ret) #x7F)
620            ?\u2421)
621           (t ret))))
622
623 (defvar char-db-convert-obsolete-format t)
624
625 (defun char-db-insert-ccs-feature (name value line-breaking)
626   (cond
627    ((integerp value)
628     (insert
629      (format
630       (cond
631        ((memq name '(==shinjigen
632                      =shinjigen
633                      =shinjigen@1ed ==shinjigen@1ed
634                      =shinjigen@rev ==shinjigen@rev
635                      =shinjigen/+p@rev ==shinjigen/+p@rev
636                      ===daikanwa/ho ==daikanwa/ho
637                      =daikanwa/ho =>>daikanwa/ho =>daikanwa/ho))
638         "(%-18s .  %04d)\t; %c")
639        ((eq name '=shinjigen@1ed/24pr)
640         "(%-18s . %04d)\t; %c")
641        ((or (memq name '(===daikanwa
642                          ==daikanwa =daikanwa =>>daikanwa =>daikanwa
643                          =daikanwa@rev1 =daikanwa@rev2
644                          =daikanwa/+p ==daikanwa/+p =>>daikanwa/+p
645                          =daikanwa/+2p =>>daikanwa/+2p
646                          =gt ==gt ===gt
647                          =>>gt =+>gt =>gt
648                          =gt-k ==gt-k ===gt-k
649                          =>>gt-k =>gt-k
650                          =adobe-japan1-0 ==adobe-japan1-0 ===adobe-japan1-0
651                          =adobe-japan1-1 ==adobe-japan1-1 ===adobe-japan1-1
652                          =adobe-japan1-2 ==adobe-japan1-2 ===adobe-japan1-2
653                          =adobe-japan1-3 ==adobe-japan1-3 ===adobe-japan1-3
654                          =adobe-japan1-4 ==adobe-japan1-4 ===adobe-japan1-4
655                          =adobe-japan1-5 ==adobe-japan1-5 ===adobe-japan1-5
656                          =adobe-japan1-6 ==adobe-japan1-6 ===adobe-japan1-6
657                          =>>adobe-japan1-0 =+>adobe-japan1-0
658                          =>>adobe-japan1-1 =+>adobe-japan1-1
659                          =>>adobe-japan1-2 =+>adobe-japan1-2
660                          =>>adobe-japan1-3 =+>adobe-japan1-3
661                          =>>adobe-japan1-4 =+>adobe-japan1-4
662                          =>>adobe-japan1-5 =+>adobe-japan1-5
663                          =>>adobe-japan1-6 =+>adobe-japan1-6
664                          =cbeta ==cbeta ===cbeta =>>cbeta
665                          =zinbun-oracle =>zinbun-oracle))
666             ;; (string-match "^=adobe-" (symbol-name name))
667             )
668         "(%-18s . %05d)\t; %c")
669        ((memq name '(=hanyo-denshi/ks
670                      ==hanyo-denshi/ks ===hanyo-denshi/ks
671                      =>>hanyo-denshi/ks
672                      =koseki
673                      =mj ==mj ===mj =>>mj
674                      =zihai mojikyo))
675         "(%-18s . %06d)\t; %c")
676        ((>= (charset-dimension name) 2)
677         "(%-18s . #x%04X)\t; %c")
678        (t
679         "(%-18s . #x%02X)\t; %c"))
680       name
681       (if (= (charset-iso-graphic-plane name) 1)
682           (logior value
683                   (cond ((= (charset-dimension name) 1)
684                          #x80)
685                         ((= (charset-dimension name) 2)
686                          #x8080)
687                         ((= (charset-dimension name) 3)
688                          #x808080)
689                         (t 0)))
690         value)
691       (char-db-decode-isolated-char name value)))
692     (if (and (= (charset-chars name) 94)
693              (= (charset-dimension name) 2))
694         (insert (format " [%02d-%02d]"
695                         (- (lsh value -8) 32)
696                         (- (logand value 255) 32))))
697     )
698    (t
699     (insert (format "(%-18s . %s)" name value))
700     ))
701   (insert line-breaking))
702
703 (defun char-db-insert-relation-feature (char name value line-breaking
704                                              ccss readable)
705   (insert (format "(%-18s%s " name line-breaking))
706   (let ((lbs (concat "\n" (make-string (current-column) ?\ )))
707         separator cell sources required-features
708         ret)
709     (while (consp value)
710       (setq cell (car value))
711       (if (integerp cell)
712           (setq cell (decode-char '=ucs cell)))
713       (cond
714        ((eq name '->subsumptive)
715         (when (or (not (some (lambda (atr)
716                                (get-char-attribute cell atr))
717                              char-db-ignored-attributes))
718                   (some (lambda (ccs)
719                           (encode-char cell ccs 'defined-only))
720                         ccss))
721           (if separator
722               (insert lbs))
723           (let ((char-db-ignored-attributes
724                  (cons '<-subsumptive
725                        char-db-ignored-attributes)))
726             (insert-char-attributes cell readable nil nil 'for-sub-node))
727           (setq separator lbs))
728         )
729        ((characterp cell)
730         (setq sources
731               (get-char-attribute
732                char (intern (format "%s*sources" name))))
733         (setq required-features nil)
734         (dolist (source sources)
735           (cond
736            ((memq source '(JP
737                            JP/Jouyou
738                            shinjigen shinjigen@1ed shinjigen@rev))
739             (setq required-features
740                   (union required-features
741                          '(=jis-x0208
742                            =jis-x0208@1990
743                            =jis-x0213-1@2000
744                            =jis-x0213-1@2004
745                            =jis-x0213-2
746                            =jis-x0212
747                            =jis-x0208@1983
748                            =jis-x0208@1978
749                            =shinjigen
750                            =shinjigen@1ed
751                            =shinjigen@rev
752                            =shinjigen/+p@rev))))
753            ((eq source 'CN)
754             (setq required-features
755                   (union required-features
756                          '(=gb2312
757                            =gb12345
758                            =iso-ir165)))))
759           (cond
760            ((find-charset (setq ret (intern (format "=%s" source))))
761             (setq required-features
762                   (cons ret required-features)))
763            (t (setq required-features
764                     (cons source required-features)))))
765         (cond ((string-match "@JP" (symbol-name name))
766                (setq required-features
767                      (union required-features
768                             '(=jis-x0208
769                               =jis-x0208@1990
770                               =jis-x0213-1-2000
771                               =jis-x0213-2-2000
772                               =jis-x0212
773                               =jis-x0208@1983
774                               =jis-x0208@1978))))
775               ((string-match "@CN" (symbol-name name))
776                (setq required-features
777                      (union required-features
778                             '(=gb2312
779                               =gb12345
780                               =iso-ir165)))))
781         (if separator
782             (insert lbs))
783         (if readable
784             (insert (format "%S" cell))
785           (char-db-insert-char-spec cell readable
786                                     nil
787                                     required-features))
788         (setq separator lbs))
789        ((consp cell)
790         (if separator
791             (insert lbs))
792         (if (consp (car cell))
793             (char-db-insert-char-spec cell readable)
794           (char-db-insert-char-reference cell readable))
795         (setq separator lbs))
796        (t
797         (if separator
798             (insert separator))
799         (insert (prin1-to-string cell))
800         (setq separator " ")))
801       (setq value (cdr value)))
802     (insert ")")
803     (insert line-breaking)))
804
805 (defun insert-char-attributes (char &optional readable attributes column
806                                     for-sub-node)
807   (unless column
808     (setq column (current-column)))
809   (let (name value ; has-long-ccs-name
810         rest
811         radical strokes
812         (line-breaking
813          (concat "\n" (make-string (1+ column) ?\ )))
814         lbs cell separator ret
815         key al cal
816         dest-ccss ; sources required-features
817         ccss)
818     (let (atr-d)
819       (setq attributes
820             (sort (if attributes
821                       (if (consp attributes)
822                           (progn
823                             (dolist (name attributes)
824                               (unless (memq name char-db-ignored-attributes)
825                                 (if (find-charset name)
826                                     (push name ccss))
827                                 (push name atr-d)))
828                             atr-d))
829                     (dolist (name (char-attribute-list))
830                       (unless (memq name char-db-ignored-attributes)
831                         (if (find-charset name)
832                             (push name ccss))
833                         (push name atr-d)))
834                     atr-d)
835                   #'char-attribute-name<)))
836     (insert "(")
837     (when (memq '<-subsumptive attributes)
838       (when (or readable (not for-sub-node))
839         (when (setq value (get-char-attribute char '<-subsumptive))
840           (char-db-insert-relation-feature char '<-subsumptive value
841                                            line-breaking
842                                            ccss readable)))
843       (setq attributes (delq '<-subsumptive attributes)))
844     (when (and (memq '<-denotational attributes)
845                (setq value (get-char-attribute char '<-denotational)))
846       (char-db-insert-relation-feature char '<-denotational value
847                                        line-breaking
848                                        ccss readable)
849       (setq attributes (delq '<-denotational attributes)))
850     (when (and (memq 'name attributes)
851                (setq value (get-char-attribute char 'name)))
852       (insert (format
853                (if (> (+ (current-column) (length value)) 48)
854                    "(name . %S)%s"
855                  "(name               . %S)%s")
856                value line-breaking))
857       (setq attributes (delq 'name attributes))
858       )
859     (when (and (memq 'name* attributes)
860                (setq value (get-char-attribute char 'name*)))
861       (insert (format
862                (if (> (+ (current-column) (length value)) 48)
863                    "(name* . %S)%s"
864                  "(name*              . %S)%s")
865                value line-breaking))
866       (setq attributes (delq 'name* attributes))
867       )
868     (when (and (memq 'script attributes)
869                (setq value (get-char-attribute char 'script)))
870       (insert (format "(script\t\t%s)%s"
871                       (mapconcat (function prin1-to-string)
872                                  value " ")
873                       line-breaking))
874       (setq attributes (delq 'script attributes))
875       )
876     (dolist (name '(=>ucs =>ucs*))
877       (when (and (memq name attributes)
878                  (setq value (get-char-attribute char name)))
879         (insert (format "(%-18s . #x%04X)\t; %c%s"
880                         name value (decode-char '=ucs value)
881                         line-breaking))
882         (setq attributes (delq name attributes))))
883     (dolist (name '(=>ucs@gb =>ucs@big5))
884       (when (and (memq name attributes)
885                  (setq value (get-char-attribute char name)))
886         (insert (format "(%-18s . #x%04X)\t; %c%s"
887                         name value
888                         (decode-char (intern
889                                       (concat "="
890                                               (substring
891                                                (symbol-name name) 2)))
892                                      value)
893                         line-breaking))
894         (setq attributes (delq name attributes))
895         ))
896     ;; (dolist (name '(=>daikanwa))
897     ;;   (when (and (memq name attributes)
898     ;;              (setq value (get-char-attribute char name)))
899     ;;     (insert
900     ;;      (if (integerp value)
901     ;;          (format "(%-18s . %05d)\t; %c%s"
902     ;;                  name value (decode-char '=daikanwa value)
903     ;;                  line-breaking)
904     ;;        (format "(%-18s %s)\t; %c%s"
905     ;;                name
906     ;;                (mapconcat (function prin1-to-string)
907     ;;                           value " ")
908     ;;                (char-representative-of-daikanwa char)
909     ;;                line-breaking)))
910     ;;     (setq attributes (delq name attributes))))
911     (when (and (memq 'general-category attributes)
912                (setq value (get-char-attribute char 'general-category)))
913       (insert (format
914                "(general-category\t%s) ; %s%s"
915                (mapconcat (lambda (cell)
916                             (format "%S" cell))
917                           value " ")
918                (cond ((rassoc value unidata-normative-category-alist)
919                       "Normative Category")
920                      ((rassoc value unidata-informative-category-alist)
921                       "Informative Category")
922                      (t
923                       "Unknown Category"))
924                line-breaking))
925       (setq attributes (delq 'general-category attributes))
926       )
927     (when (and (memq 'bidi-category attributes)
928                (setq value (get-char-attribute char 'bidi-category)))
929       (insert (format "(bidi-category\t. %S)%s"
930                       value
931                       line-breaking))
932       (setq attributes (delq 'bidi-category attributes))
933       )
934     (unless (or (not (memq 'mirrored attributes))
935                 (eq (setq value (get-char-attribute char 'mirrored 'empty))
936                     'empty))
937       (insert (format "(mirrored\t\t. %S)%s"
938                       value
939                       line-breaking))
940       (setq attributes (delq 'mirrored attributes))
941       )
942     (cond
943      ((and (memq 'decimal-digit-value attributes)
944            (setq value (get-char-attribute char 'decimal-digit-value)))
945       (insert (format "(decimal-digit-value . %S)%s"
946                       value
947                       line-breaking))
948       (setq attributes (delq 'decimal-digit-value attributes))
949       (when (and (memq 'digit-value attributes)
950                  (setq value (get-char-attribute char 'digit-value)))
951         (insert (format "(digit-value\t . %S)%s"
952                         value
953                         line-breaking))
954         (setq attributes (delq 'digit-value attributes))
955         )
956       (when (and (memq 'numeric-value attributes)
957                  (setq value (get-char-attribute char 'numeric-value)))
958         (insert (format "(numeric-value\t . %S)%s"
959                         value
960                         line-breaking))
961         (setq attributes (delq 'numeric-value attributes))
962         )
963       )
964      (t
965       (when (and (memq 'digit-value attributes)
966                  (setq value (get-char-attribute char 'digit-value)))
967         (insert (format "(digit-value\t. %S)%s"
968                         value
969                         line-breaking))
970         (setq attributes (delq 'digit-value attributes))
971         )
972       (when (and (memq 'numeric-value attributes)
973                  (setq value (get-char-attribute char 'numeric-value)))
974         (insert (format "(numeric-value\t. %S)%s"
975                         value
976                         line-breaking))
977         (setq attributes (delq 'numeric-value attributes))
978         )))
979     (when (and (memq 'iso-10646-comment attributes)
980                (setq value (get-char-attribute char 'iso-10646-comment)))
981       (insert (format "(iso-10646-comment\t. %S)%s"
982                       value
983                       line-breaking))
984       (setq attributes (delq 'iso-10646-comment attributes))
985       )
986     (when (and (memq 'morohashi-daikanwa attributes)
987                (setq value (get-char-attribute char 'morohashi-daikanwa)))
988       (insert (format "(morohashi-daikanwa\t%s)%s"
989                       (mapconcat (function prin1-to-string) value " ")
990                       line-breaking))
991       (setq attributes (delq 'morohashi-daikanwa attributes))
992       )
993     (setq radical nil
994           strokes nil)
995     (when (and (memq 'ideographic-radical attributes)
996                (setq value (get-char-attribute char 'ideographic-radical)))
997       (setq radical value)
998       (insert (format "(ideographic-radical . %S)\t; %c%s"
999                       radical
1000                       (ideographic-radical radical)
1001                       line-breaking))
1002       (setq attributes (delq 'ideographic-radical attributes))
1003       )
1004     (when (and (memq 'shuowen-radical attributes)
1005                (setq value (get-char-attribute char 'shuowen-radical)))
1006       (insert (format "(shuowen-radical\t. %S)\t; %c%s"
1007                       value
1008                       (shuowen-radical value)
1009                       line-breaking))
1010       (setq attributes (delq 'shuowen-radical attributes))
1011       )
1012     (let (key)
1013       (dolist (domain
1014                (append
1015                 char-db-feature-domains
1016                 (let (dest domain)
1017                   (dolist (feature (char-attribute-list))
1018                     (setq feature (symbol-name feature))
1019                     (when (string-match
1020                            "\\(radical\\|strokes\\)@\\([^@*]+\\)\\(\\*\\|$\\)"
1021                            feature)
1022                       (setq domain (intern (match-string 2 feature)))
1023                      (unless (memq domain dest)
1024                        (setq dest (cons domain dest)))))
1025                   (sort dest #'string<))))
1026         (setq key (intern (format "%s@%s" 'ideographic-radical domain)))
1027         (when (and (memq key attributes)
1028                    (setq value (get-char-attribute char key)))
1029           (setq radical value)
1030           (insert (format "(%s . %S)\t; %c%s"
1031                           key
1032                           radical
1033                           (ideographic-radical radical)
1034                           line-breaking))
1035           (setq attributes (delq key attributes))
1036           )
1037         (setq key (intern (format "%s@%s" 'ideographic-strokes domain)))
1038         (when (and (memq key attributes)
1039                    (setq value (get-char-attribute char key)))
1040           (setq strokes value)
1041           (insert (format "(%s . %S)%s"
1042                           key
1043                           strokes
1044                           line-breaking))
1045           (setq attributes (delq key attributes))
1046           )
1047         (setq key (intern (format "%s@%s" 'total-strokes domain)))
1048         (when (and (memq key attributes)
1049                    (setq value (get-char-attribute char key)))
1050           (insert (format "(%s       . %S)%s"
1051                           key
1052                           value
1053                           line-breaking))
1054           (setq attributes (delq key attributes))
1055           )
1056         (dolist (feature '(ideographic-radical
1057                            ideographic-strokes
1058                            total-strokes))
1059           (setq key (intern (format "%s@%s*sources" feature domain)))
1060           (when (and (memq key attributes)
1061                      (setq value (get-char-attribute char key)))
1062             (insert (format "(%s%s" key line-breaking))
1063             (dolist (cell value)
1064               (insert (format " %s" cell)))
1065             (insert ")")
1066             (insert line-breaking)
1067             (setq attributes (delq key attributes))
1068             ))
1069         ))
1070     (when (and (memq 'ideographic-strokes attributes)
1071                (setq value (get-char-attribute char 'ideographic-strokes)))
1072       (setq strokes value)
1073       (insert (format "(ideographic-strokes . %S)%s"
1074                       strokes
1075                       line-breaking))
1076       (setq attributes (delq 'ideographic-strokes attributes))
1077       )
1078     (when (and (memq 'kangxi-radical attributes)
1079                (setq value (get-char-attribute char 'kangxi-radical)))
1080       (unless (eq value radical)
1081         (insert (format "(kangxi-radical\t . %S)\t; %c%s"
1082                         value
1083                         (ideographic-radical value)
1084                         line-breaking))
1085         (or radical
1086             (setq radical value)))
1087       (setq attributes (delq 'kangxi-radical attributes))
1088       )
1089     (when (and (memq 'kangxi-strokes attributes)
1090                (setq value (get-char-attribute char 'kangxi-strokes)))
1091       (unless (eq value strokes)
1092         (insert (format "(kangxi-strokes\t . %S)%s"
1093                         value
1094                         line-breaking))
1095         (or strokes
1096             (setq strokes value)))
1097       (setq attributes (delq 'kangxi-strokes attributes))
1098       )
1099     (when (and (memq 'japanese-radical attributes)
1100                (setq value (get-char-attribute char 'japanese-radical)))
1101       (unless (eq value radical)
1102         (insert (format "(japanese-radical\t . %S)\t; %c%s"
1103                         value
1104                         (ideographic-radical value)
1105                         line-breaking))
1106         (or radical
1107             (setq radical value)))
1108       (setq attributes (delq 'japanese-radical attributes))
1109       )
1110     (when (and (memq 'japanese-strokes attributes)
1111                (setq value (get-char-attribute char 'japanese-strokes)))
1112       (unless (eq value strokes)
1113         (insert (format "(japanese-strokes\t . %S)%s"
1114                         value
1115                         line-breaking))
1116         (or strokes
1117             (setq strokes value)))
1118       (setq attributes (delq 'japanese-strokes attributes))
1119       )
1120     (when (and (memq 'cns-radical attributes)
1121                (setq value (get-char-attribute char 'cns-radical)))
1122       (insert (format "(cns-radical\t . %S)\t; %c%s"
1123                       value
1124                       (ideographic-radical value)
1125                       line-breaking))
1126       (setq attributes (delq 'cns-radical attributes))
1127       )
1128     (when (and (memq 'cns-strokes attributes)
1129                (setq value (get-char-attribute char 'cns-strokes)))
1130       (unless (eq value strokes)
1131         (insert (format "(cns-strokes\t . %S)%s"
1132                         value
1133                         line-breaking))
1134         (or strokes
1135             (setq strokes value)))
1136       (setq attributes (delq 'cns-strokes attributes))
1137       )
1138     ;; (when (and (memq 'shinjigen-1-radical attributes)
1139     ;;            (setq value (get-char-attribute char 'shinjigen-1-radical)))
1140     ;;   (unless (eq value radical)
1141     ;;     (insert (format "(shinjigen-1-radical . %S)\t; %c%s"
1142     ;;                     value
1143     ;;                     (ideographic-radical value)
1144     ;;                     line-breaking))
1145     ;;     (or radical
1146     ;;         (setq radical value)))
1147     ;;   (setq attributes (delq 'shinjigen-1-radical attributes))
1148     ;;   )
1149     (when (and (memq 'ideographic- attributes)
1150                (setq value (get-char-attribute char 'ideographic-)))
1151       (insert "(ideographic-       ")
1152       (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1153             separator nil)
1154       (while (consp value)
1155         (setq cell (car value))
1156         (if (integerp cell)
1157             (setq cell (decode-char '=ucs cell)))
1158         (cond ((characterp cell)
1159                (if separator
1160                    (insert lbs))
1161                (if readable
1162                    (insert (format "%S" cell))
1163                  (char-db-insert-char-spec cell readable))
1164                (setq separator lbs))
1165               ((consp cell)
1166                (if separator
1167                    (insert lbs))
1168                (if (consp (car cell))
1169                    (char-db-insert-char-spec cell readable)
1170                  (char-db-insert-char-reference cell readable))
1171                (setq separator lbs))
1172               (t
1173                (if separator
1174                    (insert separator))
1175                (insert (prin1-to-string cell))
1176                (setq separator " ")))
1177         (setq value (cdr value)))
1178       (insert ")")
1179       (insert line-breaking)
1180       (setq attributes (delq 'ideographic- attributes)))
1181     (when (and (memq 'total-strokes attributes)
1182                (setq value (get-char-attribute char 'total-strokes)))
1183       (insert (format "(total-strokes       . %S)%s"
1184                       value
1185                       line-breaking))
1186       (setq attributes (delq 'total-strokes attributes))
1187       )
1188     (when (and (memq '->ideograph attributes)
1189                (setq value (get-char-attribute char '->ideograph)))
1190       (insert (format "(->ideograph\t%s)%s"
1191                       (mapconcat (lambda (code)
1192                                    (cond ((symbolp code)
1193                                           (symbol-name code))
1194                                          ((integerp code)
1195                                           (format "#x%04X" code))
1196                                          (t
1197                                           (format "%s %S"
1198                                                   line-breaking code))))
1199                                  value " ")
1200                       line-breaking))
1201       (setq attributes (delq '->ideograph attributes))
1202       )
1203     ;; (when (and (memq '->decomposition attributes)
1204     ;;            (setq value (get-char-attribute char '->decomposition)))
1205     ;;   (insert (format "(->decomposition\t%s)%s"
1206     ;;                   (mapconcat (lambda (code)
1207     ;;                                (cond ((symbolp code)
1208     ;;                                       (symbol-name code))
1209     ;;                                      ((characterp code)
1210     ;;                                       (if readable
1211     ;;                                           (format "%S" code)
1212     ;;                                         (format "#x%04X"
1213     ;;                                                 (char-int code))
1214     ;;                                         ))
1215     ;;                                      ((integerp code)
1216     ;;                                       (format "#x%04X" code))
1217     ;;                                      (t
1218     ;;                                       (format "%s%S" line-breaking code))))
1219     ;;                              value " ")
1220     ;;                   line-breaking))
1221     ;;   (setq attributes (delq '->decomposition attributes))
1222     ;;   )
1223     (if (equal (get-char-attribute char '->titlecase)
1224                (get-char-attribute char '->uppercase))
1225         (setq attributes (delq '->titlecase attributes)))
1226     (when (and (memq '->mojikyo attributes)
1227                (setq value (get-char-attribute char '->mojikyo)))
1228       (insert (format "(->mojikyo\t\t. %06d)\t; %c%s"
1229                       value (decode-char 'mojikyo value)
1230                       line-breaking))
1231       (setq attributes (delq '->mojikyo attributes))
1232       )
1233     (when (and (memq 'hanyu-dazidian-vol attributes)
1234                (setq value (get-char-attribute char 'hanyu-dazidian-vol)))
1235       (insert (format "(hanyu-dazidian-vol  . %d)%s"
1236                       value line-breaking))
1237       (setq attributes (delq 'hanyu-dazidian-vol attributes))
1238       )
1239     (when (and (memq 'hanyu-dazidian-page attributes)
1240                (setq value (get-char-attribute char 'hanyu-dazidian-page)))
1241       (insert (format "(hanyu-dazidian-page . %d)%s"
1242                       value line-breaking))
1243       (setq attributes (delq 'hanyu-dazidian-page attributes))
1244       )
1245     (when (and (memq 'hanyu-dazidian-char attributes)
1246                (setq value (get-char-attribute char 'hanyu-dazidian-char)))
1247       (insert (format "(hanyu-dazidian-char . %d)%s"
1248                       value line-breaking))
1249       (setq attributes (delq 'hanyu-dazidian-char attributes))
1250       )
1251     (unless readable
1252       (dolist (ignored '(composition
1253                          ->denotational <-subsumptive ->ucs-unified
1254                          ->ideographic-component-forms))
1255         (setq attributes (delq ignored attributes))))
1256     (while attributes
1257       (setq name (car attributes))
1258       (unless (eq (setq value (get-char-attribute char name 'value-is-empty))
1259                   'value-is-empty)
1260         (cond ((setq ret (find-charset name))
1261                (setq name (charset-name ret))
1262                (when (not (memq name dest-ccss))
1263                  (setq dest-ccss (cons name dest-ccss))
1264                  (char-db-insert-ccs-feature name value line-breaking))
1265                )
1266               ((string-match "^=>ucs@" (symbol-name name))
1267                (insert (format "(%-18s . #x%04X)\t; %c%s"
1268                                name value (decode-char '=ucs value)
1269                                line-breaking))
1270                )
1271               ((eq name 'jisx0208-1978/4X)
1272                (insert (format "(%-18s . #x%04X)%s"
1273                                name value
1274                                line-breaking))
1275                )
1276               ((and
1277                 (not readable)
1278                 (not (eq name '->subsumptive))
1279                 (not (eq name '->uppercase))
1280                 (not (eq name '->lowercase))
1281                 (not (eq name '->titlecase))
1282                 (not (eq name '->canonical))
1283                 (not (eq name '->Bopomofo))
1284                 (not (eq name '->mistakable))
1285                 (not (eq name '->ideographic-variants))
1286                 (null (get-char-attribute
1287                        char (intern (format "%s*sources" name))))
1288                 (not (string-match "\\*sources$" (symbol-name name)))
1289                 (null (get-char-attribute
1290                        char (intern (format "%s*note" name))))
1291                 (not (string-match "\\*note$" (symbol-name name)))
1292                 (or (eq name '<-identical)
1293                     (eq name '<-uppercase)
1294                     (eq name '<-lowercase)
1295                     (eq name '<-titlecase)
1296                     (eq name '<-canonical)
1297                     (eq name '<-ideographic-variants)
1298                     ;; (eq name '<-synonyms)
1299                     (string-match "^<-synonyms" (symbol-name name))
1300                     (eq name '<-mistakable)
1301                     (when (string-match "^->" (symbol-name name))
1302                       (cond
1303                        ((string-match "^->fullwidth" (symbol-name name))
1304                         (not (and (consp value)
1305                                   (characterp (car value))
1306                                   (encode-char
1307                                    (car value) '=ucs 'defined-only)))
1308                         )
1309                        (t)))
1310                     ))
1311                )
1312               ((or (eq name 'ideographic-structure)
1313                    (eq name 'ideographic-combination)
1314                    (eq name 'ideographic-)
1315                    (eq name '=decomposition)
1316                    (char-feature-base-name= '=decomposition name)
1317                    (char-feature-base-name= '=>decomposition name)
1318                    ;; (string-match "^=>*decomposition\\(@[^*]+\\)?$"
1319                    ;;               (symbol-name name))
1320                    (string-match "^\\(->\\|<-\\)[^*]*$" (symbol-name name))
1321                    (string-match "^\\(->\\|<-\\)[^*]*\\*sources$"
1322                                  (symbol-name name))
1323                    )
1324                (char-db-insert-relation-feature char name value
1325                                                 line-breaking
1326                                                 ccss readable))
1327               ((memq name '(ideograph=
1328                             original-ideograph-of
1329                             ancient-ideograph-of
1330                             vulgar-ideograph-of
1331                             wrong-ideograph-of
1332                             ;; simplified-ideograph-of
1333                             ideographic-variants
1334                             ;; ideographic-different-form-of
1335                             ))
1336                (insert (format "(%-18s%s " name line-breaking))
1337                (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1338                      separator nil)
1339                (while (consp value)
1340                  (setq cell (car value))
1341                  (if (and (consp cell)
1342                           (consp (car cell)))
1343                      (progn
1344                        (if separator
1345                            (insert lbs))
1346                        (char-db-insert-alist cell readable)
1347                        (setq separator lbs))
1348                    (if separator
1349                        (insert separator))
1350                    (insert (prin1-to-string cell))
1351                    (setq separator " "))
1352                  (setq value (cdr value)))
1353                (insert ")")
1354                (insert line-breaking))
1355               ((consp value)
1356                (insert (format "(%-18s " name))
1357                (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1358                      separator nil)
1359                (while (consp value)
1360                  (setq cell (car value))
1361                  (if (and (consp cell)
1362                           (consp (car cell))
1363                           (setq ret (condition-case nil
1364                                         (find-char cell)
1365                                       (error nil))))
1366                      (progn
1367                        (setq rest cell
1368                              al nil
1369                              cal nil)
1370                        (while rest
1371                          (setq key (car (car rest)))
1372                          (if (find-charset key)
1373                              (setq cal (cons key cal))
1374                            (setq al (cons key al)))
1375                          (setq rest (cdr rest)))
1376                        (if separator
1377                            (insert lbs))
1378                        (insert-char-attributes ret
1379                                                readable
1380                                                al ; cal
1381                                                nil 'for-sub-node)
1382                        (setq separator lbs))
1383                    (setq ret (prin1-to-string cell))
1384                    (if separator
1385                        (if (< (+ (current-column)
1386                                  (length ret)
1387                                  (length separator))
1388                               76)
1389                            (insert separator)
1390                          (insert lbs)))
1391                    (insert ret)
1392                    (setq separator " "))
1393                  (setq value (cdr value)))
1394                (insert ")")
1395                (insert line-breaking))
1396               (t
1397                (insert (format "(%-18s" name))
1398                (setq ret (prin1-to-string value))
1399                (unless (< (+ (current-column)
1400                              (length ret)
1401                              3)
1402                           76)
1403                  (insert line-breaking))
1404                (insert " . " ret ")" line-breaking)
1405                ;; (insert (format "(%-18s . %S)%s"
1406                ;;                 name value
1407                ;;                 line-breaking))
1408                )
1409               ))
1410       (setq attributes (cdr attributes)))
1411     (insert ")")))
1412
1413 (defun insert-char-data (char &optional readable
1414                               attributes)
1415   (save-restriction
1416     (narrow-to-region (point)(point))
1417     (insert "(define-char
1418   '")
1419     (insert-char-attributes char readable attributes)
1420     (insert ")\n")
1421     (goto-char (point-min))
1422     (while (re-search-forward "[ \t]+$" nil t)
1423       (replace-match ""))
1424     ;; from tabify.
1425     (goto-char (point-min))
1426     (while (re-search-forward "[ \t][ \t][ \t]*" nil t)
1427       (let ((column (current-column))
1428             (indent-tabs-mode t))
1429         (delete-region (match-beginning 0) (point))
1430         (indent-to column)))
1431     (goto-char (point-max))
1432     ;; (tabify (point-min)(point-max))
1433     ))
1434
1435 (defun insert-char-data-with-variant (char &optional printable
1436                                            no-ucs-unified
1437                                            script excluded-script)
1438   (insert-char-data char printable)
1439   (let ((variants (char-variants char))
1440         rest
1441         variant vs ret)
1442     (setq variants (sort variants #'<))
1443     (setq rest variants)
1444     (setq variants (cons char variants))
1445     (while rest
1446       (setq variant (car rest))
1447       (unless (get-char-attribute variant '<-subsumptive)
1448         (if (and (or (null script)
1449                      (null (setq vs (get-char-attribute variant 'script)))
1450                      (memq script vs))
1451                  (or (null excluded-script)
1452                      (null (setq vs (get-char-attribute variant 'script)))
1453                      (not (memq excluded-script vs))))
1454             (unless (and no-ucs-unified (get-char-attribute variant '=ucs))
1455               (insert-char-data variant printable)
1456               (if (setq ret (char-variants variant))
1457                   (while ret
1458                     (or (memq (car ret) variants)
1459                         ;; (get-char-attribute (car ret) '<-subsumptive)
1460                         (setq rest (nconc rest (list (car ret)))))
1461                     (setq ret (cdr ret)))))))
1462       (setq rest (cdr rest)))))
1463
1464 (defun insert-char-range-data (min max &optional script excluded-script)
1465   (let ((code min)
1466         char)
1467     (while (<= code max)
1468       (setq char (decode-char '=ucs code))
1469       (if (encode-char char '=ucs 'defined-only)
1470           (insert-char-data-with-variant char nil 'no-ucs-unified
1471                                          script excluded-script))
1472       (setq code (1+ code)))))
1473
1474 (defun write-char-range-data-to-file (min max file
1475                                           &optional script excluded-script)
1476   (let ((coding-system-for-write char-db-file-coding-system))
1477     (with-temp-buffer
1478       (insert (format ";; -*- coding: %s -*-\n"
1479                       char-db-file-coding-system))
1480       (insert-char-range-data min max script excluded-script)
1481       (write-region (point-min)(point-max) file))))
1482
1483 (defvar what-character-original-window-configuration)
1484
1485 ;;;###autoload
1486 (defun what-char-definition (char)
1487   (interactive (list (char-after)))
1488   (let ((buf (get-buffer-create "*Character Description*"))
1489         (the-buf (current-buffer))
1490         (win-conf (current-window-configuration)))
1491     (pop-to-buffer buf)
1492     (make-local-variable 'what-character-original-window-configuration)
1493     (setq what-character-original-window-configuration win-conf)
1494     (setq buffer-read-only nil)
1495     (erase-buffer)
1496     (condition-case err
1497         (progn
1498           (insert-char-data-with-variant char 'printable)
1499           (unless (char-attribute-alist char)
1500             (insert (format ";; = %c\n"
1501                             (let* ((rest (split-char char))
1502                                    (ccs (pop rest))
1503                                    (code (pop rest)))
1504                               (while rest
1505                                 (setq code (logior (lsh code 8)
1506                                                    (pop rest))))
1507                               (decode-char ccs code)))))
1508           ;; (char-db-update-comment)
1509           (set-buffer-modified-p nil)
1510           (view-mode the-buf (lambda (buf)
1511                                (set-window-configuration
1512                                 what-character-original-window-configuration)
1513                                ))
1514           (goto-char (point-min)))
1515       (error (progn
1516                (set-window-configuration
1517                 what-character-original-window-configuration)
1518                (signal (car err) (cdr err)))))))
1519
1520
1521 ;;; @ end
1522 ;;;
1523
1524 (provide 'char-db-util)
1525
1526 ;;; char-db-util.el ends here