(U+56E1): Apply new conventions for glyph granularity.
[chise/xemacs-chise.git.1] / lisp / utf-2000 / char-db-util.el
1 ;;; char-db-util.el --- Character Database utility -*- coding: utf-8-er; -*-
2
3 ;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
4 ;;   2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 MORIOKA Tomohiko.
5
6 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
7 ;; Keywords: CHISE, Character Database, ISO/IEC 10646, UCS, Unicode, MULE.
8
9 ;; This file is part of XEmacs CHISE.
10
11 ;; XEmacs CHISE is free software; you can redistribute it and/or
12 ;; modify it under the terms of the GNU General Public License as
13 ;; published by the Free Software Foundation; either version 2, or (at
14 ;; your option) any later version.
15
16 ;; XEmacs CHISE is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 ;; General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with XEmacs CHISE; see the file COPYING.  If not, write to
23 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Code:
27
28 (require 'chise-subr)
29 (require 'ideograph-subr)
30
31 (defconst unidata-normative-category-alist
32   '(("Lu" letter        uppercase)
33     ("Ll" letter        lowercase)
34     ("Lt" letter        titlecase)
35     ("Mn" mark          non-spacing)
36     ("Mc" mark          spacing-combining)
37     ("Me" mark          enclosing)
38     ("Nd" number        decimal-digit)
39     ("Nl" number        letter)
40     ("No" number        other)
41     ("Zs" separator     space)
42     ("Zl" separator     line)
43     ("Zp" separator     paragraph)
44     ("Cc" other         control)
45     ("Cf" other         format)
46     ("Cs" other         surrogate)
47     ("Co" other         private-use)
48     ("Cn" other         not-assigned)))
49
50 (defconst unidata-informative-category-alist
51   '(("Lm" letter        modifier)
52     ("Lo" letter        other)
53     ("Pc" punctuation   connector)
54     ("Pd" punctuation   dash)
55     ("Ps" punctuation   open)
56     ("Pe" punctuation   close)
57     ("Pi" punctuation   initial-quote)
58     ("Pf" punctuation   final-quote)
59     ("Po" punctuation   other)
60     ("Sm" symbol        math)
61     ("Sc" symbol        currency)
62     ("Sk" symbol        modifier)
63     ("So" symbol        other)
64     ))
65
66 (defconst shuowen-radicals
67   [?一 ?上 ?示 ?三 ?王 ?玉 ?玨 ?气 ?士 ?丨 ; 010
68    ?屮 ?艸 ?蓐 ?茻 ?小 ?八 ?釆 ?半 ?牛 ?犛 ; 020
69    ?告 ?口 ?凵 ?吅 ?哭 ?走 ?止 ?癶 ?步 ?此 ; 030
70    ?正 ?是 ?辵 ?彳 ?廴 ?㢟 ?行 ?齒 ?牙 ?足 ; 040
71    ?疋 ?品 ?龠 ?冊 ?㗊 ?舌 ?干 ?谷 ?只 ?㕯 ; 050
72    ?句 ?丩 ?古 ?十 ?卅 ?言 ?誩 ?音 ?䇂 ?丵 ; 060
73    ?菐 ?𠬞 ?𠬜 ?共 ?異 ?舁 ?𦥑 ?䢅 ?爨 ?革 ; 070
74    ?鬲 ?䰜 ?爪 ?𠃨 ?鬥 ?又 ?𠂇 ?㕜 ?支 ?𦘒 ; 080
75    ?聿 ?畫 ?隶 ?臤 ?臣 ?殳 ?殺 ?𠘧 ?寸 ?皮 ; 090
76    ?㼱 ?攴 ?敎 ?卜 ?用 ?爻 ?㸚 ?𥄎 ?目 ?䀠 ; 100
77    ?眉 ?盾 ?自 ?白 ?鼻 ?皕 ?習 ?羽 ?隹 ?奞 ; 110
78    ?萑 ?𦫳 ?苜 ?羊 ?羴 ?瞿 ?雔 ?雥 ?鳥 ?烏 ; 120
79    ?𠦒 ?冓 ?幺 ?𢆶 ?叀 ?玄 ?予 ?放 ?𠬪 ?𣦼 ; 130
80    ?歺 ?死 ?冎 ?骨 ?肉 ?筋 ?刀 ?刃 ?㓞 ?丰 ; 140
81    ?耒 ?𧢲 ?竹 ?箕 ?丌 ?左 ?工 ?㠭 ?巫 ?甘 ; 150
82    ?曰 ?乃 ?丂 ?可 ?兮 ?号 ?亏 ?旨 ?喜 ?壴 ; 160
83 ;  ?旨 ?曰 ?乃 ?丂 ?可 ?兮 ?号 ?亏 ?喜 ?壴 ; 160
84    ?鼓 ?豈 ?豆 ?豊 ?豐 ?䖒 ?虍 ?虎 ?虤 ?皿 ; 170
85    ?𠙴 ?去 ?血 ?丶 ?丹 ?青 ?井 ?皀 ?鬯 ?食 ; 180
86    ?亼 ?會 ?倉 ?入 ?缶 ?矢 ?高 ?冂 ?𩫏 ?京 ; 190
87    ?亯 ?𣆪 ?畗 ?㐭 ?嗇 ?來 ?麥 ?夊 ?舛 ?䑞 ; 200
88    ?韋 ?弟 ?夂 ?久 ?桀 ?木 ?東 ?林 ?才 ?叒 ; 210
89    ?之 ?帀 ?出 ?𣎵 ?生 ?乇 ?𠂹 ?𠌶 ?華 ?𥝌 ; 220
90    ?稽 ?巢 ?桼 ?束 ?㯻 ?囗 ?員 ?貝 ?邑 ?𨛜 ; 230
91    ?日 ?旦 ?倝 ?㫃 ?冥 ?晶 ?月 ?有 ?明 ?囧 ; 240
92    ?夕 ?多 ?毌 ?𢎘 ?𣐺 ?卣 ?齊 ?朿 ?片 ?鼎 ; 250
93    ?克 ?彔 ?禾 ?秝 ?黍 ?香 ?米 ?毇 ?臼 ?凶 ; 260
94    ?𣎳 ?林 ?麻 ?尗 ?耑 ?韭 ?瓜 ?瓠 ?宀 ?宮 ; 270
95    ?呂 ?穴 ?㝱 ?𤕫 ?冖 ?𠔼 ?冃 ?㒳 ?网 ?襾 ; 280
96    ?巾 ?巿 ?帛 ?白 ?㡀 ?黹 ?人 ?𠤎 ?匕 ?从 ; 290
97    ?比 ?北 ?丘 ?㐺 ?𡈼 ?重 ?臥 ?身 ?㐆 ?衣 ; 300
98    ?裘 ?老 ?毛 ?毳 ?尸 ?尺 ?尾 ?履 ?舟 ?方 ; 310
99    ?儿 ?兄 ?兂 ?皃 ?𠑹 ?先 ?秃 ?見 ?覞 ?欠 ; 320
100    ?㱃 ?㳄 ?旡 ?頁 ?𦣻 ?面 ?丏 ?首 ?𥄉 ?須 ; 330
101    ])
102
103 (defun shuowen-radical (number)
104   (aref shuowen-radicals (1- number)))
105
106 (defvar char-db-file-coding-system 'utf-8-mcs-er)
107
108 (defvar char-db-ignored-attributes '(ideographic-products))
109
110 (defvar char-db-coded-charset-priority-list
111   '(ascii
112     control-1
113     latin-iso8859-1
114     latin-iso8859-2
115     latin-iso8859-3
116     latin-iso8859-4
117     latin-iso8859-9
118     latin-jisx0201
119     cyrillic-iso8859-5
120     greek-iso8859-7
121     thai-tis620
122     ;; =mj
123     ;; =adobe-japan1-0
124     ;; =adobe-japan1-1
125     ;; =adobe-japan1-2
126     ;; =adobe-japan1-3
127     ;; =adobe-japan1-4
128     ;; =adobe-japan1-5
129     ;; =adobe-japan1-6
130     =jis-x0208
131     =jis-x0208@1978
132     =jis-x0208@1983
133     japanese-jisx0212
134     chinese-gb2312
135     =jis-x0208@1990
136     chinese-cns11643-1
137     chinese-cns11643-2
138     chinese-cns11643-3
139     chinese-cns11643-4
140     chinese-cns11643-5
141     chinese-cns11643-6
142     chinese-cns11643-7
143     =jis-x0213-1
144     =jis-x0213-1@2000
145     =jis-x0213-1@2004
146     =jis-x0213-2
147     korean-ksc5601
148     chinese-isoir165
149     katakana-jisx0201
150     hebrew-iso8859-8
151     chinese-gb12345
152     latin-viscii
153     ethiopic-ucs
154     =big5-cdp
155     =hanyo-denshi/ja
156     =hanyo-denshi/jb
157     =hanyo-denshi/jc
158     =hanyo-denshi/jd
159     =hanyo-denshi/ft
160     =hanyo-denshi/ia
161     =hanyo-denshi/ib
162     =hanyo-denshi/hg
163     =hanyo-denshi/jt
164     =hanyo-denshi/ks
165     ;; ==mj
166     ;; ==adobe-japan1-0
167     ;; ==adobe-japan1-1
168     ;; ==adobe-japan1-2
169     ;; ==adobe-japan1-3
170     ;; ==adobe-japan1-4
171     ;; ==adobe-japan1-5
172     ;; ==adobe-japan1-6
173     ==jis-x0208
174     ==jis-x0213-1
175     ==jis-x0213-2
176     ==hanyo-denshi/ja
177     ==hanyo-denshi/jb
178     ==hanyo-denshi/jc
179     ==hanyo-denshi/ft
180     ==hanyo-denshi/ib
181     ==hanyo-denshi/hg
182     ==hanyo-denshi/jt
183     ==hanyo-denshi/ks
184     =gt
185     =gt-k
186     =daikanwa
187     =daikanwa@rev2
188     =daikanwa@rev1
189     ==daikanwa
190     =cbeta
191     ideograph-hanziku-1
192     ideograph-hanziku-2
193     ideograph-hanziku-3
194     ideograph-hanziku-4
195     ideograph-hanziku-5
196     ideograph-hanziku-6
197     ideograph-hanziku-7
198     ideograph-hanziku-8
199     ideograph-hanziku-9
200     ideograph-hanziku-10
201     ideograph-hanziku-11
202     ideograph-hanziku-12
203     ;; =>>>adobe-japan1-0
204     ;; =>>>adobe-japan1-1
205     ;; =>>>adobe-japan1-2
206     ;; =>>>adobe-japan1-3
207     ;; =>>>adobe-japan1-4
208     ;; =>>>adobe-japan1-5
209     ;; =>>>adobe-japan1-6
210     ;; =>>>jis-x0208
211     ;; =>>>jis-x0213-1
212     ;; =>>>jis-x0213-2
213     ;; =>>>hanyo-denshi/ja
214     ;; =>>>hanyo-denshi/jb
215     ;; =>>>hanyo-denshi/jc
216     ;; =>>>hanyo-denshi/ft
217     ;; =>>>hanyo-denshi/ib
218     ;; =>>>hanyo-denshi/hg
219     ;; =>>>hanyo-denshi/jt
220     ;; =>>>hanyo-denshi/ks
221     ;; =>>>gt
222     =>>adobe-japan1-0
223     =>>adobe-japan1-1
224     =>>adobe-japan1-2
225     =>>adobe-japan1-3
226     =>>adobe-japan1-4
227     =>>adobe-japan1-5
228     =>>adobe-japan1-6
229     =>>jis-x0208
230     =>>jis-x0213-1
231     =>>jis-x0213-1@2000
232     =>>jis-x0213-1@2004
233     =>>jis-x0213-2
234     =>>jis-x0208@1978
235     =>>hanyo-denshi/ft
236     =>>hanyo-denshi/jt
237     =>>hanyo-denshi/ks
238     =>>gt
239     =>>daikanwa
240     =>>cbeta
241     =+>jis-x0208
242     =+>jis-x0213-1
243     =+>jis-x0213-2
244     =+>adobe-japan1-0
245     =+>adobe-japan1-1
246     =+>adobe-japan1-2
247     =+>adobe-japan1-3
248     =+>adobe-japan1-4
249     =+>adobe-japan1-5
250     =+>adobe-japan1-6
251     =+>jis-x0208@1978
252     =>jis-x0208
253     =>jis-x0208@1997
254     =>jis-x0213-1
255     =>jis-x0213-1@2000
256     =>jis-x0213-1@2004
257     =>jis-x0213-2
258     ==>ucs@bucs
259     =>ucs@hanyo-denshi
260     =>ucs@iso
261     =>ucs@unicode
262     =>ucs@jis
263     =>ucs@cns
264     =>ucs@ks
265     =+>ucs@iso
266     =+>ucs@unicode
267     =+>ucs@jis
268     =+>ucs@jis/1990
269     =+>ucs@cns
270     =+>ucs@ks
271     =>>ucs@iso
272     =>>ucs@unicode
273     =>>ucs@jis
274     =>>ucs@cns
275     =>>>ucs@iso
276     =>>>ucs@unicode
277     ==ucs@iso
278     ==ucs@unicode
279     ==gb2312
280     ==ks-x1001
281     ==cns11643-1
282     ==cns11643-2
283     ==cns11643-3
284     ==gt
285     ==jis-x0208@1990
286     ==gt-k
287     =ucs@iso
288     =ucs@unicode
289     =ucs@cns
290     =>>big5-cdp
291     =>>gt-k
292     =+>gt
293     =>gt
294     =>big5-cdp
295     =>daikanwa
296     =>daikanwa/ho
297     =>cns11643-7
298     =big5
299     =big5-eten
300     =>gt-k
301     =zinbun-oracle
302     =>zinbun-oracle
303     =ruimoku-v6
304     =>>ruimoku-v6
305     =jef-china3
306     =shinjigen
307     =big5-cdp-var-3
308     =big5-cdp-var-5))
309
310
311 ;;; @ char-db formatters
312 ;;;
313
314 (defun char-db-make-char-spec (char)
315   (let (ret char-spec)
316     (cond ((characterp char)
317            (cond ((and (setq ret (encode-char char '=ucs 'defined-only))
318                        (not (and (<= #xE000 ret)(<= ret #xF8FF))))
319                   (setq char-spec (list (cons '=ucs ret)))
320                   (cond ((setq ret (get-char-attribute char 'name))
321                          (setq char-spec (cons (cons 'name ret) char-spec))
322                          )
323                         ((setq ret (get-char-attribute char 'name*))
324                          (setq char-spec (cons (cons 'name* ret) char-spec))
325                          ))
326                   )
327                  ((encode-char char '=mj 'defined-only)
328                   (setq char-spec nil)
329                   (dolist (ccs (charset-list))
330                     (if (and (or (eq ccs '=mj)
331                                  ;; (eq (charset-property ccs 'iso-ir) 177)
332                                  (string-match "=ucs@" (symbol-name ccs))
333                                  )
334                              (setq ccs (charset-name ccs))
335                              (null (assq ccs char-spec))
336                              (setq ret (encode-char char ccs 'defined-only)))
337                         (setq char-spec (cons (cons ccs ret) char-spec))))
338                   )
339                  ((encode-char char '==mj 'defined-only)
340                   (setq char-spec nil)
341                   (dolist (ccs (charset-list))
342                     (if (and (or (eq ccs '==mj)
343                                  ;; (eq (charset-property ccs 'iso-ir) 177)
344                                  (string-match "=ucs@" (symbol-name ccs))
345                                  )
346                              (setq ccs (charset-name ccs))
347                              (null (assq ccs char-spec))
348                              (setq ret (encode-char char ccs 'defined-only)))
349                         (setq char-spec (cons (cons ccs ret) char-spec))))
350                   )
351                  ((encode-char char '=adobe-japan1 'defined-only)
352                   (setq char-spec nil)
353                   (dolist (ccs (charset-list))
354                     (if (and (or (memq ccs
355                                        '(=adobe-japan1-0
356                                          =adobe-japan1-1
357                                          =adobe-japan1-2
358                                          =adobe-japan1-3
359                                          =adobe-japan1-4
360                                          =adobe-japan1-5
361                                          =adobe-japan1-6
362                                          ))
363                                  ;; (eq (charset-property ccs 'iso-ir) 177)
364                                  (string-match "=ucs@" (symbol-name ccs))
365                                  )
366                              (setq ccs (charset-name ccs))
367                              (null (assq ccs char-spec))
368                              (setq ret (encode-char char ccs 'defined-only)))
369                         (setq char-spec (cons (cons ccs ret) char-spec))))
370                   )
371                  ((encode-char char '==adobe-japan1 'defined-only)
372                   (setq char-spec nil)
373                   (dolist (ccs (charset-list))
374                     (if (and (or (memq ccs
375                                        '(==adobe-japan1-0
376                                          ==adobe-japan1-1
377                                          ==adobe-japan1-2
378                                          ==adobe-japan1-3
379                                          ==adobe-japan1-4
380                                          ==adobe-japan1-5
381                                          ==adobe-japan1-6
382                                          ))
383                                  ;; (eq (charset-property ccs 'iso-ir) 177)
384                                  (string-match "=ucs@" (symbol-name ccs))
385                                  )
386                              (setq ccs (charset-name ccs))
387                              (null (assq ccs char-spec))
388                              (setq ret (encode-char char ccs 'defined-only)))
389                         (setq char-spec (cons (cons ccs ret) char-spec))))
390                   )
391                  ((setq ret
392                         (catch 'tag
393                           (let ((rest char-db-coded-charset-priority-list)
394                                 ccs)
395                             (while rest
396                               (setq ccs (charset-name
397                                          (find-charset (car rest))))
398                               (if (setq ret
399                                         (encode-char char ccs
400                                                      'defined-only))
401                                   (throw 'tag (cons ccs ret)))
402                               (setq rest (cdr rest))))))
403                   (setq char-spec (list ret))
404                   (dolist (ccs (delq (car ret) (charset-list)))
405                     (if (and (or (charset-iso-final-char ccs)
406                                  (memq ccs
407                                        '(=daikanwa
408                                          =daikanwa@rev2
409                                          ;; =gt-k
410                                          =jis-x0208@1997
411                                          ))
412                                  (eq (charset-property ccs 'iso-ir) 177)
413                                  ;; (string-match "=ucs@" (symbol-name ccs))
414                                  )
415                              (setq ccs (charset-name ccs))
416                              (null (assq ccs char-spec))
417                              (setq ret (encode-char char ccs 'defined-only)))
418                         (setq char-spec (cons (cons ccs ret) char-spec))))
419                   (if (null char-spec)
420                       (setq char-spec (split-char char)))
421                   (cond ((setq ret (get-char-attribute char 'name))
422                          (setq char-spec (cons (cons 'name ret) char-spec))
423                          )
424                         ((setq ret (get-char-attribute char 'name*))
425                          (setq char-spec (cons (cons 'name* ret) char-spec))
426                          ))
427                   )
428                  ((setq ret (get-char-attribute
429                              char 'ideographic-combination))
430                   (setq char-spec
431                         (cons (cons 'ideographic-combination ret)
432                               char-spec))
433                   ))
434            char-spec)
435           ((consp char)
436            char))))
437     
438 (defun char-db-insert-char-spec (char &optional readable column
439                                       required-features)
440   (unless column
441     (setq column (current-column)))
442   (let (char-spec temp-char)
443     (setq char-spec (char-db-make-char-spec char))
444     (unless (or (characterp char) ; char
445                 (condition-case nil
446                     (setq char (find-char char-spec))
447                   (error nil)))
448       ;; define temporary character
449       ;;   Current implementation is dirty.
450       (setq temp-char (define-char (cons '(ideograph-daikanwa . 0)
451                                          char-spec)))
452       (remove-char-attribute temp-char 'ideograph-daikanwa)
453       (setq char temp-char))
454     (insert-char-attributes char
455                             readable
456                             (union (mapcar #'car char-spec)
457                                    required-features)
458                             nil 'for-sub-node)
459     (when temp-char
460       ;; undefine temporary character
461       ;;   Current implementation is dirty.
462       (setq char-spec (char-attribute-alist temp-char))
463       (while char-spec
464         (remove-char-attribute temp-char (car (car char-spec)))
465         (setq char-spec (cdr char-spec))))))
466
467 (defun char-db-insert-alist (alist &optional readable column)
468   (unless column
469     (setq column (current-column)))
470   (let ((line-breaking
471          (concat "\n" (make-string (1+ column) ?\ )))
472         name value
473         ret al ; cal
474         key
475         lbs cell rest separator)
476     (insert "(")
477     (while alist
478       (setq name (car (car alist))
479             value (cdr (car alist)))
480       (cond ((eq name 'char)
481              (insert "(char . ")
482              (if (setq ret (condition-case nil
483                                (find-char value)
484                              (error nil)))
485                  (progn
486                    (setq al nil
487                          ;; cal nil
488                          )
489                    (while value
490                      (setq key (car (car value)))
491                      ;; (if (find-charset key)
492                      ;;     (setq cal (cons key cal))
493                      (setq al (cons key al))
494                      ;; )
495                      (setq value (cdr value)))
496                    (insert-char-attributes ret
497                                            readable
498                                            (or al 'none) ; cal
499                                            nil 'for-sub-node))
500                (insert (prin1-to-string value)))
501              (insert ")")
502              (insert line-breaking))
503             ((consp value)
504              (insert (format "(%-18s " name))
505              (setq lbs (concat "\n" (make-string (current-column) ?\ )))
506              (while (consp value)
507                (setq cell (car value))
508                (if (and (consp cell)
509                         (consp (car cell))
510                         (setq ret (condition-case nil
511                                       (find-char cell)
512                                     (error nil)))
513                         )
514                    (progn
515                      (setq rest cell
516                            al nil
517                            ;; cal nil
518                            )
519                      (while rest
520                        (setq key (car (car rest)))
521                        ;; (if (find-charset key)
522                        ;;     (setq cal (cons key cal))
523                        (setq al (cons key al))
524                        ;; )
525                        (setq rest (cdr rest)))
526                      (if separator
527                          (insert lbs))
528                      (insert-char-attributes ret
529                                              readable
530                                              al ; cal
531                                              nil 'for-sub-node)
532                      (setq separator lbs))
533                  (if separator
534                      (insert separator))
535                  (insert (prin1-to-string cell))
536                  (setq separator " "))
537                (setq value (cdr value)))
538              (insert ")")
539              (insert line-breaking))
540             (t
541              (insert (format "(%-18s . %S)%s"
542                              name value
543                              line-breaking))))
544       (setq alist (cdr alist))))
545   (insert ")"))
546
547 (defun char-db-insert-char-reference (plist &optional readable column)
548   (unless column
549     (setq column (current-column)))
550   (let ((line-breaking
551          (concat "\n" (make-string (1+ column) ?\ )))
552         (separator "")
553         name value)
554     (insert "(")
555     (while plist
556       (setq name (pop plist))
557       (setq value (pop plist))
558       (cond ((eq name :char)
559              (insert separator)
560              (insert ":char\t")
561              (cond ((numberp value)
562                     (setq value (decode-char '=ucs value)))
563                    ;; ((consp value)
564                    ;;  (setq value (or (find-char value)
565                    ;;                  value)))
566                    )
567              (char-db-insert-char-spec value readable)
568              (insert line-breaking)
569              (setq separator ""))
570             ((eq name :radical)
571              (insert (format "%s%s\t%d ; %c%s"
572                              separator
573                              name value
574                              (ideographic-radical value)
575                              line-breaking))
576              (setq separator ""))
577             (t
578              (insert (format "%s%s\t%S" separator name value))
579              (setq separator line-breaking)))
580       ))
581   (insert ")"))
582
583 (defun char-db-decode-isolated-char (ccs code-point)
584   (let (ret)
585     (setq ret
586           (cond ((eq ccs 'arabic-iso8859-6)
587                  (decode-char ccs code-point))
588                 ;; ((eq ccs '=gt)
589                 ;;  (decode-builtin-char '==gt code-point))
590                 ((and (memq ccs '(=gt-pj-1
591                                   =gt-pj-2
592                                   =gt-pj-3
593                                   =gt-pj-4
594                                   =gt-pj-5
595                                   =gt-pj-6
596                                   =gt-pj-7
597                                   =gt-pj-8
598                                   =gt-pj-9
599                                   =gt-pj-10
600                                   =gt-pj-11))
601                       (setq ret (decode-char ccs code-point))
602                       (setq ret (encode-char ret '=gt 'defined-only)))
603                  (decode-builtin-char '=gt ret))
604                 (t
605                  (decode-builtin-char ccs code-point))))
606     (cond ((and (<= 0 (char-int ret))
607                 (<= (char-int ret) #x1F))
608            (decode-char '=ucs (+ #x2400 (char-int ret))))
609           ((= (char-int ret) #x7F)
610            ?\u2421)
611           (t ret))))
612
613 (defvar char-db-convert-obsolete-format t)
614
615 (defun char-db-insert-ccs-feature (name value line-breaking)
616   (cond
617    ((integerp value)
618     (insert
619      (format
620       (cond
621        ((memq name '(==shinjigen
622                      =shinjigen
623                      =shinjigen@1ed ==shinjigen@1ed
624                      =shinjigen@rev ==shinjigen@rev
625                      =shinjigen/+p@rev ==shinjigen/+p@rev
626                      ==daikanwa/ho
627                      =daikanwa/ho =>>daikanwa/ho =>daikanwa/ho))
628         "(%-18s .  %04d)\t; %c")
629        ((eq name '=shinjigen@1ed/24pr)
630         "(%-18s . %04d)\t; %c")
631        ((or (memq name '(==daikanwa
632                          =daikanwa =>>daikanwa =>daikanwa
633                          =daikanwa@rev1 =daikanwa@rev2
634                          =daikanwa/+p ==daikanwa/+p =>>daikanwa/+p
635                          =daikanwa/+2p =>>daikanwa/+2p
636                          =gt ==gt ; =>>>gt
637                          =>>gt =+>gt =>gt
638                          =gt-k ==gt-k =>>gt-k =>gt-k
639                          =adobe-japan1-0 ==adobe-japan1-0 ; =>>>adobe-japan1-0
640                          =adobe-japan1-1 ==adobe-japan1-1 ; =>>>adobe-japan1-1
641                          =adobe-japan1-2 ==adobe-japan1-2 ; =>>>adobe-japan1-2
642                          =adobe-japan1-3 ==adobe-japan1-3 ; =>>>adobe-japan1-3
643                          =adobe-japan1-4 ==adobe-japan1-4 ; =>>>adobe-japan1-4
644                          =adobe-japan1-5 ==adobe-japan1-5 ; =>>>adobe-japan1-5
645                          =adobe-japan1-6 ==adobe-japan1-6 ; =>>>adobe-japan1-6
646                          =>>adobe-japan1-0 =+>adobe-japan1-0
647                          =>>adobe-japan1-1 =+>adobe-japan1-1
648                          =>>adobe-japan1-2 =+>adobe-japan1-2
649                          =>>adobe-japan1-3 =+>adobe-japan1-3
650                          =>>adobe-japan1-4 =+>adobe-japan1-4
651                          =>>adobe-japan1-5 =+>adobe-japan1-5
652                          =>>adobe-japan1-6 =+>adobe-japan1-6
653                          =cbeta =>>cbeta
654                          =zinbun-oracle =>zinbun-oracle))
655             ;; (string-match "^=adobe-" (symbol-name name))
656             )
657         "(%-18s . %05d)\t; %c")
658        ((memq name '(=hanyo-denshi/ks
659                      ==hanyo-denshi/ks ; =>>>hanyo-denshi/ks
660                      =>>hanyo-denshi/ks
661                      =koseki
662                      =mj ==mj =>>mj
663                      =zihai mojikyo))
664         "(%-18s . %06d)\t; %c")
665        ((>= (charset-dimension name) 2)
666         "(%-18s . #x%04X)\t; %c")
667        (t
668         "(%-18s . #x%02X)\t; %c"))
669       name
670       (if (= (charset-iso-graphic-plane name) 1)
671           (logior value
672                   (cond ((= (charset-dimension name) 1)
673                          #x80)
674                         ((= (charset-dimension name) 2)
675                          #x8080)
676                         ((= (charset-dimension name) 3)
677                          #x808080)
678                         (t 0)))
679         value)
680       (char-db-decode-isolated-char name value)))
681     (if (and (= (charset-chars name) 94)
682              (= (charset-dimension name) 2))
683         (insert (format " [%02d-%02d]"
684                         (- (lsh value -8) 32)
685                         (- (logand value 255) 32))))
686     )
687    (t
688     (insert (format "(%-18s . %s)" name value))
689     ))
690   (insert line-breaking))
691
692 (defun char-db-insert-relation-feature (char name value line-breaking
693                                              ccss readable)
694   (insert (format "(%-18s%s " name line-breaking))
695   (let ((lbs (concat "\n" (make-string (current-column) ?\ )))
696         separator cell sources required-features
697         ret)
698     (while (consp value)
699       (setq cell (car value))
700       (if (integerp cell)
701           (setq cell (decode-char '=ucs cell)))
702       (cond
703        ((eq name '->subsumptive)
704         (when (or (not (some (lambda (atr)
705                                (get-char-attribute cell atr))
706                              char-db-ignored-attributes))
707                   (some (lambda (ccs)
708                           (encode-char cell ccs 'defined-only))
709                         ccss))
710           (if separator
711               (insert lbs))
712           (let ((char-db-ignored-attributes
713                  (cons '<-subsumptive
714                        char-db-ignored-attributes)))
715             (insert-char-attributes cell readable nil nil 'for-sub-node))
716           (setq separator lbs))
717         )
718        ((characterp cell)
719         (setq sources
720               (get-char-attribute
721                char (intern (format "%s*sources" name))))
722         (setq required-features nil)
723         (dolist (source sources)
724           (cond
725            ((memq source '(JP
726                            JP/Jouyou
727                            shinjigen shinjigen@1ed shinjigen@rev))
728             (setq required-features
729                   (union required-features
730                          '(=jis-x0208
731                            =jis-x0208@1990
732                            =jis-x0213-1@2000
733                            =jis-x0213-1@2004
734                            =jis-x0213-2
735                            =jis-x0212
736                            =jis-x0208@1983
737                            =jis-x0208@1978
738                            =shinjigen
739                            =shinjigen@1ed
740                            =shinjigen@rev
741                            =shinjigen/+p@rev))))
742            ((eq source 'CN)
743             (setq required-features
744                   (union required-features
745                          '(=gb2312
746                            =gb12345
747                            =iso-ir165)))))
748           (cond
749            ((find-charset (setq ret (intern (format "=%s" source))))
750             (setq required-features
751                   (cons ret required-features)))
752            (t (setq required-features
753                     (cons source required-features)))))
754         (cond ((string-match "@JP" (symbol-name name))
755                (setq required-features
756                      (union required-features
757                             '(=jis-x0208
758                               =jis-x0208@1990
759                               =jis-x0213-1-2000
760                               =jis-x0213-2-2000
761                               =jis-x0212
762                               =jis-x0208@1983
763                               =jis-x0208@1978))))
764               ((string-match "@CN" (symbol-name name))
765                (setq required-features
766                      (union required-features
767                             '(=gb2312
768                               =gb12345
769                               =iso-ir165)))))
770         (if separator
771             (insert lbs))
772         (if readable
773             (insert (format "%S" cell))
774           (char-db-insert-char-spec cell readable
775                                     nil
776                                     required-features))
777         (setq separator lbs))
778        ((consp cell)
779         (if separator
780             (insert lbs))
781         (if (consp (car cell))
782             (char-db-insert-char-spec cell readable)
783           (char-db-insert-char-reference cell readable))
784         (setq separator lbs))
785        (t
786         (if separator
787             (insert separator))
788         (insert (prin1-to-string cell))
789         (setq separator " ")))
790       (setq value (cdr value)))
791     (insert ")")
792     (insert line-breaking)))
793
794 (defun insert-char-attributes (char &optional readable attributes column
795                                     for-sub-node)
796   (unless column
797     (setq column (current-column)))
798   (let (name value ; has-long-ccs-name
799         rest
800         radical strokes
801         (line-breaking
802          (concat "\n" (make-string (1+ column) ?\ )))
803         lbs cell separator ret
804         key al cal
805         dest-ccss ; sources required-features
806         ccss)
807     (let (atr-d)
808       (setq attributes
809             (sort (if attributes
810                       (if (consp attributes)
811                           (progn
812                             (dolist (name attributes)
813                               (unless (memq name char-db-ignored-attributes)
814                                 (if (find-charset name)
815                                     (push name ccss))
816                                 (push name atr-d)))
817                             atr-d))
818                     (dolist (name (char-attribute-list))
819                       (unless (memq name char-db-ignored-attributes)
820                         (if (find-charset name)
821                             (push name ccss))
822                         (push name atr-d)))
823                     atr-d)
824                   #'char-attribute-name<)))
825     (insert "(")
826     (when (memq '<-subsumptive attributes)
827       (when (or readable (not for-sub-node))
828         (when (setq value (get-char-attribute char '<-subsumptive))
829           (char-db-insert-relation-feature char '<-subsumptive value
830                                            line-breaking
831                                            ccss readable)))
832       (setq attributes (delq '<-subsumptive attributes)))
833     (when (and (memq '<-denotational attributes)
834                (setq value (get-char-attribute char '<-denotational)))
835       (char-db-insert-relation-feature char '<-denotational value
836                                        line-breaking
837                                        ccss readable)
838       (setq attributes (delq '<-denotational attributes)))
839     (when (and (memq 'name attributes)
840                (setq value (get-char-attribute char 'name)))
841       (insert (format
842                (if (> (+ (current-column) (length value)) 48)
843                    "(name . %S)%s"
844                  "(name               . %S)%s")
845                value line-breaking))
846       (setq attributes (delq 'name attributes))
847       )
848     (when (and (memq 'name* attributes)
849                (setq value (get-char-attribute char 'name*)))
850       (insert (format
851                (if (> (+ (current-column) (length value)) 48)
852                    "(name* . %S)%s"
853                  "(name*              . %S)%s")
854                value line-breaking))
855       (setq attributes (delq 'name* attributes))
856       )
857     (when (and (memq 'script attributes)
858                (setq value (get-char-attribute char 'script)))
859       (insert (format "(script\t\t%s)%s"
860                       (mapconcat (function prin1-to-string)
861                                  value " ")
862                       line-breaking))
863       (setq attributes (delq 'script attributes))
864       )
865     (dolist (name '(=>ucs =>ucs*))
866       (when (and (memq name attributes)
867                  (setq value (get-char-attribute char name)))
868         (insert (format "(%-18s . #x%04X)\t; %c%s"
869                         name value (decode-char '=ucs value)
870                         line-breaking))
871         (setq attributes (delq name attributes))))
872     (dolist (name '(=>ucs@gb =>ucs@big5))
873       (when (and (memq name attributes)
874                  (setq value (get-char-attribute char name)))
875         (insert (format "(%-18s . #x%04X)\t; %c%s"
876                         name value
877                         (decode-char (intern
878                                       (concat "="
879                                               (substring
880                                                (symbol-name name) 2)))
881                                      value)
882                         line-breaking))
883         (setq attributes (delq name attributes))
884         ))
885     ;; (dolist (name '(=>daikanwa))
886     ;;   (when (and (memq name attributes)
887     ;;              (setq value (get-char-attribute char name)))
888     ;;     (insert
889     ;;      (if (integerp value)
890     ;;          (format "(%-18s . %05d)\t; %c%s"
891     ;;                  name value (decode-char '=daikanwa value)
892     ;;                  line-breaking)
893     ;;        (format "(%-18s %s)\t; %c%s"
894     ;;                name
895     ;;                (mapconcat (function prin1-to-string)
896     ;;                           value " ")
897     ;;                (char-representative-of-daikanwa char)
898     ;;                line-breaking)))
899     ;;     (setq attributes (delq name attributes))))
900     (when (and (memq 'general-category attributes)
901                (setq value (get-char-attribute char 'general-category)))
902       (insert (format
903                "(general-category\t%s) ; %s%s"
904                (mapconcat (lambda (cell)
905                             (format "%S" cell))
906                           value " ")
907                (cond ((rassoc value unidata-normative-category-alist)
908                       "Normative Category")
909                      ((rassoc value unidata-informative-category-alist)
910                       "Informative Category")
911                      (t
912                       "Unknown Category"))
913                line-breaking))
914       (setq attributes (delq 'general-category attributes))
915       )
916     (when (and (memq 'bidi-category attributes)
917                (setq value (get-char-attribute char 'bidi-category)))
918       (insert (format "(bidi-category\t. %S)%s"
919                       value
920                       line-breaking))
921       (setq attributes (delq 'bidi-category attributes))
922       )
923     (unless (or (not (memq 'mirrored attributes))
924                 (eq (setq value (get-char-attribute char 'mirrored 'empty))
925                     'empty))
926       (insert (format "(mirrored\t\t. %S)%s"
927                       value
928                       line-breaking))
929       (setq attributes (delq 'mirrored attributes))
930       )
931     (cond
932      ((and (memq 'decimal-digit-value attributes)
933            (setq value (get-char-attribute char 'decimal-digit-value)))
934       (insert (format "(decimal-digit-value . %S)%s"
935                       value
936                       line-breaking))
937       (setq attributes (delq 'decimal-digit-value attributes))
938       (when (and (memq 'digit-value attributes)
939                  (setq value (get-char-attribute char 'digit-value)))
940         (insert (format "(digit-value\t . %S)%s"
941                         value
942                         line-breaking))
943         (setq attributes (delq 'digit-value attributes))
944         )
945       (when (and (memq 'numeric-value attributes)
946                  (setq value (get-char-attribute char 'numeric-value)))
947         (insert (format "(numeric-value\t . %S)%s"
948                         value
949                         line-breaking))
950         (setq attributes (delq 'numeric-value attributes))
951         )
952       )
953      (t
954       (when (and (memq 'digit-value attributes)
955                  (setq value (get-char-attribute char 'digit-value)))
956         (insert (format "(digit-value\t. %S)%s"
957                         value
958                         line-breaking))
959         (setq attributes (delq 'digit-value attributes))
960         )
961       (when (and (memq 'numeric-value attributes)
962                  (setq value (get-char-attribute char 'numeric-value)))
963         (insert (format "(numeric-value\t. %S)%s"
964                         value
965                         line-breaking))
966         (setq attributes (delq 'numeric-value attributes))
967         )))
968     (when (and (memq 'iso-10646-comment attributes)
969                (setq value (get-char-attribute char 'iso-10646-comment)))
970       (insert (format "(iso-10646-comment\t. %S)%s"
971                       value
972                       line-breaking))
973       (setq attributes (delq 'iso-10646-comment attributes))
974       )
975     (when (and (memq 'morohashi-daikanwa attributes)
976                (setq value (get-char-attribute char 'morohashi-daikanwa)))
977       (insert (format "(morohashi-daikanwa\t%s)%s"
978                       (mapconcat (function prin1-to-string) value " ")
979                       line-breaking))
980       (setq attributes (delq 'morohashi-daikanwa attributes))
981       )
982     (setq radical nil
983           strokes nil)
984     (when (and (memq 'ideographic-radical attributes)
985                (setq value (get-char-attribute char 'ideographic-radical)))
986       (setq radical value)
987       (insert (format "(ideographic-radical . %S)\t; %c%s"
988                       radical
989                       (ideographic-radical radical)
990                       line-breaking))
991       (setq attributes (delq 'ideographic-radical attributes))
992       )
993     (when (and (memq 'shuowen-radical attributes)
994                (setq value (get-char-attribute char 'shuowen-radical)))
995       (insert (format "(shuowen-radical\t. %S)\t; %c%s"
996                       value
997                       (shuowen-radical value)
998                       line-breaking))
999       (setq attributes (delq 'shuowen-radical attributes))
1000       )
1001     (let (key)
1002       (dolist (domain
1003                (append
1004                 char-db-feature-domains
1005                 (let (dest domain)
1006                   (dolist (feature (char-attribute-list))
1007                     (setq feature (symbol-name feature))
1008                     (when (string-match
1009                            "\\(radical\\|strokes\\)@\\([^@*]+\\)\\(\\*\\|$\\)"
1010                            feature)
1011                       (setq domain (intern (match-string 2 feature)))
1012                      (unless (memq domain dest)
1013                        (setq dest (cons domain dest)))))
1014                   (sort dest #'string<))))
1015         (setq key (intern (format "%s@%s" 'ideographic-radical domain)))
1016         (when (and (memq key attributes)
1017                    (setq value (get-char-attribute char key)))
1018           (setq radical value)
1019           (insert (format "(%s . %S)\t; %c%s"
1020                           key
1021                           radical
1022                           (ideographic-radical radical)
1023                           line-breaking))
1024           (setq attributes (delq key attributes))
1025           )
1026         (setq key (intern (format "%s@%s" 'ideographic-strokes domain)))
1027         (when (and (memq key attributes)
1028                    (setq value (get-char-attribute char key)))
1029           (setq strokes value)
1030           (insert (format "(%s . %S)%s"
1031                           key
1032                           strokes
1033                           line-breaking))
1034           (setq attributes (delq key attributes))
1035           )
1036         (setq key (intern (format "%s@%s" 'total-strokes domain)))
1037         (when (and (memq key attributes)
1038                    (setq value (get-char-attribute char key)))
1039           (insert (format "(%s       . %S)%s"
1040                           key
1041                           value
1042                           line-breaking))
1043           (setq attributes (delq key attributes))
1044           )
1045         (dolist (feature '(ideographic-radical
1046                            ideographic-strokes
1047                            total-strokes))
1048           (setq key (intern (format "%s@%s*sources" feature domain)))
1049           (when (and (memq key attributes)
1050                      (setq value (get-char-attribute char key)))
1051             (insert (format "(%s%s" key line-breaking))
1052             (dolist (cell value)
1053               (insert (format " %s" cell)))
1054             (insert ")")
1055             (insert line-breaking)
1056             (setq attributes (delq key attributes))
1057             ))
1058         ))
1059     (when (and (memq 'ideographic-strokes attributes)
1060                (setq value (get-char-attribute char 'ideographic-strokes)))
1061       (setq strokes value)
1062       (insert (format "(ideographic-strokes . %S)%s"
1063                       strokes
1064                       line-breaking))
1065       (setq attributes (delq 'ideographic-strokes attributes))
1066       )
1067     (when (and (memq 'kangxi-radical attributes)
1068                (setq value (get-char-attribute char 'kangxi-radical)))
1069       (unless (eq value radical)
1070         (insert (format "(kangxi-radical\t . %S)\t; %c%s"
1071                         value
1072                         (ideographic-radical value)
1073                         line-breaking))
1074         (or radical
1075             (setq radical value)))
1076       (setq attributes (delq 'kangxi-radical attributes))
1077       )
1078     (when (and (memq 'kangxi-strokes attributes)
1079                (setq value (get-char-attribute char 'kangxi-strokes)))
1080       (unless (eq value strokes)
1081         (insert (format "(kangxi-strokes\t . %S)%s"
1082                         value
1083                         line-breaking))
1084         (or strokes
1085             (setq strokes value)))
1086       (setq attributes (delq 'kangxi-strokes attributes))
1087       )
1088     (when (and (memq 'japanese-radical attributes)
1089                (setq value (get-char-attribute char 'japanese-radical)))
1090       (unless (eq value radical)
1091         (insert (format "(japanese-radical\t . %S)\t; %c%s"
1092                         value
1093                         (ideographic-radical value)
1094                         line-breaking))
1095         (or radical
1096             (setq radical value)))
1097       (setq attributes (delq 'japanese-radical attributes))
1098       )
1099     (when (and (memq 'japanese-strokes attributes)
1100                (setq value (get-char-attribute char 'japanese-strokes)))
1101       (unless (eq value strokes)
1102         (insert (format "(japanese-strokes\t . %S)%s"
1103                         value
1104                         line-breaking))
1105         (or strokes
1106             (setq strokes value)))
1107       (setq attributes (delq 'japanese-strokes attributes))
1108       )
1109     (when (and (memq 'cns-radical attributes)
1110                (setq value (get-char-attribute char 'cns-radical)))
1111       (insert (format "(cns-radical\t . %S)\t; %c%s"
1112                       value
1113                       (ideographic-radical value)
1114                       line-breaking))
1115       (setq attributes (delq 'cns-radical attributes))
1116       )
1117     (when (and (memq 'cns-strokes attributes)
1118                (setq value (get-char-attribute char 'cns-strokes)))
1119       (unless (eq value strokes)
1120         (insert (format "(cns-strokes\t . %S)%s"
1121                         value
1122                         line-breaking))
1123         (or strokes
1124             (setq strokes value)))
1125       (setq attributes (delq 'cns-strokes attributes))
1126       )
1127     ;; (when (and (memq 'shinjigen-1-radical attributes)
1128     ;;            (setq value (get-char-attribute char 'shinjigen-1-radical)))
1129     ;;   (unless (eq value radical)
1130     ;;     (insert (format "(shinjigen-1-radical . %S)\t; %c%s"
1131     ;;                     value
1132     ;;                     (ideographic-radical value)
1133     ;;                     line-breaking))
1134     ;;     (or radical
1135     ;;         (setq radical value)))
1136     ;;   (setq attributes (delq 'shinjigen-1-radical attributes))
1137     ;;   )
1138     (when (and (memq 'ideographic- attributes)
1139                (setq value (get-char-attribute char 'ideographic-)))
1140       (insert "(ideographic-       ")
1141       (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1142             separator nil)
1143       (while (consp value)
1144         (setq cell (car value))
1145         (if (integerp cell)
1146             (setq cell (decode-char '=ucs cell)))
1147         (cond ((characterp cell)
1148                (if separator
1149                    (insert lbs))
1150                (if readable
1151                    (insert (format "%S" cell))
1152                  (char-db-insert-char-spec cell readable))
1153                (setq separator lbs))
1154               ((consp cell)
1155                (if separator
1156                    (insert lbs))
1157                (if (consp (car cell))
1158                    (char-db-insert-char-spec cell readable)
1159                  (char-db-insert-char-reference cell readable))
1160                (setq separator lbs))
1161               (t
1162                (if separator
1163                    (insert separator))
1164                (insert (prin1-to-string cell))
1165                (setq separator " ")))
1166         (setq value (cdr value)))
1167       (insert ")")
1168       (insert line-breaking)
1169       (setq attributes (delq 'ideographic- attributes)))
1170     (when (and (memq 'total-strokes attributes)
1171                (setq value (get-char-attribute char 'total-strokes)))
1172       (insert (format "(total-strokes       . %S)%s"
1173                       value
1174                       line-breaking))
1175       (setq attributes (delq 'total-strokes attributes))
1176       )
1177     (when (and (memq '->ideograph attributes)
1178                (setq value (get-char-attribute char '->ideograph)))
1179       (insert (format "(->ideograph\t%s)%s"
1180                       (mapconcat (lambda (code)
1181                                    (cond ((symbolp code)
1182                                           (symbol-name code))
1183                                          ((integerp code)
1184                                           (format "#x%04X" code))
1185                                          (t
1186                                           (format "%s %S"
1187                                                   line-breaking code))))
1188                                  value " ")
1189                       line-breaking))
1190       (setq attributes (delq '->ideograph attributes))
1191       )
1192     ;; (when (and (memq '->decomposition attributes)
1193     ;;            (setq value (get-char-attribute char '->decomposition)))
1194     ;;   (insert (format "(->decomposition\t%s)%s"
1195     ;;                   (mapconcat (lambda (code)
1196     ;;                                (cond ((symbolp code)
1197     ;;                                       (symbol-name code))
1198     ;;                                      ((characterp code)
1199     ;;                                       (if readable
1200     ;;                                           (format "%S" code)
1201     ;;                                         (format "#x%04X"
1202     ;;                                                 (char-int code))
1203     ;;                                         ))
1204     ;;                                      ((integerp code)
1205     ;;                                       (format "#x%04X" code))
1206     ;;                                      (t
1207     ;;                                       (format "%s%S" line-breaking code))))
1208     ;;                              value " ")
1209     ;;                   line-breaking))
1210     ;;   (setq attributes (delq '->decomposition attributes))
1211     ;;   )
1212     (if (equal (get-char-attribute char '->titlecase)
1213                (get-char-attribute char '->uppercase))
1214         (setq attributes (delq '->titlecase attributes)))
1215     (when (and (memq '->mojikyo attributes)
1216                (setq value (get-char-attribute char '->mojikyo)))
1217       (insert (format "(->mojikyo\t\t. %06d)\t; %c%s"
1218                       value (decode-char 'mojikyo value)
1219                       line-breaking))
1220       (setq attributes (delq '->mojikyo attributes))
1221       )
1222     (when (and (memq 'hanyu-dazidian-vol attributes)
1223                (setq value (get-char-attribute char 'hanyu-dazidian-vol)))
1224       (insert (format "(hanyu-dazidian-vol  . %d)%s"
1225                       value line-breaking))
1226       (setq attributes (delq 'hanyu-dazidian-vol attributes))
1227       )
1228     (when (and (memq 'hanyu-dazidian-page attributes)
1229                (setq value (get-char-attribute char 'hanyu-dazidian-page)))
1230       (insert (format "(hanyu-dazidian-page . %d)%s"
1231                       value line-breaking))
1232       (setq attributes (delq 'hanyu-dazidian-page attributes))
1233       )
1234     (when (and (memq 'hanyu-dazidian-char attributes)
1235                (setq value (get-char-attribute char 'hanyu-dazidian-char)))
1236       (insert (format "(hanyu-dazidian-char . %d)%s"
1237                       value line-breaking))
1238       (setq attributes (delq 'hanyu-dazidian-char attributes))
1239       )
1240     (unless readable
1241       (dolist (ignored '(composition
1242                          ->denotational <-subsumptive ->ucs-unified
1243                          ->ideographic-component-forms))
1244         (setq attributes (delq ignored attributes))))
1245     (while attributes
1246       (setq name (car attributes))
1247       (unless (eq (setq value (get-char-attribute char name 'value-is-empty))
1248                   'value-is-empty)
1249         (cond ((setq ret (find-charset name))
1250                (setq name (charset-name ret))
1251                (when (not (memq name dest-ccss))
1252                  (setq dest-ccss (cons name dest-ccss))
1253                  (char-db-insert-ccs-feature name value line-breaking))
1254                )
1255               ((string-match "^=>ucs@" (symbol-name name))
1256                (insert (format "(%-18s . #x%04X)\t; %c%s"
1257                                name value (decode-char '=ucs value)
1258                                line-breaking))
1259                )
1260               ((eq name 'jisx0208-1978/4X)
1261                (insert (format "(%-18s . #x%04X)%s"
1262                                name value
1263                                line-breaking))
1264                )
1265               ((and
1266                 (not readable)
1267                 (not (eq name '->subsumptive))
1268                 (not (eq name '->uppercase))
1269                 (not (eq name '->lowercase))
1270                 (not (eq name '->titlecase))
1271                 (not (eq name '->canonical))
1272                 (not (eq name '->Bopomofo))
1273                 (not (eq name '->mistakable))
1274                 (not (eq name '->ideographic-variants))
1275                 (null (get-char-attribute
1276                        char (intern (format "%s*sources" name))))
1277                 (not (string-match "\\*sources$" (symbol-name name)))
1278                 (null (get-char-attribute
1279                        char (intern (format "%s*note" name))))
1280                 (not (string-match "\\*note$" (symbol-name name)))
1281                 (or (eq name '<-identical)
1282                     (eq name '<-uppercase)
1283                     (eq name '<-lowercase)
1284                     (eq name '<-titlecase)
1285                     (eq name '<-canonical)
1286                     (eq name '<-ideographic-variants)
1287                     ;; (eq name '<-synonyms)
1288                     (string-match "^<-synonyms" (symbol-name name))
1289                     (eq name '<-mistakable)
1290                     (when (string-match "^->" (symbol-name name))
1291                       (cond
1292                        ((string-match "^->fullwidth" (symbol-name name))
1293                         (not (and (consp value)
1294                                   (characterp (car value))
1295                                   (encode-char
1296                                    (car value) '=ucs 'defined-only)))
1297                         )
1298                        (t)))
1299                     ))
1300                )
1301               ((or (eq name 'ideographic-structure)
1302                    (eq name 'ideographic-combination)
1303                    (eq name 'ideographic-)
1304                    (eq name '=decomposition)
1305                    (char-feature-base-name= '=decomposition name)
1306                    (char-feature-base-name= '=>decomposition name)
1307                    ;; (string-match "^=>*decomposition\\(@[^*]+\\)?$"
1308                    ;;               (symbol-name name))
1309                    (string-match "^\\(->\\|<-\\)[^*]*$" (symbol-name name))
1310                    (string-match "^\\(->\\|<-\\)[^*]*\\*sources$"
1311                                  (symbol-name name))
1312                    )
1313                (char-db-insert-relation-feature char name value
1314                                                 line-breaking
1315                                                 ccss readable))
1316               ((memq name '(ideograph=
1317                             original-ideograph-of
1318                             ancient-ideograph-of
1319                             vulgar-ideograph-of
1320                             wrong-ideograph-of
1321                             ;; simplified-ideograph-of
1322                             ideographic-variants
1323                             ;; ideographic-different-form-of
1324                             ))
1325                (insert (format "(%-18s%s " name line-breaking))
1326                (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1327                      separator nil)
1328                (while (consp value)
1329                  (setq cell (car value))
1330                  (if (and (consp cell)
1331                           (consp (car cell)))
1332                      (progn
1333                        (if separator
1334                            (insert lbs))
1335                        (char-db-insert-alist cell readable)
1336                        (setq separator lbs))
1337                    (if separator
1338                        (insert separator))
1339                    (insert (prin1-to-string cell))
1340                    (setq separator " "))
1341                  (setq value (cdr value)))
1342                (insert ")")
1343                (insert line-breaking))
1344               ((consp value)
1345                (insert (format "(%-18s " name))
1346                (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1347                      separator nil)
1348                (while (consp value)
1349                  (setq cell (car value))
1350                  (if (and (consp cell)
1351                           (consp (car cell))
1352                           (setq ret (condition-case nil
1353                                         (find-char cell)
1354                                       (error nil))))
1355                      (progn
1356                        (setq rest cell
1357                              al nil
1358                              cal nil)
1359                        (while rest
1360                          (setq key (car (car rest)))
1361                          (if (find-charset key)
1362                              (setq cal (cons key cal))
1363                            (setq al (cons key al)))
1364                          (setq rest (cdr rest)))
1365                        (if separator
1366                            (insert lbs))
1367                        (insert-char-attributes ret
1368                                                readable
1369                                                al ; cal
1370                                                nil 'for-sub-node)
1371                        (setq separator lbs))
1372                    (setq ret (prin1-to-string cell))
1373                    (if separator
1374                        (if (< (+ (current-column)
1375                                  (length ret)
1376                                  (length separator))
1377                               76)
1378                            (insert separator)
1379                          (insert lbs)))
1380                    (insert ret)
1381                    (setq separator " "))
1382                  (setq value (cdr value)))
1383                (insert ")")
1384                (insert line-breaking))
1385               (t
1386                (insert (format "(%-18s" name))
1387                (setq ret (prin1-to-string value))
1388                (unless (< (+ (current-column)
1389                              (length ret)
1390                              3)
1391                           76)
1392                  (insert line-breaking))
1393                (insert " . " ret ")" line-breaking)
1394                ;; (insert (format "(%-18s . %S)%s"
1395                ;;                 name value
1396                ;;                 line-breaking))
1397                )
1398               ))
1399       (setq attributes (cdr attributes)))
1400     (insert ")")))
1401
1402 (defun insert-char-data (char &optional readable
1403                               attributes)
1404   (save-restriction
1405     (narrow-to-region (point)(point))
1406     (insert "(define-char
1407   '")
1408     (insert-char-attributes char readable attributes)
1409     (insert ")\n")
1410     (goto-char (point-min))
1411     (while (re-search-forward "[ \t]+$" nil t)
1412       (replace-match ""))
1413     ;; from tabify.
1414     (goto-char (point-min))
1415     (while (re-search-forward "[ \t][ \t][ \t]*" nil t)
1416       (let ((column (current-column))
1417             (indent-tabs-mode t))
1418         (delete-region (match-beginning 0) (point))
1419         (indent-to column)))
1420     (goto-char (point-max))
1421     ;; (tabify (point-min)(point-max))
1422     ))
1423
1424 (defun insert-char-data-with-variant (char &optional printable
1425                                            no-ucs-unified
1426                                            script excluded-script)
1427   (insert-char-data char printable)
1428   (let ((variants (char-variants char))
1429         rest
1430         variant vs ret)
1431     (setq variants (sort variants #'<))
1432     (setq rest variants)
1433     (setq variants (cons char variants))
1434     (while rest
1435       (setq variant (car rest))
1436       (unless (get-char-attribute variant '<-subsumptive)
1437         (if (and (or (null script)
1438                      (null (setq vs (get-char-attribute variant 'script)))
1439                      (memq script vs))
1440                  (or (null excluded-script)
1441                      (null (setq vs (get-char-attribute variant 'script)))
1442                      (not (memq excluded-script vs))))
1443             (unless (and no-ucs-unified (get-char-attribute variant '=ucs))
1444               (insert-char-data variant printable)
1445               (if (setq ret (char-variants variant))
1446                   (while ret
1447                     (or (memq (car ret) variants)
1448                         ;; (get-char-attribute (car ret) '<-subsumptive)
1449                         (setq rest (nconc rest (list (car ret)))))
1450                     (setq ret (cdr ret)))))))
1451       (setq rest (cdr rest)))))
1452
1453 (defun insert-char-range-data (min max &optional script excluded-script)
1454   (let ((code min)
1455         char)
1456     (while (<= code max)
1457       (setq char (decode-char '=ucs code))
1458       (if (encode-char char '=ucs 'defined-only)
1459           (insert-char-data-with-variant char nil 'no-ucs-unified
1460                                          script excluded-script))
1461       (setq code (1+ code)))))
1462
1463 (defun write-char-range-data-to-file (min max file
1464                                           &optional script excluded-script)
1465   (let ((coding-system-for-write char-db-file-coding-system))
1466     (with-temp-buffer
1467       (insert (format ";; -*- coding: %s -*-\n"
1468                       char-db-file-coding-system))
1469       (insert-char-range-data min max script excluded-script)
1470       (write-region (point-min)(point-max) file))))
1471
1472 (defvar what-character-original-window-configuration)
1473
1474 ;;;###autoload
1475 (defun what-char-definition (char)
1476   (interactive (list (char-after)))
1477   (let ((buf (get-buffer-create "*Character Description*"))
1478         (the-buf (current-buffer))
1479         (win-conf (current-window-configuration)))
1480     (pop-to-buffer buf)
1481     (make-local-variable 'what-character-original-window-configuration)
1482     (setq what-character-original-window-configuration win-conf)
1483     (setq buffer-read-only nil)
1484     (erase-buffer)
1485     (condition-case err
1486         (progn
1487           (insert-char-data-with-variant char 'printable)
1488           (unless (char-attribute-alist char)
1489             (insert (format ";; = %c\n"
1490                             (let* ((rest (split-char char))
1491                                    (ccs (pop rest))
1492                                    (code (pop rest)))
1493                               (while rest
1494                                 (setq code (logior (lsh code 8)
1495                                                    (pop rest))))
1496                               (decode-char ccs code)))))
1497           ;; (char-db-update-comment)
1498           (set-buffer-modified-p nil)
1499           (view-mode the-buf (lambda (buf)
1500                                (set-window-configuration
1501                                 what-character-original-window-configuration)
1502                                ))
1503           (goto-char (point-min)))
1504       (error (progn
1505                (set-window-configuration
1506                 what-character-original-window-configuration)
1507                (signal (car err) (cdr err)))))))
1508
1509
1510 ;;; @ end
1511 ;;;
1512
1513 (provide 'char-db-util)
1514
1515 ;;; char-db-util.el ends here