(shuowen-radicals): Add ?儿, ?兄, ?兂, ?皃, ?𠑹, ?先, ?秃, ?見, ?覞
[chise/xemacs-chise.git.1] / lisp / utf-2000 / char-db-util.el
1 ;;; char-db-util.el --- Character Database utility -*- coding: utf-8-er; -*-
2
3 ;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
4 ;;   2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 MORIOKA Tomohiko.
5
6 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
7 ;; Keywords: CHISE, Character Database, ISO/IEC 10646, UCS, Unicode, MULE.
8
9 ;; This file is part of XEmacs CHISE.
10
11 ;; XEmacs CHISE is free software; you can redistribute it and/or
12 ;; modify it under the terms of the GNU General Public License as
13 ;; published by the Free Software Foundation; either version 2, or (at
14 ;; your option) any later version.
15
16 ;; XEmacs CHISE is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 ;; General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with XEmacs CHISE; see the file COPYING.  If not, write to
23 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Code:
27
28 (require 'chise-subr)
29 (require 'ideograph-subr)
30
31 (defconst unidata-normative-category-alist
32   '(("Lu" letter        uppercase)
33     ("Ll" letter        lowercase)
34     ("Lt" letter        titlecase)
35     ("Mn" mark          non-spacing)
36     ("Mc" mark          spacing-combining)
37     ("Me" mark          enclosing)
38     ("Nd" number        decimal-digit)
39     ("Nl" number        letter)
40     ("No" number        other)
41     ("Zs" separator     space)
42     ("Zl" separator     line)
43     ("Zp" separator     paragraph)
44     ("Cc" other         control)
45     ("Cf" other         format)
46     ("Cs" other         surrogate)
47     ("Co" other         private-use)
48     ("Cn" other         not-assigned)))
49
50 (defconst unidata-informative-category-alist
51   '(("Lm" letter        modifier)
52     ("Lo" letter        other)
53     ("Pc" punctuation   connector)
54     ("Pd" punctuation   dash)
55     ("Ps" punctuation   open)
56     ("Pe" punctuation   close)
57     ("Pi" punctuation   initial-quote)
58     ("Pf" punctuation   final-quote)
59     ("Po" punctuation   other)
60     ("Sm" symbol        math)
61     ("Sc" symbol        currency)
62     ("Sk" symbol        modifier)
63     ("So" symbol        other)
64     ))
65
66 (defconst shuowen-radicals
67   [?一 ?上 ?示 ?三 ?王 ?玉 ?玨 ?气 ?士 ?丨 ; 010
68    ?屮 ?艸 ?蓐 ?茻 ?小 ?八 ?釆 ?半 ?牛 ?犛 ; 020
69    ?告 ?口 ?凵 ?吅 ?哭 ?走 ?止 ?癶 ?步 ?此 ; 030
70    ?正 ?是 ?辵 ?彳 ?廴 ?㢟 ?行 ?齒 ?牙 ?足 ; 040
71    ?疋 ?品 ?龠 ?冊 ?㗊 ?舌 ?干 ?谷 ?只 ?㕯 ; 050
72    ?句 ?丩 ?古 ?十 ?卅 ?言 ?誩 ?音 ?䇂 ?丵 ; 060
73    ?菐 ?𠬞 ?𠬜 ?共 ?異 ?舁 ?𦥑 ?䢅 ?爨 ?革 ; 070
74    ?鬲 ?䰜 ?爪 ?𠃨 ?鬥 ?又 ?𠂇 ?㕜 ?支 ?𦘒 ; 080
75    ?聿 ?畫 ?隶 ?臤 ?臣 ?殳 ?殺 ?𠘧 ?寸 ?皮 ; 090
76    ?㼱 ?攴 ?敎 ?卜 ?用 ?爻 ?㸚 ?𥄎 ?目 ?䀠 ; 100
77    ?眉 ?盾 ?自 ?白 ?鼻 ?皕 ?習 ?羽 ?隹 ?奞 ; 110
78    ?萑 ?𦫳 ?苜 ?羊 ?羴 ?瞿 ?雔 ?雥 ?鳥 ?烏 ; 120
79    ?𠦒 ?冓 ?幺 ?𢆶 ?叀 ?玄 ?予 ?放 ?𠬪 ?𣦼 ; 130
80    ?歺 ?死 ?冎 ?骨 ?肉 ?筋 ?刀 ?刃 ?㓞 ?丰 ; 140
81    ?耒 ?𧢲 ?竹 ?箕 ?丌 ?左 ?工 ?㠭 ?巫 ?甘 ; 150
82    ?曰 ?乃 ?丂 ?可 ?兮 ?号 ?亏 ?旨 ?喜 ?壴 ; 160
83 ;  ?旨 ?曰 ?乃 ?丂 ?可 ?兮 ?号 ?亏 ?喜 ?壴 ; 160
84    ?鼓 ?豈 ?豆 ?豊 ?豐 ?䖒 ?虍 ?虎 ?虤 ?皿 ; 170
85    ?𠙴 ?去 ?血 ?丶 ?丹 ?青 ?井 ?皀 ?鬯 ?食 ; 180
86    ?亼 ?會 ?倉 ?入 ?缶 ?矢 ?高 ?冂 ?𩫏 ?京 ; 190
87    ?亯 ?𣆪 ?畗 ?㐭 ?嗇 ?來 ?麥 ?夊 ?舛 ?䑞 ; 200
88    ?韋 ?弟 ?夂 ?久 ?桀 ?木 ?東 ?林 ?才 ?叒 ; 210
89    ?之 ?帀 ?出 ?𣎵 ?生 ?乇 ?𠂹 ?𠌶 ?華 ?𥝌 ; 220
90    ?稽 ?巢 ?桼 ?束 ?㯻 ?囗 ?員 ?貝 ?邑 ?𨛜 ; 230
91    ?日 ?旦 ?倝 ?㫃 ?冥 ?晶 ?月 ?有 ?明 ?囧 ; 240
92    ?夕 ?多 ?毌 ?𢎘 ?𣐺 ?卣 ?齊 ?朿 ?片 ?鼎 ; 250
93    ?克 ?彔 ?禾 ?秝 ?黍 ?香 ?米 ?毇 ?臼 ?凶 ; 260
94    ?𣎳 ?林 ?麻 ?尗 ?耑 ?韭 ?瓜 ?瓠 ?宀 ?宮 ; 270
95    ?呂 ?穴 ?㝱 ?𤕫 ?冖 ?𠔼 ?冃 ?㒳 ?网 ?襾 ; 280
96    ?巾 ?巿 ?帛 ?白 ?㡀 ?黹 ?人 ?𠤎 ?匕 ?从 ; 290
97    ?比 ?北 ?丘 ?㐺 ?𡈼 ?重 ?臥 ?身 ?㐆 ?衣 ; 300
98    ?裘 ?老 ?毛 ?毳 ?尸 ?尺 ?尾 ?履 ?舟 ?方 ; 310
99    ?儿 ?兄 ?兂 ?皃 ?𠑹 ?先 ?秃 ?見 ?覞 ?欠 ; 320
100    ])
101
102 (defun shuowen-radical (number)
103   (aref shuowen-radicals (1- number)))
104
105 (defvar char-db-file-coding-system 'utf-8-mcs-er)
106
107 (defvar char-db-ignored-attributes '(ideographic-products))
108
109 (defvar char-db-coded-charset-priority-list
110   '(ascii
111     control-1
112     latin-iso8859-1
113     latin-iso8859-2
114     latin-iso8859-3
115     latin-iso8859-4
116     latin-iso8859-9
117     latin-jisx0201
118     cyrillic-iso8859-5
119     greek-iso8859-7
120     thai-tis620
121     ;; =mj
122     ;; =adobe-japan1-0
123     ;; =adobe-japan1-1
124     ;; =adobe-japan1-2
125     ;; =adobe-japan1-3
126     ;; =adobe-japan1-4
127     ;; =adobe-japan1-5
128     ;; =adobe-japan1-6
129     =jis-x0208
130     =jis-x0208@1978
131     =jis-x0208@1983
132     japanese-jisx0212
133     chinese-gb2312
134     =jis-x0208@1990
135     chinese-cns11643-1
136     chinese-cns11643-2
137     chinese-cns11643-3
138     chinese-cns11643-4
139     chinese-cns11643-5
140     chinese-cns11643-6
141     chinese-cns11643-7
142     =jis-x0213-1
143     =jis-x0213-1@2000
144     =jis-x0213-1@2004
145     =jis-x0213-2
146     korean-ksc5601
147     chinese-isoir165
148     katakana-jisx0201
149     hebrew-iso8859-8
150     chinese-gb12345
151     latin-viscii
152     ethiopic-ucs
153     =big5-cdp
154     =hanyo-denshi/ja
155     =hanyo-denshi/jb
156     =hanyo-denshi/jc
157     =hanyo-denshi/jd
158     =hanyo-denshi/ft
159     =hanyo-denshi/ia
160     =hanyo-denshi/ib
161     =hanyo-denshi/hg
162     =hanyo-denshi/jt
163     =hanyo-denshi/ks
164     ;; ==mj
165     ;; ==adobe-japan1-0
166     ;; ==adobe-japan1-1
167     ;; ==adobe-japan1-2
168     ;; ==adobe-japan1-3
169     ;; ==adobe-japan1-4
170     ;; ==adobe-japan1-5
171     ;; ==adobe-japan1-6
172     ==jis-x0208
173     ==jis-x0213-1
174     ==jis-x0213-2
175     ==hanyo-denshi/ja
176     ==hanyo-denshi/jb
177     ==hanyo-denshi/jc
178     ==hanyo-denshi/ft
179     ==hanyo-denshi/ib
180     ==hanyo-denshi/hg
181     ==hanyo-denshi/jt
182     ==hanyo-denshi/ks
183     =gt
184     =gt-k
185     =daikanwa
186     =daikanwa@rev2
187     =daikanwa@rev1
188     ==daikanwa
189     =cbeta
190     ideograph-hanziku-1
191     ideograph-hanziku-2
192     ideograph-hanziku-3
193     ideograph-hanziku-4
194     ideograph-hanziku-5
195     ideograph-hanziku-6
196     ideograph-hanziku-7
197     ideograph-hanziku-8
198     ideograph-hanziku-9
199     ideograph-hanziku-10
200     ideograph-hanziku-11
201     ideograph-hanziku-12
202     ;; =>>>adobe-japan1-0
203     ;; =>>>adobe-japan1-1
204     ;; =>>>adobe-japan1-2
205     ;; =>>>adobe-japan1-3
206     ;; =>>>adobe-japan1-4
207     ;; =>>>adobe-japan1-5
208     ;; =>>>adobe-japan1-6
209     ;; =>>>jis-x0208
210     ;; =>>>jis-x0213-1
211     ;; =>>>jis-x0213-2
212     ;; =>>>hanyo-denshi/ja
213     ;; =>>>hanyo-denshi/jb
214     ;; =>>>hanyo-denshi/jc
215     ;; =>>>hanyo-denshi/ft
216     ;; =>>>hanyo-denshi/ib
217     ;; =>>>hanyo-denshi/hg
218     ;; =>>>hanyo-denshi/jt
219     ;; =>>>hanyo-denshi/ks
220     ;; =>>>gt
221     =>>adobe-japan1-0
222     =>>adobe-japan1-1
223     =>>adobe-japan1-2
224     =>>adobe-japan1-3
225     =>>adobe-japan1-4
226     =>>adobe-japan1-5
227     =>>adobe-japan1-6
228     =>>jis-x0208
229     =>>jis-x0213-1
230     =>>jis-x0213-1@2000
231     =>>jis-x0213-1@2004
232     =>>jis-x0213-2
233     =>>jis-x0208@1978
234     =>>hanyo-denshi/ft
235     =>>hanyo-denshi/jt
236     =>>hanyo-denshi/ks
237     =>>gt
238     =>>daikanwa
239     =>>cbeta
240     =+>jis-x0208
241     =+>jis-x0213-1
242     =+>jis-x0213-2
243     =+>adobe-japan1-0
244     =+>adobe-japan1-1
245     =+>adobe-japan1-2
246     =+>adobe-japan1-3
247     =+>adobe-japan1-4
248     =+>adobe-japan1-5
249     =+>adobe-japan1-6
250     =+>jis-x0208@1978
251     =>jis-x0208
252     =>jis-x0208@1997
253     =>jis-x0213-1
254     =>jis-x0213-1@2000
255     =>jis-x0213-1@2004
256     =>jis-x0213-2
257     ==>ucs@bucs
258     =>ucs@hanyo-denshi
259     =>ucs@iso
260     =>ucs@unicode
261     =>ucs@jis
262     =>ucs@cns
263     =>ucs@ks
264     =+>ucs@iso
265     =+>ucs@unicode
266     =+>ucs@jis
267     =+>ucs@jis/1990
268     =+>ucs@cns
269     =+>ucs@ks
270     =>>ucs@iso
271     =>>ucs@unicode
272     =>>ucs@jis
273     =>>ucs@cns
274     =>>>ucs@iso
275     =>>>ucs@unicode
276     ==ucs@iso
277     ==ucs@unicode
278     ==gb2312
279     ==ks-x1001
280     ==cns11643-1
281     ==cns11643-2
282     ==cns11643-3
283     ==gt
284     ==jis-x0208@1990
285     ==gt-k
286     =ucs@iso
287     =ucs@unicode
288     =ucs@cns
289     =>>big5-cdp
290     =>>gt-k
291     =+>gt
292     =>gt
293     =>big5-cdp
294     =>daikanwa
295     =>daikanwa/ho
296     =>cns11643-7
297     =big5
298     =big5-eten
299     =>gt-k
300     =zinbun-oracle
301     =>zinbun-oracle
302     =ruimoku-v6
303     =>>ruimoku-v6
304     =jef-china3
305     =shinjigen
306     =big5-cdp-var-3
307     =big5-cdp-var-5))
308
309
310 ;;; @ char-db formatters
311 ;;;
312
313 (defun char-db-make-char-spec (char)
314   (let (ret char-spec)
315     (cond ((characterp char)
316            (cond ((and (setq ret (encode-char char '=ucs 'defined-only))
317                        (not (and (<= #xE000 ret)(<= ret #xF8FF))))
318                   (setq char-spec (list (cons '=ucs ret)))
319                   (cond ((setq ret (get-char-attribute char 'name))
320                          (setq char-spec (cons (cons 'name ret) char-spec))
321                          )
322                         ((setq ret (get-char-attribute char 'name*))
323                          (setq char-spec (cons (cons 'name* ret) char-spec))
324                          ))
325                   )
326                  ((encode-char char '=mj 'defined-only)
327                   (setq char-spec nil)
328                   (dolist (ccs (charset-list))
329                     (if (and (or (eq ccs '=mj)
330                                  ;; (eq (charset-property ccs 'iso-ir) 177)
331                                  (string-match "=ucs@" (symbol-name ccs))
332                                  )
333                              (setq ccs (charset-name ccs))
334                              (null (assq ccs char-spec))
335                              (setq ret (encode-char char ccs 'defined-only)))
336                         (setq char-spec (cons (cons ccs ret) char-spec))))
337                   )
338                  ((encode-char char '==mj 'defined-only)
339                   (setq char-spec nil)
340                   (dolist (ccs (charset-list))
341                     (if (and (or (eq ccs '==mj)
342                                  ;; (eq (charset-property ccs 'iso-ir) 177)
343                                  (string-match "=ucs@" (symbol-name ccs))
344                                  )
345                              (setq ccs (charset-name ccs))
346                              (null (assq ccs char-spec))
347                              (setq ret (encode-char char ccs 'defined-only)))
348                         (setq char-spec (cons (cons ccs ret) char-spec))))
349                   )
350                  ((encode-char char '=adobe-japan1 'defined-only)
351                   (setq char-spec nil)
352                   (dolist (ccs (charset-list))
353                     (if (and (or (memq ccs
354                                        '(=adobe-japan1-0
355                                          =adobe-japan1-1
356                                          =adobe-japan1-2
357                                          =adobe-japan1-3
358                                          =adobe-japan1-4
359                                          =adobe-japan1-5
360                                          =adobe-japan1-6
361                                          ))
362                                  ;; (eq (charset-property ccs 'iso-ir) 177)
363                                  (string-match "=ucs@" (symbol-name ccs))
364                                  )
365                              (setq ccs (charset-name ccs))
366                              (null (assq ccs char-spec))
367                              (setq ret (encode-char char ccs 'defined-only)))
368                         (setq char-spec (cons (cons ccs ret) char-spec))))
369                   )
370                  ((encode-char char '==adobe-japan1 'defined-only)
371                   (setq char-spec nil)
372                   (dolist (ccs (charset-list))
373                     (if (and (or (memq ccs
374                                        '(==adobe-japan1-0
375                                          ==adobe-japan1-1
376                                          ==adobe-japan1-2
377                                          ==adobe-japan1-3
378                                          ==adobe-japan1-4
379                                          ==adobe-japan1-5
380                                          ==adobe-japan1-6
381                                          ))
382                                  ;; (eq (charset-property ccs 'iso-ir) 177)
383                                  (string-match "=ucs@" (symbol-name ccs))
384                                  )
385                              (setq ccs (charset-name ccs))
386                              (null (assq ccs char-spec))
387                              (setq ret (encode-char char ccs 'defined-only)))
388                         (setq char-spec (cons (cons ccs ret) char-spec))))
389                   )
390                  ((setq ret
391                         (catch 'tag
392                           (let ((rest char-db-coded-charset-priority-list)
393                                 ccs)
394                             (while rest
395                               (setq ccs (charset-name
396                                          (find-charset (car rest))))
397                               (if (setq ret
398                                         (encode-char char ccs
399                                                      'defined-only))
400                                   (throw 'tag (cons ccs ret)))
401                               (setq rest (cdr rest))))))
402                   (setq char-spec (list ret))
403                   (dolist (ccs (delq (car ret) (charset-list)))
404                     (if (and (or (charset-iso-final-char ccs)
405                                  (memq ccs
406                                        '(=daikanwa
407                                          =daikanwa@rev2
408                                          ;; =gt-k
409                                          =jis-x0208@1997
410                                          ))
411                                  (eq (charset-property ccs 'iso-ir) 177)
412                                  ;; (string-match "=ucs@" (symbol-name ccs))
413                                  )
414                              (setq ccs (charset-name ccs))
415                              (null (assq ccs char-spec))
416                              (setq ret (encode-char char ccs 'defined-only)))
417                         (setq char-spec (cons (cons ccs ret) char-spec))))
418                   (if (null char-spec)
419                       (setq char-spec (split-char char)))
420                   (cond ((setq ret (get-char-attribute char 'name))
421                          (setq char-spec (cons (cons 'name ret) char-spec))
422                          )
423                         ((setq ret (get-char-attribute char 'name*))
424                          (setq char-spec (cons (cons 'name* ret) char-spec))
425                          ))
426                   )
427                  ((setq ret (get-char-attribute
428                              char 'ideographic-combination))
429                   (setq char-spec
430                         (cons (cons 'ideographic-combination ret)
431                               char-spec))
432                   ))
433            char-spec)
434           ((consp char)
435            char))))
436     
437 (defun char-db-insert-char-spec (char &optional readable column
438                                       required-features)
439   (unless column
440     (setq column (current-column)))
441   (let (char-spec temp-char)
442     (setq char-spec (char-db-make-char-spec char))
443     (unless (or (characterp char) ; char
444                 (condition-case nil
445                     (setq char (find-char char-spec))
446                   (error nil)))
447       ;; define temporary character
448       ;;   Current implementation is dirty.
449       (setq temp-char (define-char (cons '(ideograph-daikanwa . 0)
450                                          char-spec)))
451       (remove-char-attribute temp-char 'ideograph-daikanwa)
452       (setq char temp-char))
453     (insert-char-attributes char
454                             readable
455                             (union (mapcar #'car char-spec)
456                                    required-features)
457                             nil 'for-sub-node)
458     (when temp-char
459       ;; undefine temporary character
460       ;;   Current implementation is dirty.
461       (setq char-spec (char-attribute-alist temp-char))
462       (while char-spec
463         (remove-char-attribute temp-char (car (car char-spec)))
464         (setq char-spec (cdr char-spec))))))
465
466 (defun char-db-insert-alist (alist &optional readable column)
467   (unless column
468     (setq column (current-column)))
469   (let ((line-breaking
470          (concat "\n" (make-string (1+ column) ?\ )))
471         name value
472         ret al ; cal
473         key
474         lbs cell rest separator)
475     (insert "(")
476     (while alist
477       (setq name (car (car alist))
478             value (cdr (car alist)))
479       (cond ((eq name 'char)
480              (insert "(char . ")
481              (if (setq ret (condition-case nil
482                                (find-char value)
483                              (error nil)))
484                  (progn
485                    (setq al nil
486                          ;; cal nil
487                          )
488                    (while value
489                      (setq key (car (car value)))
490                      ;; (if (find-charset key)
491                      ;;     (setq cal (cons key cal))
492                      (setq al (cons key al))
493                      ;; )
494                      (setq value (cdr value)))
495                    (insert-char-attributes ret
496                                            readable
497                                            (or al 'none) ; cal
498                                            nil 'for-sub-node))
499                (insert (prin1-to-string value)))
500              (insert ")")
501              (insert line-breaking))
502             ((consp value)
503              (insert (format "(%-18s " name))
504              (setq lbs (concat "\n" (make-string (current-column) ?\ )))
505              (while (consp value)
506                (setq cell (car value))
507                (if (and (consp cell)
508                         (consp (car cell))
509                         (setq ret (condition-case nil
510                                       (find-char cell)
511                                     (error nil)))
512                         )
513                    (progn
514                      (setq rest cell
515                            al nil
516                            ;; cal nil
517                            )
518                      (while rest
519                        (setq key (car (car rest)))
520                        ;; (if (find-charset key)
521                        ;;     (setq cal (cons key cal))
522                        (setq al (cons key al))
523                        ;; )
524                        (setq rest (cdr rest)))
525                      (if separator
526                          (insert lbs))
527                      (insert-char-attributes ret
528                                              readable
529                                              al ; cal
530                                              nil 'for-sub-node)
531                      (setq separator lbs))
532                  (if separator
533                      (insert separator))
534                  (insert (prin1-to-string cell))
535                  (setq separator " "))
536                (setq value (cdr value)))
537              (insert ")")
538              (insert line-breaking))
539             (t
540              (insert (format "(%-18s . %S)%s"
541                              name value
542                              line-breaking))))
543       (setq alist (cdr alist))))
544   (insert ")"))
545
546 (defun char-db-insert-char-reference (plist &optional readable column)
547   (unless column
548     (setq column (current-column)))
549   (let ((line-breaking
550          (concat "\n" (make-string (1+ column) ?\ )))
551         (separator "")
552         name value)
553     (insert "(")
554     (while plist
555       (setq name (pop plist))
556       (setq value (pop plist))
557       (cond ((eq name :char)
558              (insert separator)
559              (insert ":char\t")
560              (cond ((numberp value)
561                     (setq value (decode-char '=ucs value)))
562                    ;; ((consp value)
563                    ;;  (setq value (or (find-char value)
564                    ;;                  value)))
565                    )
566              (char-db-insert-char-spec value readable)
567              (insert line-breaking)
568              (setq separator ""))
569             ((eq name :radical)
570              (insert (format "%s%s\t%d ; %c%s"
571                              separator
572                              name value
573                              (ideographic-radical value)
574                              line-breaking))
575              (setq separator ""))
576             (t
577              (insert (format "%s%s\t%S" separator name value))
578              (setq separator line-breaking)))
579       ))
580   (insert ")"))
581
582 (defun char-db-decode-isolated-char (ccs code-point)
583   (let (ret)
584     (setq ret
585           (cond ((eq ccs 'arabic-iso8859-6)
586                  (decode-char ccs code-point))
587                 ;; ((eq ccs '=gt)
588                 ;;  (decode-builtin-char '==gt code-point))
589                 ((and (memq ccs '(=gt-pj-1
590                                   =gt-pj-2
591                                   =gt-pj-3
592                                   =gt-pj-4
593                                   =gt-pj-5
594                                   =gt-pj-6
595                                   =gt-pj-7
596                                   =gt-pj-8
597                                   =gt-pj-9
598                                   =gt-pj-10
599                                   =gt-pj-11))
600                       (setq ret (decode-char ccs code-point))
601                       (setq ret (encode-char ret '=gt 'defined-only)))
602                  (decode-builtin-char '=gt ret))
603                 (t
604                  (decode-builtin-char ccs code-point))))
605     (cond ((and (<= 0 (char-int ret))
606                 (<= (char-int ret) #x1F))
607            (decode-char '=ucs (+ #x2400 (char-int ret))))
608           ((= (char-int ret) #x7F)
609            ?\u2421)
610           (t ret))))
611
612 (defvar char-db-convert-obsolete-format t)
613
614 (defun char-db-insert-ccs-feature (name value line-breaking)
615   (cond
616    ((integerp value)
617     (insert
618      (format
619       (cond
620        ((memq name '(==shinjigen
621                      =shinjigen
622                      =shinjigen@1ed ==shinjigen@1ed
623                      =shinjigen@rev ==shinjigen@rev
624                      =shinjigen/+p@rev ==shinjigen/+p@rev
625                      ==daikanwa/ho
626                      =daikanwa/ho =>>daikanwa/ho =>daikanwa/ho))
627         "(%-18s .  %04d)\t; %c")
628        ((eq name '=shinjigen@1ed/24pr)
629         "(%-18s . %04d)\t; %c")
630        ((or (memq name '(==daikanwa
631                          =daikanwa =>>daikanwa =>daikanwa
632                          =daikanwa@rev1 =daikanwa@rev2
633                          =daikanwa/+p ==daikanwa/+p =>>daikanwa/+p
634                          =daikanwa/+2p =>>daikanwa/+2p
635                          =gt ==gt ; =>>>gt
636                          =>>gt =+>gt =>gt
637                          =gt-k ==gt-k =>>gt-k =>gt-k
638                          =adobe-japan1-0 ==adobe-japan1-0 ; =>>>adobe-japan1-0
639                          =adobe-japan1-1 ==adobe-japan1-1 ; =>>>adobe-japan1-1
640                          =adobe-japan1-2 ==adobe-japan1-2 ; =>>>adobe-japan1-2
641                          =adobe-japan1-3 ==adobe-japan1-3 ; =>>>adobe-japan1-3
642                          =adobe-japan1-4 ==adobe-japan1-4 ; =>>>adobe-japan1-4
643                          =adobe-japan1-5 ==adobe-japan1-5 ; =>>>adobe-japan1-5
644                          =adobe-japan1-6 ==adobe-japan1-6 ; =>>>adobe-japan1-6
645                          =>>adobe-japan1-0 =+>adobe-japan1-0
646                          =>>adobe-japan1-1 =+>adobe-japan1-1
647                          =>>adobe-japan1-2 =+>adobe-japan1-2
648                          =>>adobe-japan1-3 =+>adobe-japan1-3
649                          =>>adobe-japan1-4 =+>adobe-japan1-4
650                          =>>adobe-japan1-5 =+>adobe-japan1-5
651                          =>>adobe-japan1-6 =+>adobe-japan1-6
652                          =cbeta =>>cbeta
653                          =zinbun-oracle =>zinbun-oracle))
654             ;; (string-match "^=adobe-" (symbol-name name))
655             )
656         "(%-18s . %05d)\t; %c")
657        ((memq name '(=hanyo-denshi/ks
658                      ==hanyo-denshi/ks ; =>>>hanyo-denshi/ks
659                      =>>hanyo-denshi/ks
660                      =koseki
661                      =mj ==mj =>>mj
662                      =zihai mojikyo))
663         "(%-18s . %06d)\t; %c")
664        ((>= (charset-dimension name) 2)
665         "(%-18s . #x%04X)\t; %c")
666        (t
667         "(%-18s . #x%02X)\t; %c"))
668       name
669       (if (= (charset-iso-graphic-plane name) 1)
670           (logior value
671                   (cond ((= (charset-dimension name) 1)
672                          #x80)
673                         ((= (charset-dimension name) 2)
674                          #x8080)
675                         ((= (charset-dimension name) 3)
676                          #x808080)
677                         (t 0)))
678         value)
679       (char-db-decode-isolated-char name value)))
680     (if (and (= (charset-chars name) 94)
681              (= (charset-dimension name) 2))
682         (insert (format " [%02d-%02d]"
683                         (- (lsh value -8) 32)
684                         (- (logand value 255) 32))))
685     )
686    (t
687     (insert (format "(%-18s . %s)" name value))
688     ))
689   (insert line-breaking))
690
691 (defun char-db-insert-relation-feature (char name value line-breaking
692                                              ccss readable)
693   (insert (format "(%-18s%s " name line-breaking))
694   (let ((lbs (concat "\n" (make-string (current-column) ?\ )))
695         separator cell sources required-features
696         ret)
697     (while (consp value)
698       (setq cell (car value))
699       (if (integerp cell)
700           (setq cell (decode-char '=ucs cell)))
701       (cond
702        ((eq name '->subsumptive)
703         (when (or (not (some (lambda (atr)
704                                (get-char-attribute cell atr))
705                              char-db-ignored-attributes))
706                   (some (lambda (ccs)
707                           (encode-char cell ccs 'defined-only))
708                         ccss))
709           (if separator
710               (insert lbs))
711           (let ((char-db-ignored-attributes
712                  (cons '<-subsumptive
713                        char-db-ignored-attributes)))
714             (insert-char-attributes cell readable nil nil 'for-sub-node))
715           (setq separator lbs))
716         )
717        ((characterp cell)
718         (setq sources
719               (get-char-attribute
720                char (intern (format "%s*sources" name))))
721         (setq required-features nil)
722         (dolist (source sources)
723           (cond
724            ((memq source '(JP
725                            JP/Jouyou
726                            shinjigen shinjigen@1ed shinjigen@rev))
727             (setq required-features
728                   (union required-features
729                          '(=jis-x0208
730                            =jis-x0208@1990
731                            =jis-x0213-1@2000
732                            =jis-x0213-1@2004
733                            =jis-x0213-2
734                            =jis-x0212
735                            =jis-x0208@1983
736                            =jis-x0208@1978
737                            =shinjigen
738                            =shinjigen@1ed
739                            =shinjigen@rev
740                            =shinjigen/+p@rev))))
741            ((eq source 'CN)
742             (setq required-features
743                   (union required-features
744                          '(=gb2312
745                            =gb12345
746                            =iso-ir165)))))
747           (cond
748            ((find-charset (setq ret (intern (format "=%s" source))))
749             (setq required-features
750                   (cons ret required-features)))
751            (t (setq required-features
752                     (cons source required-features)))))
753         (cond ((string-match "@JP" (symbol-name name))
754                (setq required-features
755                      (union required-features
756                             '(=jis-x0208
757                               =jis-x0208@1990
758                               =jis-x0213-1-2000
759                               =jis-x0213-2-2000
760                               =jis-x0212
761                               =jis-x0208@1983
762                               =jis-x0208@1978))))
763               ((string-match "@CN" (symbol-name name))
764                (setq required-features
765                      (union required-features
766                             '(=gb2312
767                               =gb12345
768                               =iso-ir165)))))
769         (if separator
770             (insert lbs))
771         (if readable
772             (insert (format "%S" cell))
773           (char-db-insert-char-spec cell readable
774                                     nil
775                                     required-features))
776         (setq separator lbs))
777        ((consp cell)
778         (if separator
779             (insert lbs))
780         (if (consp (car cell))
781             (char-db-insert-char-spec cell readable)
782           (char-db-insert-char-reference cell readable))
783         (setq separator lbs))
784        (t
785         (if separator
786             (insert separator))
787         (insert (prin1-to-string cell))
788         (setq separator " ")))
789       (setq value (cdr value)))
790     (insert ")")
791     (insert line-breaking)))
792
793 (defun insert-char-attributes (char &optional readable attributes column
794                                     for-sub-node)
795   (unless column
796     (setq column (current-column)))
797   (let (name value ; has-long-ccs-name
798         rest
799         radical strokes
800         (line-breaking
801          (concat "\n" (make-string (1+ column) ?\ )))
802         lbs cell separator ret
803         key al cal
804         dest-ccss ; sources required-features
805         ccss)
806     (let (atr-d)
807       (setq attributes
808             (sort (if attributes
809                       (if (consp attributes)
810                           (progn
811                             (dolist (name attributes)
812                               (unless (memq name char-db-ignored-attributes)
813                                 (if (find-charset name)
814                                     (push name ccss))
815                                 (push name atr-d)))
816                             atr-d))
817                     (dolist (name (char-attribute-list))
818                       (unless (memq name char-db-ignored-attributes)
819                         (if (find-charset name)
820                             (push name ccss))
821                         (push name atr-d)))
822                     atr-d)
823                   #'char-attribute-name<)))
824     (insert "(")
825     (when (memq '<-subsumptive attributes)
826       (when (or readable (not for-sub-node))
827         (when (setq value (get-char-attribute char '<-subsumptive))
828           (char-db-insert-relation-feature char '<-subsumptive value
829                                            line-breaking
830                                            ccss readable)))
831       (setq attributes (delq '<-subsumptive attributes)))
832     (when (and (memq '<-denotational attributes)
833                (setq value (get-char-attribute char '<-denotational)))
834       (char-db-insert-relation-feature char '<-denotational value
835                                        line-breaking
836                                        ccss readable)
837       (setq attributes (delq '<-denotational attributes)))
838     (when (and (memq 'name attributes)
839                (setq value (get-char-attribute char 'name)))
840       (insert (format
841                (if (> (+ (current-column) (length value)) 48)
842                    "(name . %S)%s"
843                  "(name               . %S)%s")
844                value line-breaking))
845       (setq attributes (delq 'name attributes))
846       )
847     (when (and (memq 'name* attributes)
848                (setq value (get-char-attribute char 'name*)))
849       (insert (format
850                (if (> (+ (current-column) (length value)) 48)
851                    "(name* . %S)%s"
852                  "(name*              . %S)%s")
853                value line-breaking))
854       (setq attributes (delq 'name* attributes))
855       )
856     (when (and (memq 'script attributes)
857                (setq value (get-char-attribute char 'script)))
858       (insert (format "(script\t\t%s)%s"
859                       (mapconcat (function prin1-to-string)
860                                  value " ")
861                       line-breaking))
862       (setq attributes (delq 'script attributes))
863       )
864     (dolist (name '(=>ucs =>ucs*))
865       (when (and (memq name attributes)
866                  (setq value (get-char-attribute char name)))
867         (insert (format "(%-18s . #x%04X)\t; %c%s"
868                         name value (decode-char '=ucs value)
869                         line-breaking))
870         (setq attributes (delq name attributes))))
871     (dolist (name '(=>ucs@gb =>ucs@big5))
872       (when (and (memq name attributes)
873                  (setq value (get-char-attribute char name)))
874         (insert (format "(%-18s . #x%04X)\t; %c%s"
875                         name value
876                         (decode-char (intern
877                                       (concat "="
878                                               (substring
879                                                (symbol-name name) 2)))
880                                      value)
881                         line-breaking))
882         (setq attributes (delq name attributes))
883         ))
884     ;; (dolist (name '(=>daikanwa))
885     ;;   (when (and (memq name attributes)
886     ;;              (setq value (get-char-attribute char name)))
887     ;;     (insert
888     ;;      (if (integerp value)
889     ;;          (format "(%-18s . %05d)\t; %c%s"
890     ;;                  name value (decode-char '=daikanwa value)
891     ;;                  line-breaking)
892     ;;        (format "(%-18s %s)\t; %c%s"
893     ;;                name
894     ;;                (mapconcat (function prin1-to-string)
895     ;;                           value " ")
896     ;;                (char-representative-of-daikanwa char)
897     ;;                line-breaking)))
898     ;;     (setq attributes (delq name attributes))))
899     (when (and (memq 'general-category attributes)
900                (setq value (get-char-attribute char 'general-category)))
901       (insert (format
902                "(general-category\t%s) ; %s%s"
903                (mapconcat (lambda (cell)
904                             (format "%S" cell))
905                           value " ")
906                (cond ((rassoc value unidata-normative-category-alist)
907                       "Normative Category")
908                      ((rassoc value unidata-informative-category-alist)
909                       "Informative Category")
910                      (t
911                       "Unknown Category"))
912                line-breaking))
913       (setq attributes (delq 'general-category attributes))
914       )
915     (when (and (memq 'bidi-category attributes)
916                (setq value (get-char-attribute char 'bidi-category)))
917       (insert (format "(bidi-category\t. %S)%s"
918                       value
919                       line-breaking))
920       (setq attributes (delq 'bidi-category attributes))
921       )
922     (unless (or (not (memq 'mirrored attributes))
923                 (eq (setq value (get-char-attribute char 'mirrored 'empty))
924                     'empty))
925       (insert (format "(mirrored\t\t. %S)%s"
926                       value
927                       line-breaking))
928       (setq attributes (delq 'mirrored attributes))
929       )
930     (cond
931      ((and (memq 'decimal-digit-value attributes)
932            (setq value (get-char-attribute char 'decimal-digit-value)))
933       (insert (format "(decimal-digit-value . %S)%s"
934                       value
935                       line-breaking))
936       (setq attributes (delq 'decimal-digit-value attributes))
937       (when (and (memq 'digit-value attributes)
938                  (setq value (get-char-attribute char 'digit-value)))
939         (insert (format "(digit-value\t . %S)%s"
940                         value
941                         line-breaking))
942         (setq attributes (delq 'digit-value attributes))
943         )
944       (when (and (memq 'numeric-value attributes)
945                  (setq value (get-char-attribute char 'numeric-value)))
946         (insert (format "(numeric-value\t . %S)%s"
947                         value
948                         line-breaking))
949         (setq attributes (delq 'numeric-value attributes))
950         )
951       )
952      (t
953       (when (and (memq 'digit-value attributes)
954                  (setq value (get-char-attribute char 'digit-value)))
955         (insert (format "(digit-value\t. %S)%s"
956                         value
957                         line-breaking))
958         (setq attributes (delq 'digit-value attributes))
959         )
960       (when (and (memq 'numeric-value attributes)
961                  (setq value (get-char-attribute char 'numeric-value)))
962         (insert (format "(numeric-value\t. %S)%s"
963                         value
964                         line-breaking))
965         (setq attributes (delq 'numeric-value attributes))
966         )))
967     (when (and (memq 'iso-10646-comment attributes)
968                (setq value (get-char-attribute char 'iso-10646-comment)))
969       (insert (format "(iso-10646-comment\t. %S)%s"
970                       value
971                       line-breaking))
972       (setq attributes (delq 'iso-10646-comment attributes))
973       )
974     (when (and (memq 'morohashi-daikanwa attributes)
975                (setq value (get-char-attribute char 'morohashi-daikanwa)))
976       (insert (format "(morohashi-daikanwa\t%s)%s"
977                       (mapconcat (function prin1-to-string) value " ")
978                       line-breaking))
979       (setq attributes (delq 'morohashi-daikanwa attributes))
980       )
981     (setq radical nil
982           strokes nil)
983     (when (and (memq 'ideographic-radical attributes)
984                (setq value (get-char-attribute char 'ideographic-radical)))
985       (setq radical value)
986       (insert (format "(ideographic-radical . %S)\t; %c%s"
987                       radical
988                       (ideographic-radical radical)
989                       line-breaking))
990       (setq attributes (delq 'ideographic-radical attributes))
991       )
992     (when (and (memq 'shuowen-radical attributes)
993                (setq value (get-char-attribute char 'shuowen-radical)))
994       (insert (format "(shuowen-radical\t. %S)\t; %c%s"
995                       value
996                       (shuowen-radical value)
997                       line-breaking))
998       (setq attributes (delq 'shuowen-radical attributes))
999       )
1000     (let (key)
1001       (dolist (domain
1002                (append
1003                 char-db-feature-domains
1004                 (let (dest domain)
1005                   (dolist (feature (char-attribute-list))
1006                     (setq feature (symbol-name feature))
1007                     (when (string-match
1008                            "\\(radical\\|strokes\\)@\\([^@*]+\\)\\(\\*\\|$\\)"
1009                            feature)
1010                       (setq domain (intern (match-string 2 feature)))
1011                      (unless (memq domain dest)
1012                        (setq dest (cons domain dest)))))
1013                   (sort dest #'string<))))
1014         (setq key (intern (format "%s@%s" 'ideographic-radical domain)))
1015         (when (and (memq key attributes)
1016                    (setq value (get-char-attribute char key)))
1017           (setq radical value)
1018           (insert (format "(%s . %S)\t; %c%s"
1019                           key
1020                           radical
1021                           (ideographic-radical radical)
1022                           line-breaking))
1023           (setq attributes (delq key attributes))
1024           )
1025         (setq key (intern (format "%s@%s" 'ideographic-strokes domain)))
1026         (when (and (memq key attributes)
1027                    (setq value (get-char-attribute char key)))
1028           (setq strokes value)
1029           (insert (format "(%s . %S)%s"
1030                           key
1031                           strokes
1032                           line-breaking))
1033           (setq attributes (delq key attributes))
1034           )
1035         (setq key (intern (format "%s@%s" 'total-strokes domain)))
1036         (when (and (memq key attributes)
1037                    (setq value (get-char-attribute char key)))
1038           (insert (format "(%s       . %S)%s"
1039                           key
1040                           value
1041                           line-breaking))
1042           (setq attributes (delq key attributes))
1043           )
1044         (dolist (feature '(ideographic-radical
1045                            ideographic-strokes
1046                            total-strokes))
1047           (setq key (intern (format "%s@%s*sources" feature domain)))
1048           (when (and (memq key attributes)
1049                      (setq value (get-char-attribute char key)))
1050             (insert (format "(%s%s" key line-breaking))
1051             (dolist (cell value)
1052               (insert (format " %s" cell)))
1053             (insert ")")
1054             (insert line-breaking)
1055             (setq attributes (delq key attributes))
1056             ))
1057         ))
1058     (when (and (memq 'ideographic-strokes attributes)
1059                (setq value (get-char-attribute char 'ideographic-strokes)))
1060       (setq strokes value)
1061       (insert (format "(ideographic-strokes . %S)%s"
1062                       strokes
1063                       line-breaking))
1064       (setq attributes (delq 'ideographic-strokes attributes))
1065       )
1066     (when (and (memq 'kangxi-radical attributes)
1067                (setq value (get-char-attribute char 'kangxi-radical)))
1068       (unless (eq value radical)
1069         (insert (format "(kangxi-radical\t . %S)\t; %c%s"
1070                         value
1071                         (ideographic-radical value)
1072                         line-breaking))
1073         (or radical
1074             (setq radical value)))
1075       (setq attributes (delq 'kangxi-radical attributes))
1076       )
1077     (when (and (memq 'kangxi-strokes attributes)
1078                (setq value (get-char-attribute char 'kangxi-strokes)))
1079       (unless (eq value strokes)
1080         (insert (format "(kangxi-strokes\t . %S)%s"
1081                         value
1082                         line-breaking))
1083         (or strokes
1084             (setq strokes value)))
1085       (setq attributes (delq 'kangxi-strokes attributes))
1086       )
1087     (when (and (memq 'japanese-radical attributes)
1088                (setq value (get-char-attribute char 'japanese-radical)))
1089       (unless (eq value radical)
1090         (insert (format "(japanese-radical\t . %S)\t; %c%s"
1091                         value
1092                         (ideographic-radical value)
1093                         line-breaking))
1094         (or radical
1095             (setq radical value)))
1096       (setq attributes (delq 'japanese-radical attributes))
1097       )
1098     (when (and (memq 'japanese-strokes attributes)
1099                (setq value (get-char-attribute char 'japanese-strokes)))
1100       (unless (eq value strokes)
1101         (insert (format "(japanese-strokes\t . %S)%s"
1102                         value
1103                         line-breaking))
1104         (or strokes
1105             (setq strokes value)))
1106       (setq attributes (delq 'japanese-strokes attributes))
1107       )
1108     (when (and (memq 'cns-radical attributes)
1109                (setq value (get-char-attribute char 'cns-radical)))
1110       (insert (format "(cns-radical\t . %S)\t; %c%s"
1111                       value
1112                       (ideographic-radical value)
1113                       line-breaking))
1114       (setq attributes (delq 'cns-radical attributes))
1115       )
1116     (when (and (memq 'cns-strokes attributes)
1117                (setq value (get-char-attribute char 'cns-strokes)))
1118       (unless (eq value strokes)
1119         (insert (format "(cns-strokes\t . %S)%s"
1120                         value
1121                         line-breaking))
1122         (or strokes
1123             (setq strokes value)))
1124       (setq attributes (delq 'cns-strokes attributes))
1125       )
1126     ;; (when (and (memq 'shinjigen-1-radical attributes)
1127     ;;            (setq value (get-char-attribute char 'shinjigen-1-radical)))
1128     ;;   (unless (eq value radical)
1129     ;;     (insert (format "(shinjigen-1-radical . %S)\t; %c%s"
1130     ;;                     value
1131     ;;                     (ideographic-radical value)
1132     ;;                     line-breaking))
1133     ;;     (or radical
1134     ;;         (setq radical value)))
1135     ;;   (setq attributes (delq 'shinjigen-1-radical attributes))
1136     ;;   )
1137     (when (and (memq 'ideographic- attributes)
1138                (setq value (get-char-attribute char 'ideographic-)))
1139       (insert "(ideographic-       ")
1140       (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1141             separator nil)
1142       (while (consp value)
1143         (setq cell (car value))
1144         (if (integerp cell)
1145             (setq cell (decode-char '=ucs cell)))
1146         (cond ((characterp cell)
1147                (if separator
1148                    (insert lbs))
1149                (if readable
1150                    (insert (format "%S" cell))
1151                  (char-db-insert-char-spec cell readable))
1152                (setq separator lbs))
1153               ((consp cell)
1154                (if separator
1155                    (insert lbs))
1156                (if (consp (car cell))
1157                    (char-db-insert-char-spec cell readable)
1158                  (char-db-insert-char-reference cell readable))
1159                (setq separator lbs))
1160               (t
1161                (if separator
1162                    (insert separator))
1163                (insert (prin1-to-string cell))
1164                (setq separator " ")))
1165         (setq value (cdr value)))
1166       (insert ")")
1167       (insert line-breaking)
1168       (setq attributes (delq 'ideographic- attributes)))
1169     (when (and (memq 'total-strokes attributes)
1170                (setq value (get-char-attribute char 'total-strokes)))
1171       (insert (format "(total-strokes       . %S)%s"
1172                       value
1173                       line-breaking))
1174       (setq attributes (delq 'total-strokes attributes))
1175       )
1176     (when (and (memq '->ideograph attributes)
1177                (setq value (get-char-attribute char '->ideograph)))
1178       (insert (format "(->ideograph\t%s)%s"
1179                       (mapconcat (lambda (code)
1180                                    (cond ((symbolp code)
1181                                           (symbol-name code))
1182                                          ((integerp code)
1183                                           (format "#x%04X" code))
1184                                          (t
1185                                           (format "%s %S"
1186                                                   line-breaking code))))
1187                                  value " ")
1188                       line-breaking))
1189       (setq attributes (delq '->ideograph attributes))
1190       )
1191     ;; (when (and (memq '->decomposition attributes)
1192     ;;            (setq value (get-char-attribute char '->decomposition)))
1193     ;;   (insert (format "(->decomposition\t%s)%s"
1194     ;;                   (mapconcat (lambda (code)
1195     ;;                                (cond ((symbolp code)
1196     ;;                                       (symbol-name code))
1197     ;;                                      ((characterp code)
1198     ;;                                       (if readable
1199     ;;                                           (format "%S" code)
1200     ;;                                         (format "#x%04X"
1201     ;;                                                 (char-int code))
1202     ;;                                         ))
1203     ;;                                      ((integerp code)
1204     ;;                                       (format "#x%04X" code))
1205     ;;                                      (t
1206     ;;                                       (format "%s%S" line-breaking code))))
1207     ;;                              value " ")
1208     ;;                   line-breaking))
1209     ;;   (setq attributes (delq '->decomposition attributes))
1210     ;;   )
1211     (if (equal (get-char-attribute char '->titlecase)
1212                (get-char-attribute char '->uppercase))
1213         (setq attributes (delq '->titlecase attributes)))
1214     (when (and (memq '->mojikyo attributes)
1215                (setq value (get-char-attribute char '->mojikyo)))
1216       (insert (format "(->mojikyo\t\t. %06d)\t; %c%s"
1217                       value (decode-char 'mojikyo value)
1218                       line-breaking))
1219       (setq attributes (delq '->mojikyo attributes))
1220       )
1221     (when (and (memq 'hanyu-dazidian-vol attributes)
1222                (setq value (get-char-attribute char 'hanyu-dazidian-vol)))
1223       (insert (format "(hanyu-dazidian-vol  . %d)%s"
1224                       value line-breaking))
1225       (setq attributes (delq 'hanyu-dazidian-vol attributes))
1226       )
1227     (when (and (memq 'hanyu-dazidian-page attributes)
1228                (setq value (get-char-attribute char 'hanyu-dazidian-page)))
1229       (insert (format "(hanyu-dazidian-page . %d)%s"
1230                       value line-breaking))
1231       (setq attributes (delq 'hanyu-dazidian-page attributes))
1232       )
1233     (when (and (memq 'hanyu-dazidian-char attributes)
1234                (setq value (get-char-attribute char 'hanyu-dazidian-char)))
1235       (insert (format "(hanyu-dazidian-char . %d)%s"
1236                       value line-breaking))
1237       (setq attributes (delq 'hanyu-dazidian-char attributes))
1238       )
1239     (unless readable
1240       (dolist (ignored '(composition
1241                          ->denotational <-subsumptive ->ucs-unified
1242                          ->ideographic-component-forms))
1243         (setq attributes (delq ignored attributes))))
1244     (while attributes
1245       (setq name (car attributes))
1246       (unless (eq (setq value (get-char-attribute char name 'value-is-empty))
1247                   'value-is-empty)
1248         (cond ((setq ret (find-charset name))
1249                (setq name (charset-name ret))
1250                (when (not (memq name dest-ccss))
1251                  (setq dest-ccss (cons name dest-ccss))
1252                  (char-db-insert-ccs-feature name value line-breaking))
1253                )
1254               ((string-match "^=>ucs@" (symbol-name name))
1255                (insert (format "(%-18s . #x%04X)\t; %c%s"
1256                                name value (decode-char '=ucs value)
1257                                line-breaking))
1258                )
1259               ((eq name 'jisx0208-1978/4X)
1260                (insert (format "(%-18s . #x%04X)%s"
1261                                name value
1262                                line-breaking))
1263                )
1264               ((and
1265                 (not readable)
1266                 (not (eq name '->subsumptive))
1267                 (not (eq name '->uppercase))
1268                 (not (eq name '->lowercase))
1269                 (not (eq name '->titlecase))
1270                 (not (eq name '->canonical))
1271                 (not (eq name '->Bopomofo))
1272                 (not (eq name '->mistakable))
1273                 (not (eq name '->ideographic-variants))
1274                 (null (get-char-attribute
1275                        char (intern (format "%s*sources" name))))
1276                 (not (string-match "\\*sources$" (symbol-name name)))
1277                 (null (get-char-attribute
1278                        char (intern (format "%s*note" name))))
1279                 (not (string-match "\\*note$" (symbol-name name)))
1280                 (or (eq name '<-identical)
1281                     (eq name '<-uppercase)
1282                     (eq name '<-lowercase)
1283                     (eq name '<-titlecase)
1284                     (eq name '<-canonical)
1285                     (eq name '<-ideographic-variants)
1286                     ;; (eq name '<-synonyms)
1287                     (string-match "^<-synonyms" (symbol-name name))
1288                     (eq name '<-mistakable)
1289                     (when (string-match "^->" (symbol-name name))
1290                       (cond
1291                        ((string-match "^->fullwidth" (symbol-name name))
1292                         (not (and (consp value)
1293                                   (characterp (car value))
1294                                   (encode-char
1295                                    (car value) '=ucs 'defined-only)))
1296                         )
1297                        (t)))
1298                     ))
1299                )
1300               ((or (eq name 'ideographic-structure)
1301                    (eq name 'ideographic-combination)
1302                    (eq name 'ideographic-)
1303                    (eq name '=decomposition)
1304                    (char-feature-base-name= '=decomposition name)
1305                    (char-feature-base-name= '=>decomposition name)
1306                    ;; (string-match "^=>*decomposition\\(@[^*]+\\)?$"
1307                    ;;               (symbol-name name))
1308                    (string-match "^\\(->\\|<-\\)[^*]*$" (symbol-name name))
1309                    (string-match "^\\(->\\|<-\\)[^*]*\\*sources$"
1310                                  (symbol-name name))
1311                    )
1312                (char-db-insert-relation-feature char name value
1313                                                 line-breaking
1314                                                 ccss readable))
1315               ((memq name '(ideograph=
1316                             original-ideograph-of
1317                             ancient-ideograph-of
1318                             vulgar-ideograph-of
1319                             wrong-ideograph-of
1320                             ;; simplified-ideograph-of
1321                             ideographic-variants
1322                             ;; ideographic-different-form-of
1323                             ))
1324                (insert (format "(%-18s%s " name line-breaking))
1325                (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1326                      separator nil)
1327                (while (consp value)
1328                  (setq cell (car value))
1329                  (if (and (consp cell)
1330                           (consp (car cell)))
1331                      (progn
1332                        (if separator
1333                            (insert lbs))
1334                        (char-db-insert-alist cell readable)
1335                        (setq separator lbs))
1336                    (if separator
1337                        (insert separator))
1338                    (insert (prin1-to-string cell))
1339                    (setq separator " "))
1340                  (setq value (cdr value)))
1341                (insert ")")
1342                (insert line-breaking))
1343               ((consp value)
1344                (insert (format "(%-18s " name))
1345                (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1346                      separator nil)
1347                (while (consp value)
1348                  (setq cell (car value))
1349                  (if (and (consp cell)
1350                           (consp (car cell))
1351                           (setq ret (condition-case nil
1352                                         (find-char cell)
1353                                       (error nil))))
1354                      (progn
1355                        (setq rest cell
1356                              al nil
1357                              cal nil)
1358                        (while rest
1359                          (setq key (car (car rest)))
1360                          (if (find-charset key)
1361                              (setq cal (cons key cal))
1362                            (setq al (cons key al)))
1363                          (setq rest (cdr rest)))
1364                        (if separator
1365                            (insert lbs))
1366                        (insert-char-attributes ret
1367                                                readable
1368                                                al ; cal
1369                                                nil 'for-sub-node)
1370                        (setq separator lbs))
1371                    (setq ret (prin1-to-string cell))
1372                    (if separator
1373                        (if (< (+ (current-column)
1374                                  (length ret)
1375                                  (length separator))
1376                               76)
1377                            (insert separator)
1378                          (insert lbs)))
1379                    (insert ret)
1380                    (setq separator " "))
1381                  (setq value (cdr value)))
1382                (insert ")")
1383                (insert line-breaking))
1384               (t
1385                (insert (format "(%-18s" name))
1386                (setq ret (prin1-to-string value))
1387                (unless (< (+ (current-column)
1388                              (length ret)
1389                              3)
1390                           76)
1391                  (insert line-breaking))
1392                (insert " . " ret ")" line-breaking)
1393                ;; (insert (format "(%-18s . %S)%s"
1394                ;;                 name value
1395                ;;                 line-breaking))
1396                )
1397               ))
1398       (setq attributes (cdr attributes)))
1399     (insert ")")))
1400
1401 (defun insert-char-data (char &optional readable
1402                               attributes)
1403   (save-restriction
1404     (narrow-to-region (point)(point))
1405     (insert "(define-char
1406   '")
1407     (insert-char-attributes char readable attributes)
1408     (insert ")\n")
1409     (goto-char (point-min))
1410     (while (re-search-forward "[ \t]+$" nil t)
1411       (replace-match ""))
1412     ;; from tabify.
1413     (goto-char (point-min))
1414     (while (re-search-forward "[ \t][ \t][ \t]*" nil t)
1415       (let ((column (current-column))
1416             (indent-tabs-mode t))
1417         (delete-region (match-beginning 0) (point))
1418         (indent-to column)))
1419     (goto-char (point-max))
1420     ;; (tabify (point-min)(point-max))
1421     ))
1422
1423 (defun insert-char-data-with-variant (char &optional printable
1424                                            no-ucs-unified
1425                                            script excluded-script)
1426   (insert-char-data char printable)
1427   (let ((variants (char-variants char))
1428         rest
1429         variant vs ret)
1430     (setq variants (sort variants #'<))
1431     (setq rest variants)
1432     (setq variants (cons char variants))
1433     (while rest
1434       (setq variant (car rest))
1435       (unless (get-char-attribute variant '<-subsumptive)
1436         (if (and (or (null script)
1437                      (null (setq vs (get-char-attribute variant 'script)))
1438                      (memq script vs))
1439                  (or (null excluded-script)
1440                      (null (setq vs (get-char-attribute variant 'script)))
1441                      (not (memq excluded-script vs))))
1442             (unless (and no-ucs-unified (get-char-attribute variant '=ucs))
1443               (insert-char-data variant printable)
1444               (if (setq ret (char-variants variant))
1445                   (while ret
1446                     (or (memq (car ret) variants)
1447                         ;; (get-char-attribute (car ret) '<-subsumptive)
1448                         (setq rest (nconc rest (list (car ret)))))
1449                     (setq ret (cdr ret)))))))
1450       (setq rest (cdr rest)))))
1451
1452 (defun insert-char-range-data (min max &optional script excluded-script)
1453   (let ((code min)
1454         char)
1455     (while (<= code max)
1456       (setq char (decode-char '=ucs code))
1457       (if (encode-char char '=ucs 'defined-only)
1458           (insert-char-data-with-variant char nil 'no-ucs-unified
1459                                          script excluded-script))
1460       (setq code (1+ code)))))
1461
1462 (defun write-char-range-data-to-file (min max file
1463                                           &optional script excluded-script)
1464   (let ((coding-system-for-write char-db-file-coding-system))
1465     (with-temp-buffer
1466       (insert (format ";; -*- coding: %s -*-\n"
1467                       char-db-file-coding-system))
1468       (insert-char-range-data min max script excluded-script)
1469       (write-region (point-min)(point-max) file))))
1470
1471 (defvar what-character-original-window-configuration)
1472
1473 ;;;###autoload
1474 (defun what-char-definition (char)
1475   (interactive (list (char-after)))
1476   (let ((buf (get-buffer-create "*Character Description*"))
1477         (the-buf (current-buffer))
1478         (win-conf (current-window-configuration)))
1479     (pop-to-buffer buf)
1480     (make-local-variable 'what-character-original-window-configuration)
1481     (setq what-character-original-window-configuration win-conf)
1482     (setq buffer-read-only nil)
1483     (erase-buffer)
1484     (condition-case err
1485         (progn
1486           (insert-char-data-with-variant char 'printable)
1487           (unless (char-attribute-alist char)
1488             (insert (format ";; = %c\n"
1489                             (let* ((rest (split-char char))
1490                                    (ccs (pop rest))
1491                                    (code (pop rest)))
1492                               (while rest
1493                                 (setq code (logior (lsh code 8)
1494                                                    (pop rest))))
1495                               (decode-char ccs code)))))
1496           ;; (char-db-update-comment)
1497           (set-buffer-modified-p nil)
1498           (view-mode the-buf (lambda (buf)
1499                                (set-window-configuration
1500                                 what-character-original-window-configuration)
1501                                ))
1502           (goto-char (point-min)))
1503       (error (progn
1504                (set-window-configuration
1505                 what-character-original-window-configuration)
1506                (signal (car err) (cdr err)))))))
1507
1508
1509 ;;; @ end
1510 ;;;
1511
1512 (provide 'char-db-util)
1513
1514 ;;; char-db-util.el ends here