(char-db-coded-charset-priority-list): Add `=>>hanyo-denshi/jt'.
[chise/xemacs-chise.git-] / lisp / utf-2000 / char-db-util.el
1 ;;; char-db-util.el --- Character Database utility -*- coding: utf-8-er; -*-
2
3 ;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
4 ;;   2007, 2008, 2009, 2010, 2011 MORIOKA Tomohiko.
5
6 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
7 ;; Keywords: CHISE, Character Database, ISO/IEC 10646, UCS, Unicode, MULE.
8
9 ;; This file is part of XEmacs CHISE.
10
11 ;; XEmacs CHISE is free software; you can redistribute it and/or
12 ;; modify it under the terms of the GNU General Public License as
13 ;; published by the Free Software Foundation; either version 2, or (at
14 ;; your option) any later version.
15
16 ;; XEmacs CHISE is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 ;; General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with XEmacs CHISE; see the file COPYING.  If not, write to
23 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Code:
27
28 (require 'chise-subr)
29 (require 'ideograph-subr)
30
(defconst unidata-normative-category-alist
  '(("Lu" . (letter    uppercase))
    ("Ll" . (letter    lowercase))
    ("Lt" . (letter    titlecase))
    ("Mn" . (mark      non-spacing))
    ("Mc" . (mark      spacing-combining))
    ("Me" . (mark      enclosing))
    ("Nd" . (number    decimal-digit))
    ("Nl" . (number    letter))
    ("No" . (number    other))
    ("Zs" . (separator space))
    ("Zl" . (separator line))
    ("Zp" . (separator paragraph))
    ("Cc" . (other     control))
    ("Cf" . (other     format))
    ("Cs" . (other     surrogate))
    ("Co" . (other     private-use))
    ("Cn" . (other     not-assigned)))
  "Alist from a two-letter category abbreviation to a (MAJOR MINOR) symbol pair.
`insert-char-attributes' looks a character's `general-category' value
up on the value side (with `rassoc'); a match here is annotated as
\"Normative Category\".")
49
(defconst unidata-informative-category-alist
  '(("Lm" . (letter      modifier))
    ("Lo" . (letter      other))
    ("Pc" . (punctuation connector))
    ("Pd" . (punctuation dash))
    ("Ps" . (punctuation open))
    ("Pe" . (punctuation close))
    ("Pi" . (punctuation initial-quote))
    ("Pf" . (punctuation final-quote))
    ("Po" . (punctuation other))
    ("Sm" . (symbol      math))
    ("Sc" . (symbol      currency))
    ("Sk" . (symbol      modifier))
    ("So" . (symbol      other)))
  "Alist from a two-letter category abbreviation to a (MAJOR MINOR) symbol pair.
`insert-char-attributes' consults this list (with `rassoc') after
`unidata-normative-category-alist'; a `general-category' value found
here is annotated as \"Informative Category\".")
65
(defconst shuowen-radicals
  [?一 ?上 ?示 ?三 ?王 ?玉 ?玨 ?气 ?士 ?丨 ; 010
   ?屮 ?艸 ?蓐 ?茻 ?小 ?八 ?釆 ?半 ?牛 ?犛 ; 020
   ?告 ?口 ?凵 ?吅 ?哭 ?走 ?止 ?癶 ?步 ?此 ; 030
   ?正 ?是 ?辵 ?彳 ?廴 ?㢟 ?行 ?齒 ?牙 ?足 ; 040
   ?疋 ?品 ?龠 ?冊 ?㗊 ?舌 ?干 ?谷 ?只 ?㕯 ; 050
   ?句 ?丩 ?古 ?十 ?卅 ?言 ?誩 ?音 ?䇂 ?丵 ; 060
   ?菐 ?𠬞 ?𠬜 ?共 ?異 ?舁 ?𦥑 ?䢅 ?爨 ?革 ; 070
   ?鬲 ?䰜 ?爪 ?𠃨 ?鬥 ?又 ?𠂇 ?㕜 ?支 ?𦘒 ; 080
   ?聿 ?畫 ?隶 ?臤 ?臣 ?殳 ?殺 ?𠘧 ?寸 ?皮 ; 090
   ?㼱 ?攴 ?敎 ?卜 ?用 ?爻 ?㸚 ?𥄎 ?目 ?䀠 ; 100
   ?眉 ?盾 ?自 ?白 ?鼻 ?皕 ?習 ?羽 ?隹 ?奞 ; 110
   ?萑 ?𦫳 ?苜 ?羊 ?羴 ?瞿 ?雔 ?雥 ?鳥 ?烏 ; 120
   ?𠦒 ?冓 ?幺 ?𢆶 ?叀 ?玄 ?予 ?放 ?𠬪 ?𣦼 ; 130
   ?歺 ?死 ?冎 ?骨 ?肉 ?筋 ?刀 ?刃 ?㓞 ?丰 ; 140
   ?耒 ?𧢲 ?竹 ?箕 ?丌 ?左 ?工 ?㠭 ?巫 ?甘 ; 150
   ?旨 ?曰 ?乃 ?丂 ?可 ?兮 ?号 ?亏 ?喜 ?壴 ; 160
   ?鼓 ?豈 ?豆 ?豊 ?豐 ?䖒 ?虍 ?虎 ?虤 ?皿 ; 170
   ?𠙴 ?去 ?血 ?丶 ?丹 ?青 ?井 ?皀 ?鬯 ?食 ; 180
   ?亼 ?會 ?倉 ?入 ?缶 ?矢 ?高 ?冂 ?𩫏 ?京 ; 190
   ?亯 ?𣆪 ?畗 ?㐭 ?嗇 ?來 ?麥 ?夊 ?舛 ?䑞 ; 200
   ?韋 ?弟 ?夂 ?久 ?桀 ?木 ?東 ?林 ?才 ?叒 ; 210
   ?之 ?帀 ?出 ?𣎵 ?生 ?乇 ?𠂹 ?𠌶 ?華 ?𥝌 ; 220
   ?稽 ?巢 ?桼 ?束 ?㯻 ?囗 ?員 ?貝 ?邑 ?𨛜 ; 230
   ?日 ?旦 ?倝 ?㫃 ?冥 ?晶 ?月 ?有 ?明 ?囧 ; 240
   ?夕 ?多 ?毌 ?𢎘 ?𣐺 ?卣 ?齊 ?朿 ?片 ?鼎 ; 250
   ?克 ?彔 ?禾 ?秝 ?黍 ?香 ?米 ?毇 ?臼 ?凶 ; 260
   ?𣎳 ?林 ?麻 ?尗 ?耑 ?韭 ?瓜 ?瓠 ?宀 ?宮 ; 270
   ?呂 ?穴 ?㝱 ?𤕫 ?冖 ?𠔼 ?冃 ?㒳 ?网 ?襾 ; 280
   ?巾 ?巿 ?帛 ?白 ?㡀 ?黹 ?人 ?𠤎 ?匕 ?从 ; 290
   ]
  "Vector of 290 characters, ten per row; row comments give the running count.
`shuowen-radical' indexes this vector by 1-based number.
NOTE(review): presumably the section-header radicals of the Shuowen
Jiezi in their original order; only 290 entries are listed and `?林'
occurs at both 208 and 262 -- confirm against the intended table.")
97
(defun shuowen-radical (number)
  "Return the character stored for 1-based NUMBER in `shuowen-radicals'.
NUMBER 1 maps to the first element of the vector; `aref' signals an
error when NUMBER falls outside the vector."
  (let ((index (- number 1)))
    (aref shuowen-radicals index)))
100
(defvar char-db-file-coding-system 'utf-8-mcs-er
  "Coding system symbol for character-database files.
NOTE(review): not referenced in this part of the file; presumably used
when such files are read or written -- confirm against the callers.")
102
(defvar char-db-ignored-attributes '(ideographic-products)
  "List of attribute names that the char-db formatters leave out.
`insert-char-attributes' drops these names while collecting the
attributes to print, and `char-db-insert-relation-feature' consults
the list when deciding whether to print a `->subsumptive' member.")
104
(defvar char-db-coded-charset-priority-list
  '(ascii control-1
    latin-iso8859-1 latin-iso8859-2 latin-iso8859-3 latin-iso8859-4
    latin-iso8859-9 latin-jisx0201
    cyrillic-iso8859-5 greek-iso8859-7 thai-tis620
    =jis-x0208 =jis-x0208@1978 =jis-x0208@1983
    japanese-jisx0212 chinese-gb2312 =jis-x0208@1990
    chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
    chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
    chinese-cns11643-7
    =jis-x0213-1 =jis-x0213-1@2000 =jis-x0213-1@2004 =jis-x0213-2
    korean-ksc5601 chinese-isoir165 katakana-jisx0201
    hebrew-iso8859-8 chinese-gb12345 latin-viscii ethiopic-ucs
    =big5-cdp =gt
    =adobe-japan1-0 =adobe-japan1-1 =adobe-japan1-2 =adobe-japan1-3
    =adobe-japan1-4 =adobe-japan1-5 =adobe-japan1-6
    =hanyo-denshi/ja =hanyo-denshi/jb =hanyo-denshi/jc =hanyo-denshi/jd
    =hanyo-denshi/ft =hanyo-denshi/ia =hanyo-denshi/ib =hanyo-denshi/hg
    =hanyo-denshi/jt =hanyo-denshi/ks
    =daikanwa =daikanwa@rev2 =daikanwa@rev1
    =cbeta =gt-k
    ideograph-hanziku-1 ideograph-hanziku-2 ideograph-hanziku-3
    ideograph-hanziku-4 ideograph-hanziku-5 ideograph-hanziku-6
    ideograph-hanziku-7 ideograph-hanziku-8 ideograph-hanziku-9
    ideograph-hanziku-10 ideograph-hanziku-11 ideograph-hanziku-12
    =>>>jis-x0208 =>>>jis-x0213-1 =>>>jis-x0213-2
    =>>>gt =>>>adobe-japan1
    =>>jis-x0208 =>>jis-x0213-1 =>>jis-x0213-1@2000 =>>jis-x0213-1@2004
    =>>jis-x0213-2 =>>jis-x0208@1978
    =>>hanyo-denshi/ft =>>hanyo-denshi/jt =>>hanyo-denshi/ks
    =>>gt =>>daikanwa =>>adobe-japan1
    =+>jis-x0208 =+>jis-x0213-1 =+>jis-x0213-2 =+>jis-x0208@1978
    =+>adobe-japan1
    =>jis-x0208 =>jis-x0208@1997
    =>jis-x0213-1 =>jis-x0213-1@2000 =>jis-x0213-1@2004 =>jis-x0213-2
    ==>ucs@bucs
    =>ucs@iso =>ucs@unicode =>ucs@jis =>ucs@cns =>ucs@ks
    =+>ucs@iso =+>ucs@unicode =+>ucs@jis =+>ucs@jis/1990
    =+>ucs@cns =+>ucs@ks
    =>>ucs@iso =>>ucs@unicode =>>ucs@jis =>>ucs@cns
    =>>>ucs@iso =>>>ucs@unicode
    =ucs@iso =ucs@unicode
    =>>big5-cdp =>>gt-k =+>gt =>gt =>big5-cdp =>daikanwa =>cns11643-7
    =big5 =big5-eten =>gt-k
    =zinbun-oracle =>zinbun-oracle
    =ruimoku-v6 =>>ruimoku-v6
    =jef-china3 =shinjigen)
  "Coded-charset names in the order `char-db-make-char-spec' tries them.
For a character without a usable `=ucs' code point, the first charset
in this list that encodes the character supplies the primary
\(CHARSET . CODE-POINT) cell of its char-spec, so earlier entries take
precedence over later ones.")
241
242
243 ;;; @ char-db formatters
244 ;;;
245
(defun char-db-make-char-spec (char)
  "Return a char-spec, an alist of (FEATURE . VALUE) cells, for CHAR.

When CHAR is a character the spec is built as follows:
 - if CHAR has an `=ucs' code point outside #xE000..#xF8FF, the spec
   is that single `=ucs' cell;
 - otherwise the first entry of `char-db-coded-charset-priority-list'
   that encodes CHAR supplies the primary cell, and cells are added
   for every other charset that encodes CHAR and either has an ISO
   final character, is one of a few explicitly listed charsets, or
   has a name containing \"=ucs@\";
 - otherwise CHAR's `ideographic-combination' attribute, if any, forms
   the only cell.
In the first two cases a `name' cell (or, failing that, a `name*'
cell) is put in front when CHAR has that attribute.

When CHAR is a cons it is returned unchanged.  Any other value
yields nil."
  (let (ret code char-spec named)
    (cond
     ((characterp char)
      (cond
       ((and (setq ret (encode-char char '=ucs 'defined-only))
             (not (and (<= #xE000 ret) (<= ret #xF8FF))))
        (setq char-spec (list (cons '=ucs ret))
              named t))
       ((setq ret
              (catch 'tag
                (dolist (entry char-db-coded-charset-priority-list)
                  (let ((charset (find-charset entry))
                        ccs)
                    ;; `find-charset' returns nil for a name that is
                    ;; not defined as a charset; skip such entries
                    ;; instead of letting `charset-name' signal and
                    ;; abort the whole lookup.
                    (when charset
                      (setq ccs (charset-name charset))
                      (if (setq code (encode-char char ccs 'defined-only))
                          (throw 'tag (cons ccs code))))))
                nil))
        (setq char-spec (list ret)
              named t)
        ;; Add cells for the other charsets of interest.
        (dolist (ccs (delq (car ret) (charset-list)))
          (if (and (or (charset-iso-final-char ccs)
                       (memq ccs
                             '(=daikanwa
                               =daikanwa@rev2
                               ;; =gt-k
                               =jis-x0208@1997
                               ))
                       (string-match "=ucs@" (symbol-name ccs)))
                   (setq ccs (charset-name ccs))
                   (null (assq ccs char-spec))
                   (setq code (encode-char char ccs 'defined-only)))
              (setq char-spec (cons (cons ccs code) char-spec)))))
       ((setq ret (get-char-attribute char 'ideographic-combination))
        (setq char-spec (list (cons 'ideographic-combination ret)))))
      ;; Specs built from code points are prefixed with the name.
      (when named
        (cond ((setq ret (get-char-attribute char 'name))
               (setq char-spec (cons (cons 'name ret) char-spec)))
              ((setq ret (get-char-attribute char 'name*))
               (setq char-spec (cons (cons 'name* ret) char-spec)))))
      char-spec)
     ((consp char)
      char))))
303     
(defun char-db-insert-char-spec (char &optional readable column
                                      required-features)
  "Insert at point the attributes that identify CHAR.

CHAR is a character or a char-spec alist.  The features printed are
those named in the char-spec from `char-db-make-char-spec' together
with REQUIRED-FEATURES; READABLE is handed on to
`insert-char-attributes'.

When CHAR is a char-spec for which `find-char' finds no character, a
temporary character is defined from the spec for the duration of the
insertion.  All of its attributes are removed again afterwards, even
when the insertion signals an error.

COLUMN is accepted for compatibility and is not used.
Always returns nil."
  (let ((char-spec (char-db-make-char-spec char))
        temp-char)
    (unless (or (characterp char)
                (condition-case nil
                    (setq char (find-char char-spec))
                  (error nil)))
      ;; define temporary character
      ;;   Current implementation is dirty.
      (setq temp-char (define-char (cons '(ideograph-daikanwa . 0)
                                         char-spec)))
      (remove-char-attribute temp-char 'ideograph-daikanwa)
      (setq char temp-char))
    (unwind-protect
        (insert-char-attributes char
                                readable
                                (union (mapcar #'car char-spec)
                                       required-features)
                                nil 'for-sub-node)
      (when temp-char
        ;; undefine temporary character
        ;;   Current implementation is dirty.
        (dolist (cell (char-attribute-alist temp-char))
          (remove-char-attribute temp-char (car cell)))))
    nil))
332
(defun char-db-insert-alist (alist &optional readable column)
  "Insert ALIST at point as a parenthesized list of entries.

Each entry of ALIST is (NAME . VALUE) and is followed by a line break
indented to one past COLUMN (default: the current column):
 - NAME `char': VALUE is a char-spec.  If `find-char' resolves it,
   the character is printed with `insert-char-attributes' restricted
   to the features named in the spec; otherwise VALUE is printed as is.
 - VALUE a list: the elements are printed after NAME.  Elements that
   are char-specs resolvable by `find-char' are printed with
   `insert-char-attributes', each on its own line; other elements are
   printed with `prin1-to-string', separated by spaces.  A non-nil
   tail of a dotted list is printed after \" . \".
 - anything else: printed as a dotted pair.
READABLE is handed on to `insert-char-attributes'."
  (unless column
    (setq column (current-column)))
  (let ((line-breaking
         (concat "\n" (make-string (1+ column) ?\ )))
        name value ret cell lbs separator)
    (insert "(")
    (dolist (entry alist)
      (setq name (car entry)
            value (cdr entry))
      (cond
       ((eq name 'char)
        (insert "(char . ")
        (if (setq ret (condition-case nil
                          (find-char value)
                        (error nil)))
            ;; The feature names are handed over in reverse order of
            ;; appearance, as the original consing loop did.
            (insert-char-attributes ret
                                    readable
                                    (or (reverse (mapcar #'car value))
                                        'none)
                                    nil 'for-sub-node)
          (insert (prin1-to-string value)))
        (insert ")")
        (insert line-breaking))
       ((consp value)
        (insert (format "(%-18s " name))
        ;; Start every list-valued entry without a pending separator;
        ;; otherwise the one left over from the previous entry would
        ;; be emitted in front of this entry's first element.
        (setq lbs (concat "\n" (make-string (current-column) ?\ ))
              separator nil)
        (while (consp value)
          (setq cell (car value))
          (if (and (consp cell)
                   (consp (car cell))
                   (setq ret (condition-case nil
                                 (find-char cell)
                               (error nil))))
              (progn
                (if separator
                    (insert lbs))
                (insert-char-attributes ret
                                        readable
                                        (reverse (mapcar #'car cell))
                                        nil 'for-sub-node)
                (setq separator lbs))
            (if separator
                (insert separator))
            (insert (prin1-to-string cell))
            (setq separator " "))
          (setq value (cdr value)))
        ;; Keep the tail of a dotted list instead of dropping it.
        (when value
          (insert (format " . %S" value)))
        (insert ")")
        (insert line-breaking))
       (t
        (insert (format "(%-18s . %S)%s"
                        name value
                        line-breaking))))))
  (insert ")"))
412
(defun char-db-insert-char-reference (plist &optional readable column)
  "Insert the property list PLIST at point, enclosed in parentheses.

PLIST alternates keys and values:
 - `:char': the value (an integer is first decoded as an `=ucs' code
   point) is printed with `char-db-insert-char-spec', followed by a
   line break;
 - `:radical': the number is printed followed by a comment showing
   the character returned by `ideographic-radical', and a line break;
 - any other key is printed as KEY, a tab and the printed value;
   consecutive entries of this kind are separated by line breaks.
Line breaks indent to one past COLUMN (default: the current column).
READABLE is handed on to `char-db-insert-char-spec'."
  (let* ((base-column (or column (current-column)))
         (line-breaking (concat "\n" (make-string (1+ base-column) ?\ )))
         (separator "")
         name value)
    (insert "(")
    (while plist
      ;; Take one KEY VALUE pair off the front of PLIST.
      (setq name (car plist)
            value (car (cdr plist))
            plist (cdr (cdr plist)))
      (cond
       ((eq name :char)
        (insert separator)
        (insert ":char\t")
        (if (numberp value)
            (setq value (decode-char '=ucs value)))
        (char-db-insert-char-spec value readable)
        (insert line-breaking)
        (setq separator ""))
       ((eq name :radical)
        (insert (format "%s%s\t%d ; %c%s"
                        separator
                        name value
                        (ideographic-radical value)
                        line-breaking))
        (setq separator ""))
       (t
        (insert (format "%s%s\t%S" separator name value))
        (setq separator line-breaking)))))
  (insert ")"))
448
(defun char-db-decode-isolated-char (ccs code-point)
  "Return a character to display for CODE-POINT of coded charset CCS.

The character is obtained with `decode-char' for `arabic-iso8859-6',
through the corresponding `=gt' code point for the `=gt-pj-N'
charsets when one is defined, and with `decode-builtin-char'
otherwise.  A result whose `char-int' is in 0..#x1F is replaced by
the `=ucs' character at #x2400 plus that value, and #x7F by U+2421."
  (let (via decoded code)
    (setq decoded
          (cond ((eq ccs 'arabic-iso8859-6)
                 (decode-char ccs code-point))
                ((and (memq ccs '(=gt-pj-1
                                  =gt-pj-2
                                  =gt-pj-3
                                  =gt-pj-4
                                  =gt-pj-5
                                  =gt-pj-6
                                  =gt-pj-7
                                  =gt-pj-8
                                  =gt-pj-9
                                  =gt-pj-10
                                  =gt-pj-11))
                      (setq via (decode-char ccs code-point))
                      (setq via (encode-char via '=gt 'defined-only)))
                 (decode-builtin-char '=gt via))
                (t
                 (decode-builtin-char ccs code-point))))
    (setq code (char-int decoded))
    (cond ((and (<= 0 code)
                (<= code #x1F))
           (decode-char '=ucs (+ #x2400 code)))
          ((= code #x7F)
           ?\u2421)
          (t decoded))))
476
(defvar char-db-convert-obsolete-format t
  "Flag, non-nil by default.
NOTE(review): not referenced in this part of the file; presumably it
enables conversion of obsolete feature formats when the database is
written out -- confirm against the code that reads it.")
478
(defun char-db-insert-ccs-feature (name value line-breaking)
  "Insert a (NAME . VALUE) cell for coded-charset feature NAME.

An integer VALUE is printed in a format chosen by NAME: zero-padded
decimal of 4, 5 or 6 digits for the explicitly listed features,
otherwise hexadecimal with 4 digits when the charset dimension is 2
or more and with 2 digits for anything else.  When the charset's ISO
graphic plane is 1, the high bit of each byte is set in the printed
number.  The cell is followed by a comment showing the character
returned by `char-db-decode-isolated-char', and for charsets with 94
chars and dimension 2 by \" [HH-LL]\", each byte of VALUE minus 32.

Any other VALUE is printed with \"%s\".  LINE-BREAKING is inserted
after the cell in every case."
  (if (not (integerp value))
      (insert (format "(%-18s . %s)" name value))
    (let ((template
           (cond
            ((memq name '(=shinjigen
                          =shinjigen@1ed
                          =shinjigen@rev =shinjigen/+p@rev
                          =daikanwa/ho))
             "(%-18s .  %04d)\t; %c")
            ((eq name '=shinjigen@1ed/24pr)
             "(%-18s . %04d)\t; %c")
            ((memq name '(=daikanwa =>>daikanwa =>daikanwa
                          =daikanwa@rev1 =daikanwa@rev2
                          =daikanwa/+p =daikanwa/+2p
                          =gt =>>>gt =>>gt =+>gt =>gt
                          =gt-k =>>gt-k =>gt-k
                          =adobe-japan1-0 =>>>adobe-japan1-0 =>>adobe-japan1-0
                          =adobe-japan1-1 =>>>adobe-japan1-1 =>>adobe-japan1-1
                          =adobe-japan1-2 =>>>adobe-japan1-2 =>>adobe-japan1-2
                          =adobe-japan1-3 =>>>adobe-japan1-3 =>>adobe-japan1-3
                          =adobe-japan1-4 =>>>adobe-japan1-4 =>>adobe-japan1-4
                          =adobe-japan1-5 =>>>adobe-japan1-5 =>>adobe-japan1-5
                          =adobe-japan1-6 =>>>adobe-japan1-6 =>>adobe-japan1-6
                          =+>adobe-japan1
                          =cbeta =>>cbeta
                          =zinbun-oracle =>zinbun-oracle))
             "(%-18s . %05d)\t; %c")
            ((memq name '(=hanyo-denshi/ks =>>hanyo-denshi/ks mojikyo))
             "(%-18s . %06d)\t; %c")
            ((>= (charset-dimension name) 2)
             "(%-18s . #x%04X)\t; %c")
            (t
             "(%-18s . #x%02X)\t; %c")))
          ;; For graphic plane 1, set the high bit of every byte.
          (printed-code
           (if (= (charset-iso-graphic-plane name) 1)
               (logior value
                       (cond ((= (charset-dimension name) 1)
                              #x80)
                             ((= (charset-dimension name) 2)
                              #x8080)
                             ((= (charset-dimension name) 3)
                              #x808080)
                             (t 0)))
             value)))
      (insert (format template
                      name
                      printed-code
                      (char-db-decode-isolated-char name value))))
    (when (and (= (charset-chars name) 94)
               (= (charset-dimension name) 2))
      (insert (format " [%02d-%02d]"
                      (- (lsh value -8) 32)
                      (- (logand value 255) 32)))))
  (insert line-breaking))
538
(defun char-db-insert-relation-feature (char name value line-breaking
                                             ccss readable)
  "Insert relation feature NAME of CHAR, whose members are in list VALUE.

The output starts with NAME and LINE-BREAKING and ends with a closing
parenthesis and LINE-BREAKING.  Integer members of VALUE are first
decoded as `=ucs' code points.  Each member is then printed thus:
 - NAME is `->subsumptive': the member is printed with
   `insert-char-attributes' (with `<-subsumptive' added to
   `char-db-ignored-attributes') unless it has an attribute listed in
   `char-db-ignored-attributes' and none of the charsets CCSS encodes it;
 - a character: printed with \"%S\" when READABLE is non-nil, else
   with `char-db-insert-char-spec', requesting extra features derived
   from CHAR's NAME*sources attribute and from \"@JP\" or \"@CN\"
   occurring in NAME;
 - a cons: printed with `char-db-insert-char-spec' when its car is a
   cons, else with `char-db-insert-char-reference';
 - anything else: printed with `prin1-to-string', separated from a
   preceding member by a space.
Members of the first three kinds each start on a new line."
  (insert (format "(%-18s%s " name line-breaking))
  (let ((lbs (concat "\n" (make-string (current-column) ?\ )))
        (cn-features '(=gb2312
                       =gb12345
                       =iso-ir165))
        separator cell sources required-features ret)
    (while (consp value)
      (setq cell (car value))
      (when (integerp cell)
        (setq cell (decode-char '=ucs cell)))
      (cond
       ((eq name '->subsumptive)
        (when (or (not (some (lambda (atr)
                               (get-char-attribute cell atr))
                             char-db-ignored-attributes))
                  (some (lambda (ccs)
                          (encode-char cell ccs 'defined-only))
                        ccss))
          (when separator
            (insert lbs))
          (let ((char-db-ignored-attributes
                 (cons '<-subsumptive
                       char-db-ignored-attributes)))
            (insert-char-attributes cell readable nil nil 'for-sub-node))
          (setq separator lbs)))
       ((characterp cell)
        (setq sources (get-char-attribute
                       char (intern (format "%s*sources" name)))
              required-features nil)
        (dolist (source sources)
          ;; Features implied by well-known source tags.
          (cond
           ((memq source '(JP
                           JP/Jouyou
                           shinjigen shinjigen@1ed shinjigen@rev))
            (setq required-features
                  (union required-features
                         '(=jis-x0208
                           =jis-x0208@1990
                           =jis-x0213-1@2000
                           =jis-x0213-1@2004
                           =jis-x0213-2
                           =jis-x0212
                           =jis-x0208@1983
                           =jis-x0208@1978
                           =shinjigen))))
           ((eq source 'CN)
            (setq required-features
                  (union required-features cn-features))))
          ;; The source itself: as =SOURCE when that names a charset,
          ;; otherwise under its own name.
          (setq ret (intern (format "=%s" source)))
          (setq required-features
                (cons (if (find-charset ret) ret source)
                      required-features)))
        ;; Features implied by a domain suffix in the feature name.
        ;; NOTE(review): this list spells the JIS X 0213 features
        ;; `=jis-x0213-1-2000' / `=jis-x0213-2-2000', unlike the
        ;; `=jis-x0213-1@2000' style used above -- confirm which
        ;; names are intended.
        (cond ((string-match "@JP" (symbol-name name))
               (setq required-features
                     (union required-features
                            '(=jis-x0208
                              =jis-x0208@1990
                              =jis-x0213-1-2000
                              =jis-x0213-2-2000
                              =jis-x0212
                              =jis-x0208@1983
                              =jis-x0208@1978))))
              ((string-match "@CN" (symbol-name name))
               (setq required-features
                     (union required-features cn-features))))
        (when separator
          (insert lbs))
        (if readable
            (insert (format "%S" cell))
          (char-db-insert-char-spec cell readable
                                    nil
                                    required-features))
        (setq separator lbs))
       ((consp cell)
        (when separator
          (insert lbs))
        (if (consp (car cell))
            (char-db-insert-char-spec cell readable)
          (char-db-insert-char-reference cell readable))
        (setq separator lbs))
       (t
        (when separator
          (insert separator))
        (insert (prin1-to-string cell))
        (setq separator " ")))
      (setq value (cdr value)))
    (insert ")")
    (insert line-breaking)))
637
638 (defun insert-char-attributes (char &optional readable attributes column
639                                     for-sub-node)
640   (unless column
641     (setq column (current-column)))
642   (let (name value ; has-long-ccs-name
643         rest
644         radical strokes
645         (line-breaking
646          (concat "\n" (make-string (1+ column) ?\ )))
647         lbs cell separator ret
648         key al cal
649         dest-ccss ; sources required-features
650         ccss)
651     (let (atr-d)
652       (setq attributes
653             (sort (if attributes
654                       (if (consp attributes)
655                           (progn
656                             (dolist (name attributes)
657                               (unless (memq name char-db-ignored-attributes)
658                                 (if (find-charset name)
659                                     (push name ccss))
660                                 (push name atr-d)))
661                             atr-d))
662                     (dolist (name (char-attribute-list))
663                       (unless (memq name char-db-ignored-attributes)
664                         (if (find-charset name)
665                             (push name ccss))
666                         (push name atr-d)))
667                     atr-d)
668                   #'char-attribute-name<)))
669     (insert "(")
670     (when (memq '<-subsumptive attributes)
671       (when (or readable (not for-sub-node))
672         (when (setq value (get-char-attribute char '<-subsumptive))
673           (char-db-insert-relation-feature char '<-subsumptive value
674                                            line-breaking
675                                            ccss readable)))
676       (setq attributes (delq '<-subsumptive attributes)))
677     (when (and (memq '<-denotational attributes)
678                (setq value (get-char-attribute char '<-denotational)))
679       (char-db-insert-relation-feature char '<-denotational value
680                                        line-breaking
681                                        ccss readable)
682       (setq attributes (delq '<-denotational attributes)))
683     (when (and (memq 'name attributes)
684                (setq value (get-char-attribute char 'name)))
685       (insert (format
686                (if (> (+ (current-column) (length value)) 48)
687                    "(name . %S)%s"
688                  "(name               . %S)%s")
689                value line-breaking))
690       (setq attributes (delq 'name attributes))
691       )
692     (when (and (memq 'name* attributes)
693                (setq value (get-char-attribute char 'name*)))
694       (insert (format
695                (if (> (+ (current-column) (length value)) 48)
696                    "(name* . %S)%s"
697                  "(name*              . %S)%s")
698                value line-breaking))
699       (setq attributes (delq 'name* attributes))
700       )
701     (when (and (memq 'script attributes)
702                (setq value (get-char-attribute char 'script)))
703       (insert (format "(script\t\t%s)%s"
704                       (mapconcat (function prin1-to-string)
705                                  value " ")
706                       line-breaking))
707       (setq attributes (delq 'script attributes))
708       )
709     (dolist (name '(=>ucs =>ucs*))
710       (when (and (memq name attributes)
711                  (setq value (get-char-attribute char name)))
712         (insert (format "(%-18s . #x%04X)\t; %c%s"
713                         name value (decode-char '=ucs value)
714                         line-breaking))
715         (setq attributes (delq name attributes))))
716     (dolist (name '(=>ucs@gb =>ucs@big5))
717       (when (and (memq name attributes)
718                  (setq value (get-char-attribute char name)))
719         (insert (format "(%-18s . #x%04X)\t; %c%s"
720                         name value
721                         (decode-char (intern
722                                       (concat "="
723                                               (substring
724                                                (symbol-name name) 2)))
725                                      value)
726                         line-breaking))
727         (setq attributes (delq name attributes))
728         ))
729     ;; (dolist (name '(=>daikanwa))
730     ;;   (when (and (memq name attributes)
731     ;;              (setq value (get-char-attribute char name)))
732     ;;     (insert
733     ;;      (if (integerp value)
734     ;;          (format "(%-18s . %05d)\t; %c%s"
735     ;;                  name value (decode-char '=daikanwa value)
736     ;;                  line-breaking)
737     ;;        (format "(%-18s %s)\t; %c%s"
738     ;;                name
739     ;;                (mapconcat (function prin1-to-string)
740     ;;                           value " ")
741     ;;                (char-representative-of-daikanwa char)
742     ;;                line-breaking)))
743     ;;     (setq attributes (delq name attributes))))
744     (when (and (memq 'general-category attributes)
745                (setq value (get-char-attribute char 'general-category)))
746       (insert (format
747                "(general-category\t%s) ; %s%s"
748                (mapconcat (lambda (cell)
749                             (format "%S" cell))
750                           value " ")
751                (cond ((rassoc value unidata-normative-category-alist)
752                       "Normative Category")
753                      ((rassoc value unidata-informative-category-alist)
754                       "Informative Category")
755                      (t
756                       "Unknown Category"))
757                line-breaking))
758       (setq attributes (delq 'general-category attributes))
759       )
760     (when (and (memq 'bidi-category attributes)
761                (setq value (get-char-attribute char 'bidi-category)))
762       (insert (format "(bidi-category\t. %S)%s"
763                       value
764                       line-breaking))
765       (setq attributes (delq 'bidi-category attributes))
766       )
767     (unless (or (not (memq 'mirrored attributes))
768                 (eq (setq value (get-char-attribute char 'mirrored 'empty))
769                     'empty))
770       (insert (format "(mirrored\t\t. %S)%s"
771                       value
772                       line-breaking))
773       (setq attributes (delq 'mirrored attributes))
774       )
775     (cond
776      ((and (memq 'decimal-digit-value attributes)
777            (setq value (get-char-attribute char 'decimal-digit-value)))
778       (insert (format "(decimal-digit-value . %S)%s"
779                       value
780                       line-breaking))
781       (setq attributes (delq 'decimal-digit-value attributes))
782       (when (and (memq 'digit-value attributes)
783                  (setq value (get-char-attribute char 'digit-value)))
784         (insert (format "(digit-value\t . %S)%s"
785                         value
786                         line-breaking))
787         (setq attributes (delq 'digit-value attributes))
788         )
789       (when (and (memq 'numeric-value attributes)
790                  (setq value (get-char-attribute char 'numeric-value)))
791         (insert (format "(numeric-value\t . %S)%s"
792                         value
793                         line-breaking))
794         (setq attributes (delq 'numeric-value attributes))
795         )
796       )
797      (t
798       (when (and (memq 'digit-value attributes)
799                  (setq value (get-char-attribute char 'digit-value)))
800         (insert (format "(digit-value\t. %S)%s"
801                         value
802                         line-breaking))
803         (setq attributes (delq 'digit-value attributes))
804         )
805       (when (and (memq 'numeric-value attributes)
806                  (setq value (get-char-attribute char 'numeric-value)))
807         (insert (format "(numeric-value\t. %S)%s"
808                         value
809                         line-breaking))
810         (setq attributes (delq 'numeric-value attributes))
811         )))
812     (when (and (memq 'iso-10646-comment attributes)
813                (setq value (get-char-attribute char 'iso-10646-comment)))
814       (insert (format "(iso-10646-comment\t. %S)%s"
815                       value
816                       line-breaking))
817       (setq attributes (delq 'iso-10646-comment attributes))
818       )
819     (when (and (memq 'morohashi-daikanwa attributes)
820                (setq value (get-char-attribute char 'morohashi-daikanwa)))
821       (insert (format "(morohashi-daikanwa\t%s)%s"
822                       (mapconcat (function prin1-to-string) value " ")
823                       line-breaking))
824       (setq attributes (delq 'morohashi-daikanwa attributes))
825       )
826     (setq radical nil
827           strokes nil)
828     (when (and (memq 'ideographic-radical attributes)
829                (setq value (get-char-attribute char 'ideographic-radical)))
830       (setq radical value)
831       (insert (format "(ideographic-radical . %S)\t; %c%s"
832                       radical
833                       (ideographic-radical radical)
834                       line-breaking))
835       (setq attributes (delq 'ideographic-radical attributes))
836       )
837     (when (and (memq 'shuowen-radical attributes)
838                (setq value (get-char-attribute char 'shuowen-radical)))
839       (insert (format "(shuowen-radical\t. %S)\t; %c%s"
840                       value
841                       (shuowen-radical value)
842                       line-breaking))
843       (setq attributes (delq 'shuowen-radical attributes))
844       )
845     (let (key)
846       (dolist (domain
847                (append
848                 char-db-feature-domains
849                 (let (dest domain)
850                   (dolist (feature (char-attribute-list))
851                     (setq feature (symbol-name feature))
852                     (when (string-match
853                            "\\(radical\\|strokes\\)@\\([^@*]+\\)\\(\\*\\|$\\)"
854                            feature)
855                       (setq domain (intern (match-string 2 feature)))
856                      (unless (memq domain dest)
857                        (setq dest (cons domain dest)))))
858                   (sort dest #'string<))))
859         (setq key (intern (format "%s@%s" 'ideographic-radical domain)))
860         (when (and (memq key attributes)
861                    (setq value (get-char-attribute char key)))
862           (setq radical value)
863           (insert (format "(%s . %S)\t; %c%s"
864                           key
865                           radical
866                           (ideographic-radical radical)
867                           line-breaking))
868           (setq attributes (delq key attributes))
869           )
870         (setq key (intern (format "%s@%s" 'ideographic-strokes domain)))
871         (when (and (memq key attributes)
872                    (setq value (get-char-attribute char key)))
873           (setq strokes value)
874           (insert (format "(%s . %S)%s"
875                           key
876                           strokes
877                           line-breaking))
878           (setq attributes (delq key attributes))
879           )
880         (setq key (intern (format "%s@%s" 'total-strokes domain)))
881         (when (and (memq key attributes)
882                    (setq value (get-char-attribute char key)))
883           (insert (format "(%s       . %S)%s"
884                           key
885                           value
886                           line-breaking))
887           (setq attributes (delq key attributes))
888           )
889         (dolist (feature '(ideographic-radical
890                            ideographic-strokes
891                            total-strokes))
892           (setq key (intern (format "%s@%s*sources" feature domain)))
893           (when (and (memq key attributes)
894                      (setq value (get-char-attribute char key)))
895             (insert (format "(%s%s" key line-breaking))
896             (dolist (cell value)
897               (insert (format " %s" cell)))
898             (insert ")")
899             (insert line-breaking)
900             (setq attributes (delq key attributes))
901             ))
902         ))
903     (when (and (memq 'ideographic-strokes attributes)
904                (setq value (get-char-attribute char 'ideographic-strokes)))
905       (setq strokes value)
906       (insert (format "(ideographic-strokes . %S)%s"
907                       strokes
908                       line-breaking))
909       (setq attributes (delq 'ideographic-strokes attributes))
910       )
911     (when (and (memq 'kangxi-radical attributes)
912                (setq value (get-char-attribute char 'kangxi-radical)))
913       (unless (eq value radical)
914         (insert (format "(kangxi-radical\t . %S)\t; %c%s"
915                         value
916                         (ideographic-radical value)
917                         line-breaking))
918         (or radical
919             (setq radical value)))
920       (setq attributes (delq 'kangxi-radical attributes))
921       )
922     (when (and (memq 'kangxi-strokes attributes)
923                (setq value (get-char-attribute char 'kangxi-strokes)))
924       (unless (eq value strokes)
925         (insert (format "(kangxi-strokes\t . %S)%s"
926                         value
927                         line-breaking))
928         (or strokes
929             (setq strokes value)))
930       (setq attributes (delq 'kangxi-strokes attributes))
931       )
932     (when (and (memq 'japanese-radical attributes)
933                (setq value (get-char-attribute char 'japanese-radical)))
934       (unless (eq value radical)
935         (insert (format "(japanese-radical\t . %S)\t; %c%s"
936                         value
937                         (ideographic-radical value)
938                         line-breaking))
939         (or radical
940             (setq radical value)))
941       (setq attributes (delq 'japanese-radical attributes))
942       )
943     (when (and (memq 'japanese-strokes attributes)
944                (setq value (get-char-attribute char 'japanese-strokes)))
945       (unless (eq value strokes)
946         (insert (format "(japanese-strokes\t . %S)%s"
947                         value
948                         line-breaking))
949         (or strokes
950             (setq strokes value)))
951       (setq attributes (delq 'japanese-strokes attributes))
952       )
953     (when (and (memq 'cns-radical attributes)
954                (setq value (get-char-attribute char 'cns-radical)))
955       (insert (format "(cns-radical\t . %S)\t; %c%s"
956                       value
957                       (ideographic-radical value)
958                       line-breaking))
959       (setq attributes (delq 'cns-radical attributes))
960       )
961     (when (and (memq 'cns-strokes attributes)
962                (setq value (get-char-attribute char 'cns-strokes)))
963       (unless (eq value strokes)
964         (insert (format "(cns-strokes\t . %S)%s"
965                         value
966                         line-breaking))
967         (or strokes
968             (setq strokes value)))
969       (setq attributes (delq 'cns-strokes attributes))
970       )
971     (when (and (memq 'shinjigen-1-radical attributes)
972                (setq value (get-char-attribute char 'shinjigen-1-radical)))
973       (unless (eq value radical)
974         (insert (format "(shinjigen-1-radical . %S)\t; %c%s"
975                         value
976                         (ideographic-radical value)
977                         line-breaking))
978         (or radical
979             (setq radical value)))
980       (setq attributes (delq 'shinjigen-1-radical attributes))
981       )
982     (when (and (memq 'ideographic- attributes)
983                (setq value (get-char-attribute char 'ideographic-)))
984       (insert "(ideographic-       ")
985       (setq lbs (concat "\n" (make-string (current-column) ?\ ))
986             separator nil)
987       (while (consp value)
988         (setq cell (car value))
989         (if (integerp cell)
990             (setq cell (decode-char '=ucs cell)))
991         (cond ((characterp cell)
992                (if separator
993                    (insert lbs))
994                (if readable
995                    (insert (format "%S" cell))
996                  (char-db-insert-char-spec cell readable))
997                (setq separator lbs))
998               ((consp cell)
999                (if separator
1000                    (insert lbs))
1001                (if (consp (car cell))
1002                    (char-db-insert-char-spec cell readable)
1003                  (char-db-insert-char-reference cell readable))
1004                (setq separator lbs))
1005               (t
1006                (if separator
1007                    (insert separator))
1008                (insert (prin1-to-string cell))
1009                (setq separator " ")))
1010         (setq value (cdr value)))
1011       (insert ")")
1012       (insert line-breaking)
1013       (setq attributes (delq 'ideographic- attributes)))
1014     (when (and (memq 'total-strokes attributes)
1015                (setq value (get-char-attribute char 'total-strokes)))
1016       (insert (format "(total-strokes       . %S)%s"
1017                       value
1018                       line-breaking))
1019       (setq attributes (delq 'total-strokes attributes))
1020       )
1021     (when (and (memq '->ideograph attributes)
1022                (setq value (get-char-attribute char '->ideograph)))
1023       (insert (format "(->ideograph\t%s)%s"
1024                       (mapconcat (lambda (code)
1025                                    (cond ((symbolp code)
1026                                           (symbol-name code))
1027                                          ((integerp code)
1028                                           (format "#x%04X" code))
1029                                          (t
1030                                           (format "%s %S"
1031                                                   line-breaking code))))
1032                                  value " ")
1033                       line-breaking))
1034       (setq attributes (delq '->ideograph attributes))
1035       )
1036     ;; (when (and (memq '->decomposition attributes)
1037     ;;            (setq value (get-char-attribute char '->decomposition)))
1038     ;;   (insert (format "(->decomposition\t%s)%s"
1039     ;;                   (mapconcat (lambda (code)
1040     ;;                                (cond ((symbolp code)
1041     ;;                                       (symbol-name code))
1042     ;;                                      ((characterp code)
1043     ;;                                       (if readable
1044     ;;                                           (format "%S" code)
1045     ;;                                         (format "#x%04X"
1046     ;;                                                 (char-int code))
1047     ;;                                         ))
1048     ;;                                      ((integerp code)
1049     ;;                                       (format "#x%04X" code))
1050     ;;                                      (t
1051     ;;                                       (format "%s%S" line-breaking code))))
1052     ;;                              value " ")
1053     ;;                   line-breaking))
1054     ;;   (setq attributes (delq '->decomposition attributes))
1055     ;;   )
1056     (if (equal (get-char-attribute char '->titlecase)
1057                (get-char-attribute char '->uppercase))
1058         (setq attributes (delq '->titlecase attributes)))
1059     (when (and (memq '->mojikyo attributes)
1060                (setq value (get-char-attribute char '->mojikyo)))
1061       (insert (format "(->mojikyo\t\t. %06d)\t; %c%s"
1062                       value (decode-char 'mojikyo value)
1063                       line-breaking))
1064       (setq attributes (delq '->mojikyo attributes))
1065       )
1066     (when (and (memq 'hanyu-dazidian-vol attributes)
1067                (setq value (get-char-attribute char 'hanyu-dazidian-vol)))
1068       (insert (format "(hanyu-dazidian-vol  . %d)%s"
1069                       value line-breaking))
1070       (setq attributes (delq 'hanyu-dazidian-vol attributes))
1071       )
1072     (when (and (memq 'hanyu-dazidian-page attributes)
1073                (setq value (get-char-attribute char 'hanyu-dazidian-page)))
1074       (insert (format "(hanyu-dazidian-page . %d)%s"
1075                       value line-breaking))
1076       (setq attributes (delq 'hanyu-dazidian-page attributes))
1077       )
1078     (when (and (memq 'hanyu-dazidian-char attributes)
1079                (setq value (get-char-attribute char 'hanyu-dazidian-char)))
1080       (insert (format "(hanyu-dazidian-char . %d)%s"
1081                       value line-breaking))
1082       (setq attributes (delq 'hanyu-dazidian-char attributes))
1083       )
1084     (unless readable
1085       (dolist (ignored '(composition
1086                          ->denotational <-subsumptive ->ucs-unified
1087                          ->ideographic-component-forms))
1088         (setq attributes (delq ignored attributes))))
1089     (while attributes
1090       (setq name (car attributes))
1091       (unless (eq (setq value (get-char-attribute char name 'value-is-empty))
1092                   'value-is-empty)
1093         (cond ((setq ret (find-charset name))
1094                (setq name (charset-name ret))
1095                (when (not (memq name dest-ccss))
1096                  (setq dest-ccss (cons name dest-ccss))
1097                  (char-db-insert-ccs-feature name value line-breaking))
1098                )
1099               ((string-match "^=>ucs@" (symbol-name name))
1100                (insert (format "(%-18s . #x%04X)\t; %c%s"
1101                                name value (decode-char '=ucs value)
1102                                line-breaking))
1103                )
1104               ((eq name 'jisx0208-1978/4X)
1105                (insert (format "(%-18s . #x%04X)%s"
1106                                name value
1107                                line-breaking))
1108                )
1109               ((and
1110                 (not readable)
1111                 (not (eq name '->subsumptive))
1112                 (not (eq name '->uppercase))
1113                 (not (eq name '->lowercase))
1114                 (not (eq name '->titlecase))
1115                 (not (eq name '->canonical))
1116                 (not (eq name '->Bopomofo))
1117                 (not (eq name '->mistakable))
1118                 (not (eq name '->ideographic-variants))
1119                 (null (get-char-attribute
1120                        char (intern (format "%s*sources" name))))
1121                 (not (string-match "\\*sources$" (symbol-name name)))
1122                 (null (get-char-attribute
1123                        char (intern (format "%s*note" name))))
1124                 (not (string-match "\\*note$" (symbol-name name)))
1125                 (or (eq name '<-identical)
1126                     (eq name '<-uppercase)
1127                     (eq name '<-lowercase)
1128                     (eq name '<-titlecase)
1129                     (eq name '<-canonical)
1130                     (eq name '<-ideographic-variants)
1131                     ;; (eq name '<-synonyms)
1132                     (string-match "^<-synonyms" (symbol-name name))
1133                     (eq name '<-mistakable)
1134                     (when (string-match "^->" (symbol-name name))
1135                       (cond
1136                        ((string-match "^->fullwidth" (symbol-name name))
1137                         (not (and (consp value)
1138                                   (characterp (car value))
1139                                   (encode-char
1140                                    (car value) '=ucs 'defined-only)))
1141                         )
1142                        (t)))
1143                     ))
1144                )
1145               ((or (eq name 'ideographic-structure)
1146                    (eq name 'ideographic-combination)
1147                    (eq name 'ideographic-)
1148                    (eq name '=decomposition)
1149                    (char-feature-base-name= '=decomposition name)
1150                    (char-feature-base-name= '=>decomposition name)
1151                    ;; (string-match "^=>*decomposition\\(@[^*]+\\)?$"
1152                    ;;               (symbol-name name))
1153                    (string-match "^\\(->\\|<-\\)[^*]*$" (symbol-name name))
1154                    (string-match "^\\(->\\|<-\\)[^*]*\\*sources$"
1155                                  (symbol-name name))
1156                    )
1157                (char-db-insert-relation-feature char name value
1158                                                 line-breaking
1159                                                 ccss readable))
1160               ((memq name '(ideograph=
1161                             original-ideograph-of
1162                             ancient-ideograph-of
1163                             vulgar-ideograph-of
1164                             wrong-ideograph-of
1165                             ;; simplified-ideograph-of
1166                             ideographic-variants
1167                             ;; ideographic-different-form-of
1168                             ))
1169                (insert (format "(%-18s%s " name line-breaking))
1170                (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1171                      separator nil)
1172                (while (consp value)
1173                  (setq cell (car value))
1174                  (if (and (consp cell)
1175                           (consp (car cell)))
1176                      (progn
1177                        (if separator
1178                            (insert lbs))
1179                        (char-db-insert-alist cell readable)
1180                        (setq separator lbs))
1181                    (if separator
1182                        (insert separator))
1183                    (insert (prin1-to-string cell))
1184                    (setq separator " "))
1185                  (setq value (cdr value)))
1186                (insert ")")
1187                (insert line-breaking))
1188               ((consp value)
1189                (insert (format "(%-18s " name))
1190                (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1191                      separator nil)
1192                (while (consp value)
1193                  (setq cell (car value))
1194                  (if (and (consp cell)
1195                           (consp (car cell))
1196                           (setq ret (condition-case nil
1197                                         (find-char cell)
1198                                       (error nil))))
1199                      (progn
1200                        (setq rest cell
1201                              al nil
1202                              cal nil)
1203                        (while rest
1204                          (setq key (car (car rest)))
1205                          (if (find-charset key)
1206                              (setq cal (cons key cal))
1207                            (setq al (cons key al)))
1208                          (setq rest (cdr rest)))
1209                        (if separator
1210                            (insert lbs))
1211                        (insert-char-attributes ret
1212                                                readable
1213                                                al ; cal
1214                                                nil 'for-sub-node)
1215                        (setq separator lbs))
1216                    (setq ret (prin1-to-string cell))
1217                    (if separator
1218                        (if (< (+ (current-column)
1219                                  (length ret)
1220                                  (length separator))
1221                               76)
1222                            (insert separator)
1223                          (insert lbs)))
1224                    (insert ret)
1225                    (setq separator " "))
1226                  (setq value (cdr value)))
1227                (insert ")")
1228                (insert line-breaking))
1229               (t
1230                (insert (format "(%-18s" name))
1231                (setq ret (prin1-to-string value))
1232                (unless (< (+ (current-column)
1233                              (length ret)
1234                              3)
1235                           76)
1236                  (insert line-breaking))
1237                (insert " . " ret ")" line-breaking)
1238                ;; (insert (format "(%-18s . %S)%s"
1239                ;;                 name value
1240                ;;                 line-breaking))
1241                )
1242               ))
1243       (setq attributes (cdr attributes)))
1244     (insert ")")))
1245
(defun insert-char-data (char &optional readable attributes)
  "Insert a `define-char' form describing CHAR at point.
The inserted text is the literal opener \"(define-char\\n  '\", then
whatever `insert-char-attributes' emits for CHAR, READABLE and
ATTRIBUTES (both optional arguments are handed over unchanged), then
\")\\n\".

The freshly inserted text is cleaned up afterwards: trailing blanks
are removed from every line, and every run of two or more spaces/tabs
is re-created with `indent-to' while `indent-tabs-mode' is bound to t.
Point is left at the end of the inserted text."
  (save-restriction
    ;; Narrow to an empty region at point so that the clean-up passes
    ;; below can only ever see the text inserted by this call.
    (narrow-to-region (point) (point))
    (insert "(define-char\n  '")
    (insert-char-attributes char readable attributes)
    (insert ")\n")
    ;; Pass 1: strip whitespace at the end of each line.
    (goto-char (point-min))
    (while (re-search-forward "[ \t]+$" nil t)
      (delete-region (match-beginning 0) (match-end 0)))
    ;; Pass 2 (borrowed from tabify): remember the column at which each
    ;; whitespace run ends, delete the run and indent back to that
    ;; column, letting `indent-to' choose tabs where it can.
    (goto-char (point-min))
    (let ((indent-tabs-mode t)
          gap-start target-column)
      (while (re-search-forward "[ \t][ \t][ \t]*" nil t)
        (setq gap-start (match-beginning 0)
              target-column (current-column))
        (delete-region gap-start (point))
        (indent-to target-column)))
    (goto-char (point-max))
    ;; (tabify (point-min)(point-max))
    ))
1267
(defun insert-char-data-with-variant (char &optional printable
                                           no-ucs-unified
                                           script excluded-script)
  "Insert the definition of CHAR followed by those of its variants.
CHAR itself is always inserted with `insert-char-data'; PRINTABLE is
passed on as that function's READABLE argument.  The variants reported
by `char-variants' are sorted with `<' and visited in that order; the
variants of every variant that actually gets inserted are appended to
the work list unless they are already on it.

A variant is skipped (and its own variants are not explored) when
  - it has a `<-subsumptive' attribute,
  - SCRIPT is non-nil and the variant has a `script' attribute that
    does not contain SCRIPT,
  - EXCLUDED-SCRIPT is non-nil and the variant's `script' attribute
    contains it, or
  - NO-UCS-UNIFIED is non-nil and the variant has an `=ucs' attribute."
  (insert-char-data char printable)
  (let* ((queue (sort (char-variants char) #'<))
         ;; SEEN is CHAR consed onto the very same list structure as
         ;; QUEUE.  Appending to QUEUE destructively with `nconc'
         ;; therefore extends SEEN too, which is what keeps the `memq'
         ;; test below aware of variants queued later on.
         (seen (cons char queue))
         variant scripts)
    (while queue
      (setq variant (car queue))
      (when (and (not (get-char-attribute variant '<-subsumptive))
                 (or (null script)
                     (null (setq scripts
                                 (get-char-attribute variant 'script)))
                     (memq script scripts))
                 (or (null excluded-script)
                     (null (setq scripts
                                 (get-char-attribute variant 'script)))
                     (not (memq excluded-script scripts)))
                 (not (and no-ucs-unified
                           (get-char-attribute variant '=ucs))))
        (insert-char-data variant printable)
        (dolist (sub-variant (char-variants variant))
          (unless (memq sub-variant seen)
            ;; (get-char-attribute sub-variant '<-subsumptive)
            ;; QUEUE is never empty here, so `nconc' extends it in place.
            (nconc queue (list sub-variant)))))
      (setq queue (cdr queue)))))
1296
(defun insert-char-range-data (min max &optional script excluded-script)
  "Insert character data for every code from MIN to MAX, inclusive.
Each code is turned into a character with `decode-char' on `=ucs'.
When `encode-char' with `defined-only' reports an `=ucs' code for that
character, it is inserted together with its variants by
`insert-char-data-with-variant', called with a nil PRINTABLE argument
and a non-nil NO-UCS-UNIFIED argument.  SCRIPT and EXCLUDED-SCRIPT are
passed through to that function unchanged."
  (let ((code-point min))
    (while (<= code-point max)
      (let ((char (decode-char '=ucs code-point)))
        (when (encode-char char '=ucs 'defined-only)
          (insert-char-data-with-variant
           char nil 'no-ucs-unified script excluded-script)))
      (setq code-point (1+ code-point)))))
1306
(defun write-char-range-data-to-file (min max file
                                          &optional script excluded-script)
  "Write the character data for codes MIN through MAX to FILE.
The text is assembled in a temporary buffer: first a coding cookie line
naming `char-db-file-coding-system', then the output of
`insert-char-range-data' for MIN, MAX, SCRIPT and EXCLUDED-SCRIPT.  The
buffer is written with `write-region' while `coding-system-for-write'
is bound to `char-db-file-coding-system'."
  (let ((coding-system-for-write char-db-file-coding-system))
    (with-temp-buffer
      ;; Header line announcing the coding system of the file.
      (let ((cookie (format ";; -*- coding: %s -*-\n"
                            char-db-file-coding-system)))
        (insert cookie))
      (insert-char-range-data min max script excluded-script)
      (write-region (point-min) (point-max) file))))
1315
;; Window configuration that was current before `what-char-definition'
;; popped up its buffer.  That command makes the variable buffer-local
;; in the description buffer and restores the saved configuration from
;; it, either through the function it hands to `view-mode' or when
;; filling the buffer fails.
(defvar what-character-original-window-configuration)

;;;###autoload
(defun what-char-definition (char)
  "Show the character-database definition of CHAR in a pop-up buffer.
Interactively, CHAR is the character following point; an error is
signalled when there is none (e.g. at the end of the buffer).

The buffer \"*Character Description*\" is erased and filled by
`insert-char-data-with-variant'.  If CHAR has no attributes at all, a
comment line showing the character rebuilt from its `split-char'
components is appended.  Finally `view-mode' is called with the
original buffer and a function that restores the window configuration
saved before the pop-up, and point moves to the start of the buffer.

If anything fails while the buffer is being filled, the saved window
configuration is restored and the error is signalled again."
  (interactive
   ;; `char-after' returns nil when there is no character at point.
   ;; Refuse right here rather than failing on a nil character after
   ;; the description buffer has already been popped up and erased.
   (list (or (char-after)
             (error "No character at point"))))
  (let ((buf (get-buffer-create "*Character Description*"))
        (the-buf (current-buffer))
        (win-conf (current-window-configuration)))
    (pop-to-buffer buf)
    (make-local-variable 'what-character-original-window-configuration)
    (setq what-character-original-window-configuration win-conf)
    (setq buffer-read-only nil)
    (erase-buffer)
    (condition-case err
        (progn
          (insert-char-data-with-variant char 'printable)
          (unless (char-attribute-alist char)
            ;; No attributes known: fold the position bytes returned by
            ;; `split-char' into a single code, most significant byte
            ;; first, and show the character decoded from it.
            (insert (format ";; = %c\n"
                            (let* ((rest (split-char char))
                                   (ccs (pop rest))
                                   (code (pop rest)))
                              (while rest
                                (setq code (logior (lsh code 8)
                                                   (pop rest))))
                              (decode-char ccs code)))))
          ;; (char-db-update-comment)
          (set-buffer-modified-p nil)
          (view-mode the-buf
                     (lambda (buf)
                       (set-window-configuration
                        what-character-original-window-configuration)))
          (goto-char (point-min)))
      (error
       ;; Undo the pop-up before passing the error on to the caller.
       (set-window-configuration
        what-character-original-window-configuration)
       (signal (car err) (cdr err))))))
1352
1353
1354 ;;; @ end
1355 ;;;
1356
1357 (provide 'char-db-util)
1358
1359 ;;; char-db-util.el ends here