(char-db-feature-domains): Moved to chise-subr.el.
[chise/xemacs-chise.git.1] / lisp / utf-2000 / char-db-util.el
1 ;;; char-db-util.el --- Character Database utility -*- coding: utf-8-er; -*-
2
3 ;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
4 ;;   2007, 2008, 2009, 2010 MORIOKA Tomohiko.
5
6 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
7 ;; Keywords: CHISE, Character Database, ISO/IEC 10646, UCS, Unicode, MULE.
8
9 ;; This file is part of XEmacs CHISE.
10
11 ;; XEmacs CHISE is free software; you can redistribute it and/or
12 ;; modify it under the terms of the GNU General Public License as
13 ;; published by the Free Software Foundation; either version 2, or (at
14 ;; your option) any later version.
15
16 ;; XEmacs CHISE is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 ;; General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with XEmacs CHISE; see the file COPYING.  If not, write to
23 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Code:
27
28 (require 'chise-subr)
29 (require 'ideograph-subr)
30
31 (defconst unidata-normative-category-alist
32   '(("Lu" letter        uppercase)
33     ("Ll" letter        lowercase)
34     ("Lt" letter        titlecase)
35     ("Mn" mark          non-spacing)
36     ("Mc" mark          spacing-combining)
37     ("Me" mark          enclosing)
38     ("Nd" number        decimal-digit)
39     ("Nl" number        letter)
40     ("No" number        other)
41     ("Zs" separator     space)
42     ("Zl" separator     line)
43     ("Zp" separator     paragraph)
44     ("Cc" other         control)
45     ("Cf" other         format)
46     ("Cs" other         surrogate)
47     ("Co" other         private-use)
48     ("Cn" other         not-assigned)))
49
50 (defconst unidata-informative-category-alist
51   '(("Lm" letter        modifier)
52     ("Lo" letter        other)
53     ("Pc" punctuation   connector)
54     ("Pd" punctuation   dash)
55     ("Ps" punctuation   open)
56     ("Pe" punctuation   close)
57     ("Pi" punctuation   initial-quote)
58     ("Pf" punctuation   final-quote)
59     ("Po" punctuation   other)
60     ("Sm" symbol        math)
61     ("Sc" symbol        currency)
62     ("Sk" symbol        modifier)
63     ("So" symbol        other)
64     ))
65
66 (defconst shuowen-radicals
67   [?一 ?上 ?示 ?三 ?王 ?玉 ?玨 ?气 ?士 ?丨 ; 010
68    ?屮 ?艸 ?蓐 ?茻 ?小 ?八 ?釆 ?半 ?牛 ?犛 ; 020
69    ?告 ?口 ?凵 ?吅 ?哭 ?走 ?止 ?癶 ?步 ?此 ; 030
70    ?正 ?是 ?辵 ?彳 ?廴 ?㢟 ?行 ?齒 ?牙 ?足 ; 040
71    ?疋 ?品 ?龠 ?冊 ?㗊 ?舌 ?干 ?谷 ?只 ?㕯 ; 050
72    ?句 ?丩 ?古 ?十 ?卅 ?言 ?誩 ?音 ?䇂 ?丵 ; 060
73    ?菐 ?𠬞 ?廾 ?共 ?異 ?舁 ?𦥑 ?䢅 ?爨 ?革 ; 070
74    ?鬲 ?䰜 ?爪 ?𠃨 ?鬥 ?又 ?𠂇 ?㕜 ?支 ?𦘒 ; 080
75    ?聿 ?畫 ?隶 ?臤 ?臣 ?殳 ?殺 ?𠘧 ?寸 ?皮 ; 090
76    ?㼱 ?攴 ?敎 ?卜 ?用 ?爻 ?㸚 ?𥄎 ?目 ?䀠 ; 100
77    ?眉 ?盾 ?自 ?白 ?鼻 ?皕 ?習 ?羽 ?隹 ?奞 ; 110
78    ?萑 ?𦫳 ?苜 ?羊 ?羴 ?瞿 ?雔 ?雥 ?鳥 ?烏 ; 120
79    ?𠦒 ?冓 ?幺 ?𢆶 ?叀 ?玄 ?予 ?放 ?𠬪 ?𣦼 ; 130
80    ?歺 ?死 ?冎 ?骨 ?肉 ?筋 ?刀 ?刃 ?㓞 ?丰 ; 140
81    ?耒 ?𧢲 ?竹 ?箕 ?丌 ?左 ?工 ?㠭 ?巫 ?甘 ; 150
82    ?曰 ?乃 ?丂 ?可 ?兮 ?号 ?亏 ?旨 ?喜 ?壴 ; 160
83    ?鼓 ?豈 ?豆 ?豊 ?豐 ?䖒 ?虍 ?虎 ?虤 ?皿 ; 170
84    ?𠙴 ?去 ?血 ?丶 ?丹 ?青 ?井 ?皀 ?鬯 ?食 ; 180
85    ?亼 ?會 ?倉 ?入 ?缶 ?矢 ?高 ?冂 ?𩫏 ?京 ; 190
86    ?亯 ?𣆪 ?畗 ?㐭 ?嗇 ?來 ?麥 ?夊 ?舛 ?䑞 ; 200
87    ?韋 ?弟 ?夂 ?久 ?桀 ?木 ?東 ?林 ?才 ?叒 ; 210
88    ?之 ?帀 ?出 ?𣎵 ?生 ?乇 ?𠂹 ?𠌶 ?華 ?𥝌 ; 220
89    ?稽 ?巢 ?桼 ?束 ?㯻 ?囗 ?員 ?貝 ?邑 ?𨛜 ; 230
90    ?日 ?旦 ?倝 ?㫃 ?冥 ?晶 ?月 ?有 ?明 ?囧 ; 240
91    ?夕 ?多 ?毌 ?𢎘 ?𣐺 ?卣 ?齊 ?朿 ?片 ?鼎 ; 250
92    ?克 ?彔 ?禾 ?秝 ?黍 ?香 ?米 ?毇 ?臼 ?凶 ; 260
93    ?𣎳 ?林 ?麻 ?尗 ?耑 ?韭 ?瓜 ?瓠 ?宀 ?宮 ; 270
94    ?呂 ?穴 ?㝱 ?𤕫 ?冖 ?𠔼 ?冃 ?㒳 ?网 ?襾 ; 280
95    ?巾 ?巿 ?帛 ?白 ?㡀 ?黹 ?人 ?𠤎 ?匕 ?从 ; 290
96    ])
97
98 (defun shuowen-radical (number)
99   (aref shuowen-radicals (1- number)))
100
101 (defvar char-db-file-coding-system 'utf-8-mcs-er)
102
103 (defvar char-db-ignored-attributes '(ideographic-products))
104
105 (defvar char-db-coded-charset-priority-list
106   '(ascii
107     control-1
108     latin-iso8859-1
109     latin-iso8859-2
110     latin-iso8859-3
111     latin-iso8859-4
112     latin-iso8859-9
113     latin-jisx0201
114     cyrillic-iso8859-5
115     greek-iso8859-7
116     thai-tis620
117     =jis-x0208
118     =jis-x0208@1978
119     =jis-x0208@1983
120     japanese-jisx0212
121     chinese-gb2312
122     =jis-x0208@1990
123     chinese-cns11643-1
124     chinese-cns11643-2
125     chinese-cns11643-3
126     chinese-cns11643-4
127     chinese-cns11643-5
128     chinese-cns11643-6
129     chinese-cns11643-7
130     =jis-x0213-1
131     =jis-x0213-1@2000
132     =jis-x0213-1@2004
133     =jis-x0213-2
134     korean-ksc5601
135     chinese-isoir165
136     katakana-jisx0201
137     hebrew-iso8859-8
138     chinese-gb12345
139     latin-viscii
140     ethiopic-ucs
141     =big5-cdp
142     =gt
143     =>>gt
144     =>gt
145     ideograph-daikanwa-2
146     ideograph-daikanwa
147     =cbeta
148     ideograph-hanziku-1
149     ideograph-hanziku-2
150     ideograph-hanziku-3
151     ideograph-hanziku-4
152     ideograph-hanziku-5
153     ideograph-hanziku-6
154     ideograph-hanziku-7
155     ideograph-hanziku-8
156     ideograph-hanziku-9
157     ideograph-hanziku-10
158     ideograph-hanziku-11
159     ideograph-hanziku-12
160     =gt-k
161     =ucs@iso
162     =ucs@unicode
163     =>>ucs@unicode
164     =>>jis-x0208
165     =>>jis-x0213-1
166     =>>jis-x0213-1@2000
167     =>>jis-x0213-1@2004
168     =>>jis-x0213-2
169     =>>jis-x0208@1978
170     =>jis-x0208
171     =>jis-x0208@1997
172     =>jis-x0213-1
173     =>jis-x0213-1@2000
174     =>jis-x0213-1@2004
175     =>jis-x0213-2
176     =big5
177     =big5-eten
178     =zinbun-oracle
179     =>zinbun-oracle
180     =ruimoku-v6
181     =jef-china3
182     =shinjigen))
183
184
185 ;;; @ char-db formatters
186 ;;;
187
188 (defun char-db-make-char-spec (char)
189   (let (ret char-spec)
190     (cond ((characterp char)
191            (cond ((and (setq ret (encode-char char '=ucs 'defined-only))
192                        (not (and (<= #xE000 ret)(<= ret #xF8FF))))
193                   (setq char-spec (list (cons '=ucs ret)))
194                   (cond ((setq ret (get-char-attribute char 'name))
195                          (setq char-spec (cons (cons 'name ret) char-spec))
196                          )
197                         ((setq ret (get-char-attribute char 'name*))
198                          (setq char-spec (cons (cons 'name* ret) char-spec))
199                          ))
200                   )
201                  ((setq ret
202                         (catch 'tag
203                           (let ((rest char-db-coded-charset-priority-list)
204                                 ccs)
205                             (while rest
206                               (setq ccs (charset-name
207                                          (find-charset (car rest))))
208                               (if (setq ret
209                                         (encode-char char ccs
210                                                      'defined-only))
211                                   (throw 'tag (cons ccs ret)))
212                               (setq rest (cdr rest))))))
213                   (setq char-spec (list ret))
214                   (dolist (ccs (delq (car ret) (charset-list)))
215                     (if (and (or (charset-iso-final-char ccs)
216                                  (memq ccs
217                                        '(=daikanwa
218                                          =daikanwa@rev2
219                                          ;; =gt-k
220                                          =jis-x0208@1997
221                                          ))
222                                  (string-match "=ucs@" (symbol-name ccs)))
223                              (setq ccs (charset-name ccs))
224                              (null (assq ccs char-spec))
225                              (setq ret (encode-char char ccs 'defined-only)))
226                         (setq char-spec (cons (cons ccs ret) char-spec))))
227                   (if (null char-spec)
228                       (setq char-spec (split-char char)))
229                   (cond ((setq ret (get-char-attribute char 'name))
230                          (setq char-spec (cons (cons 'name ret) char-spec))
231                          )
232                         ((setq ret (get-char-attribute char 'name*))
233                          (setq char-spec (cons (cons 'name* ret) char-spec))
234                          ))
235                   )
236                  ((setq ret (get-char-attribute
237                              char 'ideographic-combination))
238                   (setq char-spec
239                         (cons (cons 'ideographic-combination ret)
240                               char-spec))
241                   ))
242            char-spec)
243           ((consp char)
244            char))))
245     
246 (defun char-db-insert-char-spec (char &optional readable column
247                                       required-features)
248   (unless column
249     (setq column (current-column)))
250   (let (char-spec temp-char)
251     (setq char-spec (char-db-make-char-spec char))
252     (unless (or (characterp char) ; char
253                 (condition-case nil
254                     (setq char (find-char char-spec))
255                   (error nil)))
256       ;; define temporary character
257       ;;   Current implementation is dirty.
258       (setq temp-char (define-char (cons '(ideograph-daikanwa . 0)
259                                          char-spec)))
260       (remove-char-attribute temp-char 'ideograph-daikanwa)
261       (setq char temp-char))
262     (insert-char-attributes char
263                             readable
264                             (union (mapcar #'car char-spec)
265                                    required-features))
266     (when temp-char
267       ;; undefine temporary character
268       ;;   Current implementation is dirty.
269       (setq char-spec (char-attribute-alist temp-char))
270       (while char-spec
271         (remove-char-attribute temp-char (car (car char-spec)))
272         (setq char-spec (cdr char-spec))))))
273
274 (defun char-db-insert-alist (alist &optional readable column)
275   (unless column
276     (setq column (current-column)))
277   (let ((line-breaking
278          (concat "\n" (make-string (1+ column) ?\ )))
279         name value
280         ret al ; cal
281         key
282         lbs cell rest separator)
283     (insert "(")
284     (while alist
285       (setq name (car (car alist))
286             value (cdr (car alist)))
287       (cond ((eq name 'char)
288              (insert "(char . ")
289              (if (setq ret (condition-case nil
290                                (find-char value)
291                              (error nil)))
292                  (progn
293                    (setq al nil
294                          ;; cal nil
295                          )
296                    (while value
297                      (setq key (car (car value)))
298                      ;; (if (find-charset key)
299                      ;;     (setq cal (cons key cal))
300                      (setq al (cons key al))
301                      ;; )
302                      (setq value (cdr value)))
303                    (insert-char-attributes ret
304                                            readable
305                                            (or al 'none) ; cal
306                                            ))
307                (insert (prin1-to-string value)))
308              (insert ")")
309              (insert line-breaking))
310             ((consp value)
311              (insert (format "(%-18s " name))
312              (setq lbs (concat "\n" (make-string (current-column) ?\ )))
313              (while (consp value)
314                (setq cell (car value))
315                (if (and (consp cell)
316                         (consp (car cell))
317                         (setq ret (condition-case nil
318                                       (find-char cell)
319                                     (error nil)))
320                         )
321                    (progn
322                      (setq rest cell
323                            al nil
324                            ;; cal nil
325                            )
326                      (while rest
327                        (setq key (car (car rest)))
328                        ;; (if (find-charset key)
329                        ;;     (setq cal (cons key cal))
330                        (setq al (cons key al))
331                        ;; )
332                        (setq rest (cdr rest)))
333                      (if separator
334                          (insert lbs))
335                      (insert-char-attributes ret
336                                              readable
337                                              al ; cal
338                                              )
339                      (setq separator lbs))
340                  (if separator
341                      (insert separator))
342                  (insert (prin1-to-string cell))
343                  (setq separator " "))
344                (setq value (cdr value)))
345              (insert ")")
346              (insert line-breaking))
347             (t
348              (insert (format "(%-18s . %S)%s"
349                              name value
350                              line-breaking))))
351       (setq alist (cdr alist))))
352   (insert ")"))
353
354 (defun char-db-insert-char-reference (plist &optional readable column)
355   (unless column
356     (setq column (current-column)))
357   (let ((line-breaking
358          (concat "\n" (make-string (1+ column) ?\ )))
359         (separator "")
360         name value)
361     (insert "(")
362     (while plist
363       (setq name (pop plist))
364       (setq value (pop plist))
365       (cond ((eq name :char)
366              (insert separator)
367              (insert ":char\t")
368              (cond ((numberp value)
369                     (setq value (decode-char '=ucs value)))
370                    ;; ((consp value)
371                    ;;  (setq value (or (find-char value)
372                    ;;                  value)))
373                    )
374              (char-db-insert-char-spec value readable)
375              (insert line-breaking)
376              (setq separator ""))
377             ((eq name :radical)
378              (insert (format "%s%s\t%d ; %c%s"
379                              separator
380                              name value
381                              (ideographic-radical value)
382                              line-breaking))
383              (setq separator ""))
384             (t
385              (insert (format "%s%s\t%S" separator name value))
386              (setq separator line-breaking)))
387       ))
388   (insert ")"))
389
390 (defun char-db-decode-isolated-char (ccs code-point)
391   (let (ret)
392     (setq ret
393           (cond ((eq ccs 'arabic-iso8859-6)
394                  (decode-char ccs code-point))
395                 ((and (memq ccs '(=gt-pj-1
396                                   =gt-pj-2
397                                   =gt-pj-3
398                                   =gt-pj-4
399                                   =gt-pj-5
400                                   =gt-pj-6
401                                   =gt-pj-7
402                                   =gt-pj-8
403                                   =gt-pj-9
404                                   =gt-pj-10
405                                   =gt-pj-11))
406                       (setq ret (decode-char ccs code-point))
407                       (setq ret (encode-char ret '=gt 'defined-only)))
408                  (decode-builtin-char '=gt ret))
409                 (t
410                  (decode-builtin-char ccs code-point))))
411     (cond ((and (<= 0 (char-int ret))
412                 (<= (char-int ret) #x1F))
413            (decode-char '=ucs (+ #x2400 (char-int ret))))
414           ((= (char-int ret) #x7F)
415            ?\u2421)
416           (t ret))))
417
418 (defvar char-db-convert-obsolete-format t)
419
420 (defun char-db-insert-ccs-feature (name value line-breaking)
421   (insert
422    (format
423     (cond ((memq name '(=shinjigen
424                         =shinjigen@1ed
425                         =shinjigen@rev =shinjigen/+p@rev))
426            "(%-18s .  %04d)\t; %c")
427           ((eq name '=shinjigen@1ed/24pr)
428            "(%-18s . %04d)\t; %c")
429           ((or (memq name '(=daikanwa
430                             =daikanwa@rev1 =daikanwa@rev2
431                             =gt =>>gt =>gt =gt-k =cbeta
432                             =zinbun-oracle =>zinbun-oracle))
433                (string-match "^=adobe-" (symbol-name name)))
434            "(%-18s . %05d)\t; %c")
435           ((eq name 'mojikyo)
436            "(%-18s . %06d)\t; %c")
437           ((>= (charset-dimension name) 2)
438            "(%-18s . #x%04X)\t; %c")
439           (t
440            "(%-18s . #x%02X)\t; %c"))
441     name
442     (if (= (charset-iso-graphic-plane name) 1)
443         (logior value
444                 (cond ((= (charset-dimension name) 1)
445                        #x80)
446                       ((= (charset-dimension name) 2)
447                        #x8080)
448                       ((= (charset-dimension name) 3)
449                        #x808080)
450                       (t 0)))
451       value)
452     (char-db-decode-isolated-char name value)))
453   (if (and (= (charset-chars name) 94)
454            (= (charset-dimension name) 2))
455       (insert (format " [%02d-%02d]"
456                       (- (lsh value -8) 32)
457                       (- (logand value 255) 32))))
458   (insert line-breaking))
459
460 (defun char-db-insert-relation-feature (char name value line-breaking
461                                              ccss readable)
462   (insert (format "(%-18s%s " name line-breaking))
463   (let ((lbs (concat "\n" (make-string (current-column) ?\ )))
464         separator cell sources required-features
465         ret)
466     (while (consp value)
467       (setq cell (car value))
468       (if (integerp cell)
469           (setq cell (decode-char '=ucs cell)))
470       (cond
471        ((eq name '->subsumptive)
472         (when (or (not (some (lambda (atr)
473                                (get-char-attribute cell atr))
474                              char-db-ignored-attributes))
475                   (some (lambda (ccs)
476                           (encode-char cell ccs 'defined-only))
477                         ccss))
478           (if separator
479               (insert lbs))
480           (let ((char-db-ignored-attributes
481                  (cons '<-subsumptive
482                        char-db-ignored-attributes)))
483             (insert-char-attributes cell readable))
484           (setq separator lbs))
485         )
486        ((characterp cell)
487         (setq sources
488               (get-char-attribute
489                char (intern (format "%s*sources" name))))
490         (setq required-features nil)
491         (dolist (source sources)
492           (cond
493            ((memq source '(JP
494                            JP/Jouyou
495                            shinjigen shinjigen@1ed shinjigen@rev))
496             (setq required-features
497                   (union required-features
498                          '(=jis-x0208
499                            =jis-x0208@1990
500                            =jis-x0213-1-2000
501                            =jis-x0213-2-2000
502                            =jis-x0212
503                            =jis-x0208@1983
504                            =jis-x0208@1978
505                            =shinjigen))))
506            ((eq source 'CN)
507             (setq required-features
508                   (union required-features
509                          '(=gb2312
510                            =gb12345
511                            =iso-ir165)))))
512           (cond
513            ((find-charset (setq ret (intern (format "=%s" source))))
514             (setq required-features
515                   (cons ret required-features)))
516            (t (setq required-features
517                     (cons source required-features)))))
518         (cond ((string-match "@JP" (symbol-name name))
519                (setq required-features
520                      (union required-features
521                             '(=jis-x0208
522                               =jis-x0208@1990
523                               =jis-x0213-1-2000
524                               =jis-x0213-2-2000
525                               =jis-x0212
526                               =jis-x0208@1983
527                               =jis-x0208@1978))))
528               ((string-match "@CN" (symbol-name name))
529                (setq required-features
530                      (union required-features
531                             '(=gb2312
532                               =gb12345
533                               =iso-ir165)))))
534         (if separator
535             (insert lbs))
536         (if readable
537             (insert (format "%S" cell))
538           (char-db-insert-char-spec cell readable
539                                     nil
540                                     required-features))
541         (setq separator lbs))
542        ((consp cell)
543         (if separator
544             (insert lbs))
545         (if (consp (car cell))
546             (char-db-insert-char-spec cell readable)
547           (char-db-insert-char-reference cell readable))
548         (setq separator lbs))
549        (t
550         (if separator
551             (insert separator))
552         (insert (prin1-to-string cell))
553         (setq separator " ")))
554       (setq value (cdr value)))
555     (insert ")")
556     (insert line-breaking)))
557
558 (defun insert-char-attributes (char &optional readable attributes column)
559   (unless column
560     (setq column (current-column)))
561   (let (name value ; has-long-ccs-name
562         rest
563         radical strokes
564         (line-breaking
565          (concat "\n" (make-string (1+ column) ?\ )))
566         lbs cell separator ret
567         key al cal
568         dest-ccss ; sources required-features
569         ccss)
570     (let (atr-d)
571       (setq attributes
572             (sort (if attributes
573                       (if (consp attributes)
574                           (progn
575                             (dolist (name attributes)
576                               (unless (memq name char-db-ignored-attributes)
577                                 (if (find-charset name)
578                                     (push name ccss))
579                                 (push name atr-d)))
580                             atr-d))
581                     (dolist (name (char-attribute-list))
582                       (unless (memq name char-db-ignored-attributes)
583                         (if (find-charset name)
584                             (push name ccss))
585                         (push name atr-d)))
586                     atr-d)
587                   #'char-attribute-name<)))
588     (insert "(")
589     (when (memq '<-subsumptive attributes)
590       (when readable
591         (when (setq value (get-char-attribute char '<-subsumptive))
592           (char-db-insert-relation-feature char '<-subsumptive value
593                                            line-breaking
594                                            ccss readable)))
595       (setq attributes (delq '<-subsumptive attributes)))
596     (when (and (memq '<-denotational attributes)
597                (setq value (get-char-attribute char '<-denotational)))
598       (char-db-insert-relation-feature char '<-denotational value
599                                        line-breaking
600                                        ccss readable)
601       (setq attributes (delq '<-denotational attributes)))
602     (when (and (memq 'name attributes)
603                (setq value (get-char-attribute char 'name)))
604       (insert (format
605                (if (> (+ (current-column) (length value)) 48)
606                    "(name . %S)%s"
607                  "(name               . %S)%s")
608                value line-breaking))
609       (setq attributes (delq 'name attributes))
610       )
611     (when (and (memq 'name* attributes)
612                (setq value (get-char-attribute char 'name*)))
613       (insert (format
614                (if (> (+ (current-column) (length value)) 48)
615                    "(name* . %S)%s"
616                  "(name*              . %S)%s")
617                value line-breaking))
618       (setq attributes (delq 'name* attributes))
619       )
620     (when (and (memq 'script attributes)
621                (setq value (get-char-attribute char 'script)))
622       (insert (format "(script\t\t%s)%s"
623                       (mapconcat (function prin1-to-string)
624                                  value " ")
625                       line-breaking))
626       (setq attributes (delq 'script attributes))
627       )
628     (dolist (name '(=>ucs =>ucs*))
629       (when (and (memq name attributes)
630                  (setq value (get-char-attribute char name)))
631         (insert (format "(%-18s . #x%04X)\t; %c%s"
632                         name value (decode-char '=ucs value)
633                         line-breaking))
634         (setq attributes (delq name attributes))))
635     (dolist (name '(=>ucs@gb =>ucs@cns =>ucs@jis =>ucs@ks =>ucs@big5))
636       (when (and (memq name attributes)
637                  (setq value (get-char-attribute char name)))
638         (insert (format "(%-18s . #x%04X)\t; %c%s"
639                         name value
640                         (decode-char (intern
641                                       (concat "="
642                                               (substring
643                                                (symbol-name name) 2)))
644                                      value)
645                         line-breaking))
646         (setq attributes (delq name attributes))
647         ))
648     (dolist (name '(=>daikanwa))
649       (when (and (memq name attributes)
650                  (setq value (get-char-attribute char name)))
651         (insert
652          (if (integerp value)
653              (format "(%-18s . %05d)\t; %c%s"
654                      name value (decode-char '=daikanwa value)
655                      line-breaking)
656            (format "(%-18s %s)\t; %c%s"
657                    name
658                    (mapconcat (function prin1-to-string)
659                               value " ")
660                    (char-representative-of-daikanwa char)
661                    line-breaking)))
662         (setq attributes (delq name attributes))))
663     (when (and (memq 'general-category attributes)
664                (setq value (get-char-attribute char 'general-category)))
665       (insert (format
666                "(general-category\t%s) ; %s%s"
667                (mapconcat (lambda (cell)
668                             (format "%S" cell))
669                           value " ")
670                (cond ((rassoc value unidata-normative-category-alist)
671                       "Normative Category")
672                      ((rassoc value unidata-informative-category-alist)
673                       "Informative Category")
674                      (t
675                       "Unknown Category"))
676                line-breaking))
677       (setq attributes (delq 'general-category attributes))
678       )
679     (when (and (memq 'bidi-category attributes)
680                (setq value (get-char-attribute char 'bidi-category)))
681       (insert (format "(bidi-category\t. %S)%s"
682                       value
683                       line-breaking))
684       (setq attributes (delq 'bidi-category attributes))
685       )
686     (unless (or (not (memq 'mirrored attributes))
687                 (eq (setq value (get-char-attribute char 'mirrored 'empty))
688                     'empty))
689       (insert (format "(mirrored\t\t. %S)%s"
690                       value
691                       line-breaking))
692       (setq attributes (delq 'mirrored attributes))
693       )
694     (cond
695      ((and (memq 'decimal-digit-value attributes)
696            (setq value (get-char-attribute char 'decimal-digit-value)))
697       (insert (format "(decimal-digit-value . %S)%s"
698                       value
699                       line-breaking))
700       (setq attributes (delq 'decimal-digit-value attributes))
701       (when (and (memq 'digit-value attributes)
702                  (setq value (get-char-attribute char 'digit-value)))
703         (insert (format "(digit-value\t . %S)%s"
704                         value
705                         line-breaking))
706         (setq attributes (delq 'digit-value attributes))
707         )
708       (when (and (memq 'numeric-value attributes)
709                  (setq value (get-char-attribute char 'numeric-value)))
710         (insert (format "(numeric-value\t . %S)%s"
711                         value
712                         line-breaking))
713         (setq attributes (delq 'numeric-value attributes))
714         )
715       )
716      (t
717       (when (and (memq 'digit-value attributes)
718                  (setq value (get-char-attribute char 'digit-value)))
719         (insert (format "(digit-value\t. %S)%s"
720                         value
721                         line-breaking))
722         (setq attributes (delq 'digit-value attributes))
723         )
724       (when (and (memq 'numeric-value attributes)
725                  (setq value (get-char-attribute char 'numeric-value)))
726         (insert (format "(numeric-value\t. %S)%s"
727                         value
728                         line-breaking))
729         (setq attributes (delq 'numeric-value attributes))
730         )))
731     (when (and (memq 'iso-10646-comment attributes)
732                (setq value (get-char-attribute char 'iso-10646-comment)))
733       (insert (format "(iso-10646-comment\t. %S)%s"
734                       value
735                       line-breaking))
736       (setq attributes (delq 'iso-10646-comment attributes))
737       )
738     (when (and (memq 'morohashi-daikanwa attributes)
739                (setq value (get-char-attribute char 'morohashi-daikanwa)))
740       (insert (format "(morohashi-daikanwa\t%s)%s"
741                       (mapconcat (function prin1-to-string) value " ")
742                       line-breaking))
743       (setq attributes (delq 'morohashi-daikanwa attributes))
744       )
745     (setq radical nil
746           strokes nil)
747     (when (and (memq 'ideographic-radical attributes)
748                (setq value (get-char-attribute char 'ideographic-radical)))
749       (setq radical value)
750       (insert (format "(ideographic-radical . %S)\t; %c%s"
751                       radical
752                       (ideographic-radical radical)
753                       line-breaking))
754       (setq attributes (delq 'ideographic-radical attributes))
755       )
756     (when (and (memq 'shuowen-radical attributes)
757                (setq value (get-char-attribute char 'shuowen-radical)))
758       (insert (format "(shuowen-radical\t. %S)\t; %c%s"
759                       value
760                       (shuowen-radical value)
761                       line-breaking))
762       (setq attributes (delq 'shuowen-radical attributes))
763       )
764     (let (key)
765       (dolist (domain
766                (append
767                 char-db-feature-domains
768                 (let (dest domain)
769                   (dolist (feature (char-attribute-list))
770                     (setq feature (symbol-name feature))
771                     (when (string-match
772                            "\\(radical\\|strokes\\)@\\([^@*]+\\)\\(\\*\\|$\\)"
773                            feature)
774                       (setq domain (intern (match-string 2 feature)))
775                      (unless (memq domain dest)
776                        (setq dest (cons domain dest)))))
777                   (sort dest #'string<))))
778         (setq key (intern (format "%s@%s" 'ideographic-radical domain)))
779         (when (and (memq key attributes)
780                    (setq value (get-char-attribute char key)))
781           (setq radical value)
782           (insert (format "(%s . %S)\t; %c%s"
783                           key
784                           radical
785                           (ideographic-radical radical)
786                           line-breaking))
787           (setq attributes (delq key attributes))
788           )
789         (setq key (intern (format "%s@%s" 'ideographic-strokes domain)))
790         (when (and (memq key attributes)
791                    (setq value (get-char-attribute char key)))
792           (setq strokes value)
793           (insert (format "(%s . %S)%s"
794                           key
795                           strokes
796                           line-breaking))
797           (setq attributes (delq key attributes))
798           )
799         (setq key (intern (format "%s@%s" 'total-strokes domain)))
800         (when (and (memq key attributes)
801                    (setq value (get-char-attribute char key)))
802           (insert (format "(%s       . %S)%s"
803                           key
804                           value
805                           line-breaking))
806           (setq attributes (delq key attributes))
807           )
808         (dolist (feature '(ideographic-radical
809                            ideographic-strokes
810                            total-strokes))
811           (setq key (intern (format "%s@%s*sources" feature domain)))
812           (when (and (memq key attributes)
813                      (setq value (get-char-attribute char key)))
814             (insert (format "(%s%s" key line-breaking))
815             (dolist (cell value)
816               (insert (format " %s" cell)))
817             (insert ")")
818             (insert line-breaking)
819             (setq attributes (delq key attributes))
820             ))
821         ))
822     (when (and (memq 'ideographic-strokes attributes)
823                (setq value (get-char-attribute char 'ideographic-strokes)))
824       (setq strokes value)
825       (insert (format "(ideographic-strokes . %S)%s"
826                       strokes
827                       line-breaking))
828       (setq attributes (delq 'ideographic-strokes attributes))
829       )
830     (when (and (memq 'kangxi-radical attributes)
831                (setq value (get-char-attribute char 'kangxi-radical)))
832       (unless (eq value radical)
833         (insert (format "(kangxi-radical\t . %S)\t; %c%s"
834                         value
835                         (ideographic-radical value)
836                         line-breaking))
837         (or radical
838             (setq radical value)))
839       (setq attributes (delq 'kangxi-radical attributes))
840       )
841     (when (and (memq 'kangxi-strokes attributes)
842                (setq value (get-char-attribute char 'kangxi-strokes)))
843       (unless (eq value strokes)
844         (insert (format "(kangxi-strokes\t . %S)%s"
845                         value
846                         line-breaking))
847         (or strokes
848             (setq strokes value)))
849       (setq attributes (delq 'kangxi-strokes attributes))
850       )
851     (when (and (memq 'japanese-radical attributes)
852                (setq value (get-char-attribute char 'japanese-radical)))
853       (unless (eq value radical)
854         (insert (format "(japanese-radical\t . %S)\t; %c%s"
855                         value
856                         (ideographic-radical value)
857                         line-breaking))
858         (or radical
859             (setq radical value)))
860       (setq attributes (delq 'japanese-radical attributes))
861       )
862     (when (and (memq 'japanese-strokes attributes)
863                (setq value (get-char-attribute char 'japanese-strokes)))
864       (unless (eq value strokes)
865         (insert (format "(japanese-strokes\t . %S)%s"
866                         value
867                         line-breaking))
868         (or strokes
869             (setq strokes value)))
870       (setq attributes (delq 'japanese-strokes attributes))
871       )
872     (when (and (memq 'cns-radical attributes)
873                (setq value (get-char-attribute char 'cns-radical)))
874       (insert (format "(cns-radical\t . %S)\t; %c%s"
875                       value
876                       (ideographic-radical value)
877                       line-breaking))
878       (setq attributes (delq 'cns-radical attributes))
879       )
880     (when (and (memq 'cns-strokes attributes)
881                (setq value (get-char-attribute char 'cns-strokes)))
882       (unless (eq value strokes)
883         (insert (format "(cns-strokes\t . %S)%s"
884                         value
885                         line-breaking))
886         (or strokes
887             (setq strokes value)))
888       (setq attributes (delq 'cns-strokes attributes))
889       )
890     (when (and (memq 'shinjigen-1-radical attributes)
891                (setq value (get-char-attribute char 'shinjigen-1-radical)))
892       (unless (eq value radical)
893         (insert (format "(shinjigen-1-radical . %S)\t; %c%s"
894                         value
895                         (ideographic-radical value)
896                         line-breaking))
897         (or radical
898             (setq radical value)))
899       (setq attributes (delq 'shinjigen-1-radical attributes))
900       )
901     (when (and (memq 'ideographic- attributes)
902                (setq value (get-char-attribute char 'ideographic-)))
903       (insert "(ideographic-       ")
904       (setq lbs (concat "\n" (make-string (current-column) ?\ ))
905             separator nil)
906       (while (consp value)
907         (setq cell (car value))
908         (if (integerp cell)
909             (setq cell (decode-char '=ucs cell)))
910         (cond ((characterp cell)
911                (if separator
912                    (insert lbs))
913                (if readable
914                    (insert (format "%S" cell))
915                  (char-db-insert-char-spec cell readable))
916                (setq separator lbs))
917               ((consp cell)
918                (if separator
919                    (insert lbs))
920                (if (consp (car cell))
921                    (char-db-insert-char-spec cell readable)
922                  (char-db-insert-char-reference cell readable))
923                (setq separator lbs))
924               (t
925                (if separator
926                    (insert separator))
927                (insert (prin1-to-string cell))
928                (setq separator " ")))
929         (setq value (cdr value)))
930       (insert ")")
931       (insert line-breaking)
932       (setq attributes (delq 'ideographic- attributes)))
933     (when (and (memq 'total-strokes attributes)
934                (setq value (get-char-attribute char 'total-strokes)))
935       (insert (format "(total-strokes       . %S)%s"
936                       value
937                       line-breaking))
938       (setq attributes (delq 'total-strokes attributes))
939       )
940     (when (and (memq '->ideograph attributes)
941                (setq value (get-char-attribute char '->ideograph)))
942       (insert (format "(->ideograph\t%s)%s"
943                       (mapconcat (lambda (code)
944                                    (cond ((symbolp code)
945                                           (symbol-name code))
946                                          ((integerp code)
947                                           (format "#x%04X" code))
948                                          (t
949                                           (format "%s %S"
950                                                   line-breaking code))))
951                                  value " ")
952                       line-breaking))
953       (setq attributes (delq '->ideograph attributes))
954       )
955     ;; (when (and (memq '->decomposition attributes)
956     ;;            (setq value (get-char-attribute char '->decomposition)))
957     ;;   (insert (format "(->decomposition\t%s)%s"
958     ;;                   (mapconcat (lambda (code)
959     ;;                                (cond ((symbolp code)
960     ;;                                       (symbol-name code))
961     ;;                                      ((characterp code)
962     ;;                                       (if readable
963     ;;                                           (format "%S" code)
964     ;;                                         (format "#x%04X"
965     ;;                                                 (char-int code))
966     ;;                                         ))
967     ;;                                      ((integerp code)
968     ;;                                       (format "#x%04X" code))
969     ;;                                      (t
970     ;;                                       (format "%s%S" line-breaking code))))
971     ;;                              value " ")
972     ;;                   line-breaking))
973     ;;   (setq attributes (delq '->decomposition attributes))
974     ;;   )
975     (if (equal (get-char-attribute char '->titlecase)
976                (get-char-attribute char '->uppercase))
977         (setq attributes (delq '->titlecase attributes)))
978     (when (and (memq '->mojikyo attributes)
979                (setq value (get-char-attribute char '->mojikyo)))
980       (insert (format "(->mojikyo\t\t. %06d)\t; %c%s"
981                       value (decode-char 'mojikyo value)
982                       line-breaking))
983       (setq attributes (delq '->mojikyo attributes))
984       )
985     (when (and (memq 'hanyu-dazidian-vol attributes)
986                (setq value (get-char-attribute char 'hanyu-dazidian-vol)))
987       (insert (format "(hanyu-dazidian-vol  . %d)%s"
988                       value line-breaking))
989       (setq attributes (delq 'hanyu-dazidian-vol attributes))
990       )
991     (when (and (memq 'hanyu-dazidian-page attributes)
992                (setq value (get-char-attribute char 'hanyu-dazidian-page)))
993       (insert (format "(hanyu-dazidian-page . %d)%s"
994                       value line-breaking))
995       (setq attributes (delq 'hanyu-dazidian-page attributes))
996       )
997     (when (and (memq 'hanyu-dazidian-char attributes)
998                (setq value (get-char-attribute char 'hanyu-dazidian-char)))
999       (insert (format "(hanyu-dazidian-char . %d)%s"
1000                       value line-breaking))
1001       (setq attributes (delq 'hanyu-dazidian-char attributes))
1002       )
1003     (unless readable
1004       (dolist (ignored '(composition
1005                          ->denotational <-subsumptive ->ucs-unified
1006                          ->ideographic-component-forms))
1007         (setq attributes (delq ignored attributes))))
1008     (while attributes
1009       (setq name (car attributes))
1010       (if (setq value (get-char-attribute char name))
1011           (cond ((setq ret (find-charset name))
1012                  (setq name (charset-name ret))
1013                  (if (and (not (memq name dest-ccss))
1014                           (prog1
1015                               (setq value (get-char-attribute char name))
1016                             (setq dest-ccss (cons name dest-ccss))))
1017                      (char-db-insert-ccs-feature name value line-breaking))
1018                  )
1019                 ((string-match "^=>ucs@" (symbol-name name))
1020                  (insert (format "(%-18s . #x%04X)\t; %c%s"
1021                                  name value (decode-char '=ucs value)
1022                                  line-breaking))
1023                  )
1024                 ((eq name 'jisx0208-1978/4X)
1025                  (insert (format "(%-18s . #x%04X)%s"
1026                                  name value
1027                                  line-breaking))
1028                  )
1029                 ((and
1030                   (not readable)
1031                   (not (eq name '->subsumptive))
1032                   (not (eq name '->uppercase))
1033                   (not (eq name '->lowercase))
1034                   (not (eq name '->titlecase))
1035                   (not (eq name '->canonical))
1036                   (not (eq name '->Bopomofo))
1037                   (not (eq name '->mistakable))
1038                   (not (eq name '->ideographic-variants))
1039                   (null (get-char-attribute
1040                          char (intern (format "%s*sources" name))))
1041                   (not (string-match "\\*sources$" (symbol-name name)))
1042                   (null (get-char-attribute
1043                          char (intern (format "%s*note" name))))
1044                   (not (string-match "\\*note$" (symbol-name name)))
1045                   (or (eq name '<-identical)
1046                       (eq name '<-uppercase)
1047                       (eq name '<-lowercase)
1048                       (eq name '<-titlecase)
1049                       (eq name '<-canonical)
1050                       (eq name '<-ideographic-variants)
1051                       ;; (eq name '<-synonyms)
1052                       (string-match "^<-synonyms" (symbol-name name))
1053                       (eq name '<-mistakable)
1054                       (when (string-match "^->" (symbol-name name))
1055                         (cond
1056                          ((string-match "^->fullwidth" (symbol-name name))
1057                           (not (and (consp value)
1058                                     (characterp (car value))
1059                                     (encode-char
1060                                      (car value) '=ucs 'defined-only)))
1061                           )
1062                          (t)))
1063                       ))
1064                  )
1065                 ((or (eq name 'ideographic-structure)
1066                      (eq name 'ideographic-combination)
1067                      (eq name 'ideographic-)
1068                      (eq name '=decomposition)
1069                      (string-match "^=>decomposition" (symbol-name name))
1070                      (string-match "^\\(->\\|<-\\)[^*]*$" (symbol-name name))
1071                      (string-match "^\\(->\\|<-\\)[^*]*\\*sources$"
1072                                    (symbol-name name))
1073                      )
1074                  (char-db-insert-relation-feature char name value
1075                                                   line-breaking
1076                                                   ccss readable))
1077                 ((memq name '(ideograph=
1078                               original-ideograph-of
1079                               ancient-ideograph-of
1080                               vulgar-ideograph-of
1081                               wrong-ideograph-of
1082                               ;; simplified-ideograph-of
1083                               ideographic-variants
1084                               ;; ideographic-different-form-of
1085                               ))
1086                  (insert (format "(%-18s%s " name line-breaking))
1087                  (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1088                        separator nil)
1089                  (while (consp value)
1090                    (setq cell (car value))
1091                    (if (and (consp cell)
1092                             (consp (car cell)))
1093                        (progn
1094                          (if separator
1095                              (insert lbs))
1096                          (char-db-insert-alist cell readable)
1097                          (setq separator lbs))
1098                      (if separator
1099                          (insert separator))
1100                      (insert (prin1-to-string cell))
1101                      (setq separator " "))
1102                    (setq value (cdr value)))
1103                  (insert ")")
1104                  (insert line-breaking))
1105                 ((consp value)
1106                  (insert (format "(%-18s " name))
1107                  (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1108                        separator nil)
1109                  (while (consp value)
1110                    (setq cell (car value))
1111                    (if (and (consp cell)
1112                             (consp (car cell))
1113                             (setq ret (condition-case nil
1114                                           (find-char cell)
1115                                         (error nil))))
1116                        (progn
1117                          (setq rest cell
1118                                al nil
1119                                cal nil)
1120                          (while rest
1121                            (setq key (car (car rest)))
1122                            (if (find-charset key)
1123                                (setq cal (cons key cal))
1124                              (setq al (cons key al)))
1125                            (setq rest (cdr rest)))
1126                          (if separator
1127                              (insert lbs))
1128                          (insert-char-attributes ret
1129                                                  readable
1130                                                  al cal)
1131                          (setq separator lbs))
1132                      (setq ret (prin1-to-string cell))
1133                      (if separator
1134                          (if (< (+ (current-column)
1135                                    (length ret)
1136                                    (length separator))
1137                                 76)
1138                              (insert separator)
1139                            (insert lbs)))
1140                      (insert ret)
1141                      (setq separator " "))
1142                    (setq value (cdr value)))
1143                  (insert ")")
1144                  (insert line-breaking))
1145                 (t
1146                  (insert (format "(%-18s" name))
1147                  (setq ret (prin1-to-string value))
1148                  (unless (< (+ (current-column)
1149                                (length ret)
1150                                3)
1151                             76)
1152                    (insert line-breaking))
1153                  (insert " . " ret ")" line-breaking)
1154                  ;; (insert (format "(%-18s . %S)%s"
1155                  ;;                 name value
1156                  ;;                 line-breaking))
1157                  )
1158                 ))
1159       (setq attributes (cdr attributes)))
1160     (insert ")")))
1161
1162 (defun insert-char-data (char &optional readable
1163                               attributes)
1164   (save-restriction
1165     (narrow-to-region (point)(point))
1166     (insert "(define-char
1167   '")
1168     (insert-char-attributes char readable attributes)
1169     (insert ")\n")
1170     (goto-char (point-min))
1171     (while (re-search-forward "[ \t]+$" nil t)
1172       (replace-match ""))
1173     ;; from tabify.
1174     (goto-char (point-min))
1175     (while (re-search-forward "[ \t][ \t][ \t]*" nil t)
1176       (let ((column (current-column))
1177             (indent-tabs-mode t))
1178         (delete-region (match-beginning 0) (point))
1179         (indent-to column)))
1180     (goto-char (point-max))
1181     ;; (tabify (point-min)(point-max))
1182     ))
1183
1184 (defun insert-char-data-with-variant (char &optional printable
1185                                            no-ucs-unified
1186                                            script excluded-script)
1187   (insert-char-data char printable)
1188   (let ((variants (char-variants char))
1189         rest
1190         variant vs ret)
1191     (setq variants (sort variants #'<))
1192     (setq rest variants)
1193     (setq variants (cons char variants))
1194     (while rest
1195       (setq variant (car rest))
1196       (unless (get-char-attribute variant '<-subsumptive)
1197         (if (and (or (null script)
1198                      (null (setq vs (get-char-attribute variant 'script)))
1199                      (memq script vs))
1200                  (or (null excluded-script)
1201                      (null (setq vs (get-char-attribute variant 'script)))
1202                      (not (memq excluded-script vs))))
1203             (unless (and no-ucs-unified (get-char-attribute variant '=ucs))
1204               (insert-char-data variant printable)
1205               (if (setq ret (char-variants variant))
1206                   (while ret
1207                     (or (memq (car ret) variants)
1208                         ;; (get-char-attribute (car ret) '<-subsumptive)
1209                         (setq rest (nconc rest (list (car ret)))))
1210                     (setq ret (cdr ret)))))))
1211       (setq rest (cdr rest)))))
1212
1213 (defun insert-char-range-data (min max &optional script excluded-script)
1214   (let ((code min)
1215         char)
1216     (while (<= code max)
1217       (setq char (decode-char '=ucs code))
1218       (if (encode-char char '=ucs 'defined-only)
1219           (insert-char-data-with-variant char nil 'no-ucs-unified
1220                                          script excluded-script))
1221       (setq code (1+ code)))))
1222
1223 (defun write-char-range-data-to-file (min max file
1224                                           &optional script excluded-script)
1225   (let ((coding-system-for-write char-db-file-coding-system))
1226     (with-temp-buffer
1227       (insert (format ";; -*- coding: %s -*-\n"
1228                       char-db-file-coding-system))
1229       (insert-char-range-data min max script excluded-script)
1230       (write-region (point-min)(point-max) file))))
1231
1232 (defvar what-character-original-window-configuration)
1233
1234 ;;;###autoload
1235 (defun what-char-definition (char)
1236   (interactive (list (char-after)))
1237   (let ((buf (get-buffer-create "*Character Description*"))
1238         (the-buf (current-buffer))
1239         (win-conf (current-window-configuration)))
1240     (pop-to-buffer buf)
1241     (make-local-variable 'what-character-original-window-configuration)
1242     (setq what-character-original-window-configuration win-conf)
1243     (setq buffer-read-only nil)
1244     (erase-buffer)
1245     (condition-case err
1246         (progn
1247           (insert-char-data-with-variant char 'printable)
1248           (unless (char-attribute-alist char)
1249             (insert (format ";; = %c\n"
1250                             (let* ((rest (split-char char))
1251                                    (ccs (pop rest))
1252                                    (code (pop rest)))
1253                               (while rest
1254                                 (setq code (logior (lsh code 8)
1255                                                    (pop rest))))
1256                               (decode-char ccs code)))))
1257           ;; (char-db-update-comment)
1258           (set-buffer-modified-p nil)
1259           (view-mode the-buf (lambda (buf)
1260                                (set-window-configuration
1261                                 what-character-original-window-configuration)
1262                                ))
1263           (goto-char (point-min)))
1264       (error (progn
1265                (set-window-configuration
1266                 what-character-original-window-configuration)
1267                (signal (car err) (cdr err)))))))
1268
1269
1270 ;;; @ end
1271 ;;;
1272
1273 (provide 'char-db-util)
1274
1275 ;;; char-db-util.el ends here