1 ;;; char-db-util.el --- Character Database utility
3 ;; Copyright (C) 1998,1999,2000,2001,2002,2003,2004 MORIOKA Tomohiko.
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: CHISE, Character Database, ISO/IEC 10646, Unicode, UCS-4, MULE.
8 ;; This file is part of XEmacs CHISE.
10 ;; XEmacs CHISE is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
15 ;; XEmacs CHISE is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with XEmacs CHISE; see the file COPYING. If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
29 (defconst unidata-normative-category-alist
30 '(("Lu" letter uppercase)
31 ("Ll" letter lowercase)
32 ("Lt" letter titlecase)
33 ("Mn" mark non-spacing)
34 ("Mc" mark spacing-combining)
36 ("Nd" number decimal-digit)
39 ("Zs" separator space)
41 ("Zp" separator paragraph)
44 ("Cs" other surrogate)
45 ("Co" other private-use)
46 ("Cn" other not-assigned)))
48 (defconst unidata-informative-category-alist
49 '(("Lm" letter modifier)
51 ("Pc" punctuation connector)
52 ("Pd" punctuation dash)
53 ("Ps" punctuation open)
54 ("Pe" punctuation close)
55 ("Pi" punctuation initial-quote)
56 ("Pf" punctuation final-quote)
57 ("Po" punctuation other)
59 ("Sc" symbol currency)
60 ("Sk" symbol modifier)
64 (defconst ideographic-radicals
65 (let ((v (make-vector 215 nil))
68 (aset v i (decode-char '=ucs (+ #x2EFF i)))
72 (defvar char-db-feature-domains
73 '(ucs daikanwa cns gt jis jis/alt jis/a jis/b
74 jis-x0213 misc unknown))
76 (defvar char-db-ignored-attributes nil)
78 (defun char-attribute-name< (ka kb)
83 (if (<= (charset-id ka) 0)
84 (if (<= (charset-id kb) 0)
86 ((= (charset-dimension ka)
87 (charset-dimension kb))
88 (> (charset-id ka)(charset-id kb)))
90 (> (charset-dimension ka)
91 (charset-dimension kb))
94 (if (<= (charset-id kb) 0)
96 (< (charset-id ka)(charset-id kb)))))
101 ((eq '->denotational kb)
103 ((eq '->subsumptive kb)
104 (not (eq '->denotational ka)))
105 ((eq '->denotational ka)
107 ((eq '->subsumptive ka)
113 (string< (symbol-name ka)
119 (defvar char-db-coded-charset-priority-list
134 japanese-jisx0208-1978
175 (defun char-db-make-char-spec (char)
177 (cond ((characterp char)
178 (cond ((and (setq ret (encode-char char '=ucs 'defined-only))
179 (not (and (<= #xE000 ret)(<= ret #xF8FF))))
180 (setq char-spec (list (cons '=ucs ret)))
181 (cond ((setq ret (get-char-attribute char 'name))
182 (setq char-spec (cons (cons 'name ret) char-spec))
184 ((setq ret (get-char-attribute char 'name*))
185 (setq char-spec (cons (cons 'name* ret) char-spec))
190 (let ((rest char-db-coded-charset-priority-list)
193 (setq ccs (charset-name
194 (find-charset (car rest))))
196 (encode-char char ccs
198 (throw 'tag (cons ccs ret)))
199 (setq rest (cdr rest))))))
200 (setq char-spec (list ret))
201 (dolist (ccs (delq (car ret) (charset-list)))
202 (if (and (or (charset-iso-final-char ccs)
208 (setq ret (encode-char char ccs 'defined-only)))
209 (setq char-spec (cons (cons ccs ret) char-spec))))
211 (setq char-spec (split-char char)))
212 (cond ((setq ret (get-char-attribute char 'name))
213 (setq char-spec (cons (cons 'name ret) char-spec))
215 ((setq ret (get-char-attribute char 'name*))
216 (setq char-spec (cons (cons 'name* ret) char-spec))
223 (defun char-db-insert-char-spec (char &optional readable column
226 (setq column (current-column)))
227 (let (char-spec al cal key temp-char)
228 (setq char-spec (char-db-make-char-spec char))
229 (unless (or (characterp char) ; char
231 (setq char (find-char char-spec))
233 ;; define temporary character
234 ;; Current implementation is dirty.
235 (setq temp-char (define-char (cons '(ideograph-daikanwa . 0)
237 (remove-char-attribute temp-char 'ideograph-daikanwa)
238 (setq char temp-char))
242 ;; (setq key (car (car char-spec)))
243 ;; (unless (memq key char-db-ignored-attributes)
244 ;; (if (find-charset key)
245 ;; (if (encode-char char key 'defined-only)
246 ;; (setq cal (cons key cal)))
247 ;; (setq al (cons key al))))
248 ;; (setq char-spec (cdr char-spec)))
250 ;; (setq char-spec (char-db-make-char-spec char))
252 ;; (setq key (car (car char-spec)))
253 ;; (unless (memq key char-db-ignored-attributes)
254 ;; (if (find-charset key)
255 ;; (setq cal (cons key cal))
256 ;; (setq al (cons key al))))
257 ;; (setq char-spec (cdr char-spec)))
260 ;; (memq 'ideographic-structure al))
261 ;; (push 'ideographic-structure al))
262 ;; (dolist (feature required-features)
263 ;; (if (find-charset feature)
264 ;; (if (encode-char char feature 'defined-only)
265 ;; (setq cal (adjoin feature cal)))
266 ;; (setq al (adjoin feature al))))
267 (insert-char-attributes char
270 (union (mapcar #'car char-spec)
274 ;; undefine temporary character
275 ;; Current implementation is dirty.
276 (setq char-spec (char-attribute-alist temp-char))
278 (remove-char-attribute temp-char (car (car char-spec)))
279 (setq char-spec (cdr char-spec))))))
281 (defun char-db-insert-alist (alist &optional readable column)
283 (setq column (current-column)))
285 (concat "\n" (make-string (1+ column) ?\ )))
289 lbs cell rest separator)
292 (setq name (car (car alist))
293 value (cdr (car alist)))
294 (cond ((eq name 'char)
296 (if (setq ret (condition-case nil
303 (setq key (car (car value)))
304 ;; (if (find-charset key)
305 ;; (setq cal (cons key cal))
306 (setq al (cons key al))
308 (setq value (cdr value)))
309 (insert-char-attributes ret
313 (insert (prin1-to-string value)))
315 (insert line-breaking))
317 (insert (format "(%-18s " name))
318 (setq lbs (concat "\n" (make-string (current-column) ?\ )))
320 (setq cell (car value))
321 (if (and (consp cell)
323 (setq ret (condition-case nil
332 (setq key (car (car rest)))
333 ;; (if (find-charset key)
334 ;; (setq cal (cons key cal))
335 (setq al (cons key al))
337 (setq rest (cdr rest)))
340 (insert-char-attributes ret
344 (setq separator lbs))
347 (insert (prin1-to-string cell))
348 (setq separator " "))
349 (setq value (cdr value)))
351 (insert line-breaking))
353 (insert (format "(%-18s . %S)%s"
356 (setq alist (cdr alist))))
359 (defun char-db-insert-char-reference (plist &optional readable column)
361 (setq column (current-column)))
363 (concat "\n" (make-string (1+ column) ?\ )))
368 (setq name (pop plist))
369 (setq value (pop plist))
370 (cond ((eq name :char)
373 (cond ((numberp value)
374 (setq value (decode-char '=ucs value)))
376 ;; (setq value (or (find-char value)
379 (char-db-insert-char-spec value readable)
380 (insert line-breaking)
383 (insert (format "%s%s\t%d ; %c%s"
386 (aref ideographic-radicals value)
390 (insert (format "%s%s\t%S" separator name value))
391 (setq separator line-breaking)))
395 (defun char-db-decode-isolated-char (ccs code-point)
398 (cond ((eq ccs 'arabic-iso8859-6)
399 (decode-char ccs code-point))
400 ((and (memq ccs '(=gt-pj-1
411 (setq ret (decode-char ccs code-point))
412 (setq ret (encode-char ret '=gt 'defined-only)))
413 (decode-builtin-char '=gt ret))
415 (decode-builtin-char ccs code-point))))
416 (cond ((and (<= 0 (char-int ret))
417 (<= (char-int ret) #x1F))
418 (decode-char '=ucs (+ #x2400 (char-int ret))))
419 ((= (char-int ret) #x7F)
423 (defvar char-db-convert-obsolete-format t)
425 (defun insert-char-attributes (char &optional readable attributes column)
429 (if (consp attributes)
431 (dolist (name attributes)
432 (unless (memq name char-db-ignored-attributes)
435 (dolist (name (char-attribute-list))
436 (unless (memq name char-db-ignored-attributes)
439 #'char-attribute-name<)))
441 (setq column (current-column)))
442 (let (name value has-long-ccs-name rest
445 (concat "\n" (make-string (1+ column) ?\ )))
446 lbs cell separator ret
449 sources required-features)
451 (when (and (memq 'name attributes)
452 (setq value (get-char-attribute char 'name)))
454 (if (> (+ (current-column) (length value)) 48)
457 value line-breaking))
458 (setq attributes (delq 'name attributes))
460 (when (and (memq 'name* attributes)
461 (setq value (get-char-attribute char 'name*)))
463 (if (> (+ (current-column) (length value)) 48)
466 value line-breaking))
467 (setq attributes (delq 'name* attributes))
469 (when (and (memq 'script attributes)
470 (setq value (get-char-attribute char 'script)))
471 (insert (format "(script\t\t%s)%s"
472 (mapconcat (function prin1-to-string)
475 (setq attributes (delq 'script attributes))
477 (dolist (name '(=>ucs =>ucs*))
478 (when (and (memq name attributes)
479 (setq value (get-char-attribute char name)))
480 (insert (format "(%-18s . #x%04X)\t; %c%s"
481 name value (decode-char '=ucs value)
483 (setq attributes (delq name attributes))))
484 (dolist (name '(=>ucs@gb =>ucs@cns =>ucs@jis =>ucs@ks =>ucs@big5))
485 (when (and (memq name attributes)
486 (setq value (get-char-attribute char name)))
487 (insert (format "(%-18s . #x%04X)\t; %c%s"
492 (symbol-name name) 2)))
495 (setq attributes (delq name attributes))
497 ;; (dolist (name '(=>ucs-gb =>ucs-cns =>ucs-jis =>ucs-ks =>ucs-big5))
498 ;; (when (and (memq name attributes)
499 ;; (setq value (get-char-attribute char name)))
500 ;; (insert (format "(%-18s . #x%04X)\t; %c%s"
503 ;; (substring (symbol-name name) 6)))
505 ;; (decode-char (intern
508 ;; (symbol-name name) 6)))
511 ;; (setq attributes (delq name attributes))))
512 ;; (when (and (memq '->ucs attributes)
513 ;; (setq value (get-char-attribute char '->ucs)))
514 ;; (insert (format (if char-db-convert-obsolete-format
515 ;; "(=>ucs\t\t. #x%04X)\t; %c%s"
516 ;; "(->ucs\t\t. #x%04X)\t; %c%s")
517 ;; value (decode-char '=ucs value)
519 ;; (setq attributes (delq '->ucs attributes))
521 (dolist (name '(=>daikanwa))
522 (when (and (memq name attributes)
523 (setq value (get-char-attribute char name)))
526 (format "(%-18s . %05d)\t; %c%s"
527 name value (decode-char '=daikanwa value)
529 (format "(%-18s %s)\t; %c%s"
531 (mapconcat #'prin1-to-string
533 (char-representative-of-daikanwa char)
535 (setq attributes (delq name attributes))))
536 (when (and (memq 'general-category attributes)
537 (setq value (get-char-attribute char 'general-category)))
539 "(general-category\t%s) ; %s%s"
540 (mapconcat (lambda (cell)
543 (cond ((rassoc value unidata-normative-category-alist)
544 "Normative Category")
545 ((rassoc value unidata-informative-category-alist)
546 "Informative Category")
550 (setq attributes (delq 'general-category attributes))
552 (when (and (memq 'bidi-category attributes)
553 (setq value (get-char-attribute char 'bidi-category)))
554 (insert (format "(bidi-category\t. %S)%s"
557 (setq attributes (delq 'bidi-category attributes))
559 (unless (or (not (memq 'mirrored attributes))
560 (eq (setq value (get-char-attribute char 'mirrored 'empty))
562 (insert (format "(mirrored\t\t. %S)%s"
565 (setq attributes (delq 'mirrored attributes))
568 ((and (memq 'decimal-digit-value attributes)
569 (setq value (get-char-attribute char 'decimal-digit-value)))
570 (insert (format "(decimal-digit-value . %S)%s"
573 (setq attributes (delq 'decimal-digit-value attributes))
574 (when (and (memq 'digit-value attributes)
575 (setq value (get-char-attribute char 'digit-value)))
576 (insert (format "(digit-value\t . %S)%s"
579 (setq attributes (delq 'digit-value attributes))
581 (when (and (memq 'numeric-value attributes)
582 (setq value (get-char-attribute char 'numeric-value)))
583 (insert (format "(numeric-value\t . %S)%s"
586 (setq attributes (delq 'numeric-value attributes))
590 (when (and (memq 'digit-value attributes)
591 (setq value (get-char-attribute char 'digit-value)))
592 (insert (format "(digit-value\t. %S)%s"
595 (setq attributes (delq 'digit-value attributes))
597 (when (and (memq 'numeric-value attributes)
598 (setq value (get-char-attribute char 'numeric-value)))
599 (insert (format "(numeric-value\t. %S)%s"
602 (setq attributes (delq 'numeric-value attributes))
604 (when (and (memq 'iso-10646-comment attributes)
605 (setq value (get-char-attribute char 'iso-10646-comment)))
606 (insert (format "(iso-10646-comment\t. %S)%s"
609 (setq attributes (delq 'iso-10646-comment attributes))
611 (when (and (memq 'morohashi-daikanwa attributes)
612 (setq value (get-char-attribute char 'morohashi-daikanwa)))
613 (insert (format "(morohashi-daikanwa\t%s)%s"
614 (mapconcat (function prin1-to-string) value " ")
616 (setq attributes (delq 'morohashi-daikanwa attributes))
620 (when (and (memq 'ideographic-radical attributes)
621 (setq value (get-char-attribute char 'ideographic-radical)))
623 (insert (format "(ideographic-radical . %S)\t; %c%s"
625 (aref ideographic-radicals radical)
627 (setq attributes (delq 'ideographic-radical attributes))
630 (dolist (domain char-db-feature-domains)
631 (setq key (intern (format "%s@%s" 'ideographic-radical domain)))
632 (when (and (memq key attributes)
633 (setq value (get-char-attribute char key)))
635 (insert (format "(%s . %S)\t; %c%s"
638 (aref ideographic-radicals radical)
640 (setq attributes (delq key attributes))
642 (setq key (intern (format "%s@%s" 'ideographic-strokes domain)))
643 (when (and (memq key attributes)
644 (setq value (get-char-attribute char key)))
646 (insert (format "(%s . %S)%s"
650 (setq attributes (delq key attributes))
652 (setq key (intern (format "%s@%s" 'total-strokes domain)))
653 (when (and (memq key attributes)
654 (setq value (get-char-attribute char key)))
655 (insert (format "(%s . %S)%s"
659 (setq attributes (delq key attributes))
661 (dolist (feature '(ideographic-radical
664 (setq key (intern (format "%s@%s*sources" feature domain)))
665 (when (and (memq key attributes)
666 (setq value (get-char-attribute char key)))
667 (insert (format "(%s%s" key line-breaking))
669 (insert (format " %s" cell)))
671 (insert line-breaking)
672 (setq attributes (delq key attributes))
675 (when (and (memq 'ideographic-strokes attributes)
676 (setq value (get-char-attribute char 'ideographic-strokes)))
678 (insert (format "(ideographic-strokes . %S)%s"
681 (setq attributes (delq 'ideographic-strokes attributes))
683 (when (and (memq 'kangxi-radical attributes)
684 (setq value (get-char-attribute char 'kangxi-radical)))
685 (unless (eq value radical)
686 (insert (format "(kangxi-radical\t . %S)\t; %c%s"
688 (aref ideographic-radicals value)
691 (setq radical value)))
692 (setq attributes (delq 'kangxi-radical attributes))
694 (when (and (memq 'kangxi-strokes attributes)
695 (setq value (get-char-attribute char 'kangxi-strokes)))
696 (unless (eq value strokes)
697 (insert (format "(kangxi-strokes\t . %S)%s"
701 (setq strokes value)))
702 (setq attributes (delq 'kangxi-strokes attributes))
704 (when (and (memq 'japanese-radical attributes)
705 (setq value (get-char-attribute char 'japanese-radical)))
706 (unless (eq value radical)
707 (insert (format "(japanese-radical\t . %S)\t; %c%s"
709 (aref ideographic-radicals value)
712 (setq radical value)))
713 (setq attributes (delq 'japanese-radical attributes))
715 (when (and (memq 'japanese-strokes attributes)
716 (setq value (get-char-attribute char 'japanese-strokes)))
717 (unless (eq value strokes)
718 (insert (format "(japanese-strokes\t . %S)%s"
722 (setq strokes value)))
723 (setq attributes (delq 'japanese-strokes attributes))
725 (when (and (memq 'cns-radical attributes)
726 (setq value (get-char-attribute char 'cns-radical)))
727 (insert (format "(cns-radical\t . %S)\t; %c%s"
729 (aref ideographic-radicals value)
731 (setq attributes (delq 'cns-radical attributes))
733 (when (and (memq 'cns-strokes attributes)
734 (setq value (get-char-attribute char 'cns-strokes)))
735 (unless (eq value strokes)
736 (insert (format "(cns-strokes\t . %S)%s"
740 (setq strokes value)))
741 (setq attributes (delq 'cns-strokes attributes))
743 (when (and (memq 'shinjigen-1-radical attributes)
744 (setq value (get-char-attribute char 'shinjigen-1-radical)))
745 (unless (eq value radical)
746 (insert (format "(shinjigen-1-radical . %S)\t; %c%s"
748 (aref ideographic-radicals value)
751 (setq radical value)))
752 (setq attributes (delq 'shinjigen-1-radical attributes))
754 (when (and (memq 'ideographic- attributes)
755 (setq value (get-char-attribute char 'ideographic-)))
756 (insert "(ideographic- ")
757 (setq lbs (concat "\n" (make-string (current-column) ?\ ))
760 (setq cell (car value))
762 (setq cell (decode-char '=ucs cell)))
763 (cond ((characterp cell)
767 (insert (format "%S" cell))
768 (char-db-insert-char-spec cell readable))
769 (setq separator lbs))
773 (if (consp (car cell))
774 (char-db-insert-char-spec cell readable)
775 (char-db-insert-char-reference cell readable))
776 (setq separator lbs))
780 (insert (prin1-to-string cell))
781 (setq separator " ")))
782 (setq value (cdr value)))
784 (insert line-breaking)
785 (setq attributes (delq 'ideographic- attributes)))
786 (when (and (memq 'total-strokes attributes)
787 (setq value (get-char-attribute char 'total-strokes)))
788 (insert (format "(total-strokes . %S)%s"
791 (setq attributes (delq 'total-strokes attributes))
793 (when (and (memq '->ideograph attributes)
794 (setq value (get-char-attribute char '->ideograph)))
795 (insert (format "(->ideograph\t%s)%s"
796 (mapconcat (lambda (code)
797 (cond ((symbolp code)
800 (format "#x%04X" code))
803 line-breaking code))))
806 (setq attributes (delq '->ideograph attributes))
808 (when (and (memq '->decomposition attributes)
809 (setq value (get-char-attribute char '->decomposition)))
810 (insert (format "(->decomposition\t%s)%s"
811 (mapconcat (lambda (code)
812 (cond ((symbolp code)
821 (format "#x%04X" code))
823 (format "%s%S" line-breaking code))))
826 (setq attributes (delq '->decomposition attributes))
828 (if (equal (get-char-attribute char '->titlecase)
829 (get-char-attribute char '->uppercase))
830 (setq attributes (delq '->titlecase attributes)))
831 (when (and (memq '->mojikyo attributes)
832 (setq value (get-char-attribute char '->mojikyo)))
833 (insert (format "(->mojikyo\t\t. %06d)\t; %c%s"
834 value (decode-char 'mojikyo value)
836 (setq attributes (delq '->mojikyo attributes))
838 (when (and (memq 'hanyu-dazidian-vol attributes)
839 (setq value (get-char-attribute char 'hanyu-dazidian-vol)))
840 (insert (format "(hanyu-dazidian-vol . %d)%s"
841 value line-breaking))
842 (setq attributes (delq 'hanyu-dazidian-vol attributes))
844 (when (and (memq 'hanyu-dazidian-page attributes)
845 (setq value (get-char-attribute char 'hanyu-dazidian-page)))
846 (insert (format "(hanyu-dazidian-page . %d)%s"
847 value line-breaking))
848 (setq attributes (delq 'hanyu-dazidian-page attributes))
850 (when (and (memq 'hanyu-dazidian-char attributes)
851 (setq value (get-char-attribute char 'hanyu-dazidian-char)))
852 (insert (format "(hanyu-dazidian-char . %d)%s"
853 value line-breaking))
854 (setq attributes (delq 'hanyu-dazidian-char attributes))
857 (dolist (ignored '(composition
858 ->denotational <-subsumptive ->ucs-unified))
859 (setq attributes (delq ignored attributes))))
860 ;; (setq rest ccs-attributes)
863 ;; (setq value (get-char-attribute char (car rest)))
865 ;; (if (>= (length (symbol-name (car rest))) 19)
867 ;; (setq has-long-ccs-name t)
871 ;; (setq rest (cdr rest)))
873 (setq name (car attributes))
874 (if (setq value (get-char-attribute char name))
875 (cond ((setq ret (find-charset name))
876 (setq name (charset-name ret))
877 (if (and (not (memq name dest-ccss))
879 (setq value (get-char-attribute char name))
880 (setq dest-ccss (cons name dest-ccss))))
883 (cond ((memq name '(=daikanwa
884 =daikanwa-rev1 =daikanwa-rev2
886 (if has-long-ccs-name
887 "(%-26s . %05d)\t; %c%s"
888 "(%-18s . %05d)\t; %c%s"))
890 (if has-long-ccs-name
891 "(%-26s . %06d)\t; %c%s"
892 "(%-18s . %06d)\t; %c%s"))
893 ((>= (charset-dimension name) 2)
894 (if has-long-ccs-name
895 "(%-26s . #x%04X)\t; %c%s"
896 "(%-18s . #x%04X)\t; %c%s"))
898 (if has-long-ccs-name
899 "(%-26s . #x%02X)\t; %c%s"
900 "(%-18s . #x%02X)\t; %c%s")))
902 (if (= (charset-iso-graphic-plane name) 1)
904 (cond ((= (charset-dimension name) 1)
906 ((= (charset-dimension name) 2)
908 ((= (charset-dimension name) 3)
912 (char-db-decode-isolated-char name value)
915 ((string-match "^=>ucs@" (symbol-name name))
916 (insert (format "(%-18s . #x%04X)\t; %c%s"
917 name value (decode-char '=ucs value)
920 ((eq name 'jisx0208-1978/4X)
921 (insert (format "(%-18s . #x%04X)%s"
926 (string-match "^->simplified" (symbol-name name)))
928 ((or (eq name 'ideographic-structure)
929 (eq name 'ideographic-)
930 (string-match "^\\(->\\|<-\\)" (symbol-name name)))
931 (insert (format "(%-18s%s " name line-breaking))
932 (setq lbs (concat "\n" (make-string (current-column) ?\ ))
935 (setq cell (car value))
937 (setq cell (decode-char '=ucs cell)))
938 (cond ((eq name '->subsumptive)
941 (let ((char-db-ignored-attributes
943 char-db-ignored-attributes)))
944 (insert-char-attributes cell readable))
945 (setq separator lbs))
950 (intern (format "%s*sources" name))))
951 (setq required-features nil)
952 (dolist (source sources)
953 (setq required-features
957 (format "=%s" source))))
961 (when (string-match "@JP" (symbol-name name))
962 (setq required-features
963 (union required-features
974 (insert (format "%S" cell))
975 (char-db-insert-char-spec cell readable
978 (setq separator lbs))
982 (if (consp (car cell))
983 (char-db-insert-char-spec cell readable)
984 (char-db-insert-char-reference cell readable))
985 (setq separator lbs))
989 (insert (prin1-to-string cell))
990 (setq separator " ")))
991 (setq value (cdr value)))
993 (insert line-breaking))
994 ((memq name '(ideograph=
995 original-ideograph-of
999 ;; simplified-ideograph-of
1000 ideographic-variants
1001 ;; ideographic-different-form-of
1003 (insert (format "(%-18s%s " name line-breaking))
1004 (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1006 (while (consp value)
1007 (setq cell (car value))
1008 (if (and (consp cell)
1013 (char-db-insert-alist cell readable)
1014 (setq separator lbs))
1017 (insert (prin1-to-string cell))
1018 (setq separator " "))
1019 (setq value (cdr value)))
1021 (insert line-breaking))
1023 (insert (format "(%-18s " name))
1024 (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1026 (while (consp value)
1027 (setq cell (car value))
1028 (if (and (consp cell)
1030 (setq ret (condition-case nil
1038 (setq key (car (car rest)))
1039 (if (find-charset key)
1040 (setq cal (cons key cal))
1041 (setq al (cons key al)))
1042 (setq rest (cdr rest)))
1045 (insert-char-attributes ret
1048 (setq separator lbs))
1049 (setq ret (prin1-to-string cell))
1051 (if (< (+ (current-column)
1058 (setq separator " "))
1059 (setq value (cdr value)))
1061 (insert line-breaking))
1063 (insert (format "(%-18s . %S)%s"
1067 (setq attributes (cdr attributes)))
1068 ;; (while ccs-attributes
1069 ;; (setq name (charset-name (car ccs-attributes)))
1070 ;; (if (and (not (memq name dest-ccss))
1072 ;; (setq value (get-char-attribute char name))
1073 ;; (setq dest-ccss (cons name dest-ccss))))
1076 ;; (cond ((memq name '(=daikanwa
1077 ;; =daikanwa-rev1 =daikanwa-rev2
1078 ;; =gt =gt-k =cbeta))
1079 ;; (if has-long-ccs-name
1080 ;; "(%-26s . %05d)\t; %c%s"
1081 ;; "(%-18s . %05d)\t; %c%s"))
1082 ;; ((eq name 'mojikyo)
1083 ;; (if has-long-ccs-name
1084 ;; "(%-26s . %06d)\t; %c%s"
1085 ;; "(%-18s . %06d)\t; %c%s"))
1086 ;; ((>= (charset-dimension name) 2)
1087 ;; (if has-long-ccs-name
1088 ;; "(%-26s . #x%04X)\t; %c%s"
1089 ;; "(%-18s . #x%04X)\t; %c%s"))
1091 ;; (if has-long-ccs-name
1092 ;; "(%-26s . #x%02X)\t; %c%s"
1093 ;; "(%-18s . #x%02X)\t; %c%s")))
1095 ;; (if (= (charset-iso-graphic-plane name) 1)
1097 ;; (cond ((= (charset-dimension name) 1)
1099 ;; ((= (charset-dimension name) 2)
1101 ;; ((= (charset-dimension name) 3)
1105 ;; (char-db-decode-isolated-char name value)
1107 ;; (setq ccs-attributes (cdr ccs-attributes)))
1110 (defun insert-char-data (char &optional readable
1113 (narrow-to-region (point)(point))
1114 (insert "(define-char
1116 (insert-char-attributes char readable attributes)
1118 (goto-char (point-min))
1119 (while (re-search-forward "[ \t]+$" nil t)
1122 (goto-char (point-min))
1123 (while (re-search-forward "[ \t][ \t][ \t]*" nil t)
1124 (let ((column (current-column))
1125 (indent-tabs-mode t))
1126 (delete-region (match-beginning 0) (point))
1127 (indent-to column)))
1128 (goto-char (point-max))
1129 ;; (tabify (point-min)(point-max))
1132 (defun insert-char-data-with-variant (char &optional printable
1134 script excluded-script)
1135 (insert-char-data char printable)
1136 (let ((variants (or (char-variants char)
1137 (let ((ucs (get-char-attribute char '->ucs)))
1139 (delete char (char-variants (int-char ucs)))))))
1141 (setq variants (sort variants #'<))
1143 (setq variant (car variants))
1144 (if (and (or (null script)
1145 (null (setq vs (get-char-attribute variant 'script)))
1147 (or (null excluded-script)
1148 (null (setq vs (get-char-attribute variant 'script)))
1149 (not (memq excluded-script vs))))
1150 (or (and no-ucs-unified (get-char-attribute variant '=ucs))
1151 (insert-char-data variant printable)))
1152 (setq variants (cdr variants))
1155 (defun insert-char-range-data (min max &optional script excluded-script)
1158 (while (<= code max)
1159 (setq char (decode-char '=ucs code))
1160 (if (encode-char char '=ucs 'defined-only)
1161 (insert-char-data-with-variant char nil 'no-ucs-unified
1162 script excluded-script))
1163 (setq code (1+ code)))))
1165 (defun write-char-range-data-to-file (min max file
1166 &optional script excluded-script)
1167 (let ((coding-system-for-write 'utf-8-mcs))
1169 (insert ";; -*- coding: utf-8-mcs -*-\n")
1170 (insert-char-range-data min max script excluded-script)
1171 (write-region (point-min)(point-max) file))))
1173 (defvar what-character-original-window-configuration)
1176 (defun what-char-definition (char)
1177 (interactive (list (char-after)))
1178 (let ((buf (get-buffer-create "*Character Description*"))
1179 (the-buf (current-buffer))
1180 (win-conf (current-window-configuration)))
1182 (make-local-variable 'what-character-original-window-configuration)
1183 (setq what-character-original-window-configuration win-conf)
1184 (setq buffer-read-only nil)
1188 (insert-char-data-with-variant char 'printable)
1189 (unless (char-attribute-alist char)
1190 (insert (format ";; = %c\n"
1191 (let* ((rest (split-char char))
1195 (setq code (logior (lsh code 8)
1197 (decode-char ccs code)))))
1198 ;; (char-db-update-comment)
1199 (set-buffer-modified-p nil)
1200 (view-mode the-buf (lambda (buf)
1201 (set-window-configuration
1202 what-character-original-window-configuration)
1204 (goto-char (point-min)))
1206 (set-window-configuration
1207 what-character-original-window-configuration)
1208 (signal (car err) (cdr err)))))))
1210 (provide 'char-db-util)
1212 ;;; char-db-util.el ends here