X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=lisp%2Futf-2000%2Fchar-db-util.el;h=17049aa0e7004b2a6b43e825877eb51aa3709905;hb=84b3a55c297a508fba10c9d8e9a29a10f3827317;hp=45d4fa13818f7b96d6a0d2ae87e617e4677c69f8;hpb=b967dbbbd1be53c8ebe0f79baad1a19355d22a22;p=chise%2Fxemacs-chise.git diff --git a/lisp/utf-2000/char-db-util.el b/lisp/utf-2000/char-db-util.el index 45d4fa1..17049aa 100644 --- a/lisp/utf-2000/char-db-util.el +++ b/lisp/utf-2000/char-db-util.el @@ -1,9 +1,9 @@ -;;; char-db-util.el --- Character Database utility +;;; char-db-util.el --- Character Database utility -*- coding: utf-8-er; -*- -;; Copyright (C) 1998,1999,2000,2001,2002,2003,2004 MORIOKA Tomohiko. +;; Copyright (C) 1998,1999,2000,2001,2002,2003,2004,2005 MORIOKA Tomohiko. ;; Author: MORIOKA Tomohiko -;; Keywords: CHISE, Character Database, ISO/IEC 10646, Unicode, UCS-4, MULE. +;; Keywords: CHISE, Character Database, ISO/IEC 10646, UCS, Unicode, MULE. ;; This file is part of XEmacs CHISE. @@ -69,13 +69,22 @@ (setq i (1+ i))) v)) +(defun ideographic-radical (number) + (aref ideographic-radicals number)) + +(defconst shuowen-radicals + [?一 ?上 ?示 ?三 ?王 ?玉 ?玨 ?气 ?士 ?丨 ?屮 ?艸 ?茻]) + +(defun shuowen-radical (number) + (aref shuowen-radicals (1- number))) + (defvar char-db-file-coding-system 'utf-8-mcs-er) (defvar char-db-feature-domains '(ucs daikanwa cns gt jis jis/alt jis/a jis/b jis-x0212 jis-x0213 cdp shinjigen misc unknown)) -(defvar char-db-ignored-attributes nil) +(defvar char-db-ignored-attributes '(ideographic-products)) (defun char-attribute-name< (ka kb) (cond @@ -87,6 +96,17 @@ nil) ((eq '->subsumptive ka) nil) + ((and (symbolp ka) + (string-match "^->" (symbol-name ka))) + (cond ((and (symbolp kb) + (string-match "^->" (symbol-name kb))) + (string< (symbol-name ka) + (symbol-name kb)) + )) + ) + ((and (symbolp kb) + (string-match "^->" (symbol-name kb))) + t) ((find-charset ka) (if (find-charset kb) (if (<= (charset-id ka) 1) @@ -127,9 +147,9 @@ greek-iso8859-7 thai-tis620 =jis-x0208 - japanese-jisx0208 + =jis-x0208@1978 + =jis-x0208@1983 japanese-jisx0212 - japanese-jisx0208-1978 chinese-gb2312 chinese-cns11643-1 chinese-cns11643-2 @@ -138,7 +158,7 @@ chinese-cns11643-5 chinese-cns11643-6 chinese-cns11643-7 - =jis-x0208-1990 + =jis-x0208@1990 =jis-x0213-1-2000 =jis-x0213-2-2000 korean-ksc5601 @@ -165,10 +185,13 @@ ideograph-hanziku-10 ideograph-hanziku-11 ideograph-hanziku-12 + =gt-k + =ucs@iso + =ucs@unicode =big5 =big5-eten - =gt-k =jis-x0208@1997 + =zinbun-oracle =jef-china3)) (defun char-db-make-char-spec (char) @@ -204,6 +227,8 @@ =daikanwa@rev2 ;; =gt-k ))) + (setq ccs (charset-name ccs)) + (null (assq ccs char-spec)) (setq ret (encode-char char ccs 'defined-only))) (setq char-spec (cons (cons ccs ret) char-spec)))) (if (null char-spec) @@ -354,7 +379,7 @@ (insert (format "%s%s\t%d ; %c%s" separator name value - (aref ideographic-radicals value) + (ideographic-radical value) line-breaking)) (setq separator "")) (t @@ -398,7 +423,7 @@ (format (cond ((memq name '(=daikanwa =daikanwa@rev1 =daikanwa@rev2 - =gt =gt-k =cbeta)) + =gt =gt-k =cbeta =zinbun-oracle)) "(%-18s . %05d)\t; %c") ((eq name 'mojikyo) "(%-18s . %06d)\t; %c") @@ -425,6 +450,101 @@ (- (logand value 255) 32)))) (insert line-breaking)) +(defun char-db-insert-relation-feature (char name value line-breaking + ccss readable) + (insert (format "(%-18s%s " name line-breaking)) + (let ((lbs (concat "\n" (make-string (current-column) ?\ ))) + separator cell sources required-features + ret) + (while (consp value) + (setq cell (car value)) + (if (integerp cell) + (setq cell (decode-char '=ucs cell))) + (cond + ((eq name '->subsumptive) + (when (or (not (some (lambda (atr) + (get-char-attribute cell atr)) + char-db-ignored-attributes)) + (some (lambda (ccs) + (encode-char cell ccs 'defined-only)) + ccss)) + (if separator + (insert lbs)) + (let ((char-db-ignored-attributes + (cons '<-subsumptive + char-db-ignored-attributes))) + (insert-char-attributes cell readable)) + (setq separator lbs)) + ) + ((characterp cell) + (setq sources + (get-char-attribute + char (intern (format "%s*sources" name)))) + (setq required-features nil) + (dolist (source sources) + (cond + ((memq source '(JP JP/Jouyou shinjigen-1)) + (setq required-features + (union required-features + '(=jis-x0208 + =jis-x0208@1990 + =jis-x0213-1-2000 + =jis-x0213-2-2000 + =jis-x0212 + =jis-x0208@1983 + =jis-x0208@1978)))) + ((eq source 'CN) + (setq required-features + (union required-features + '(=gb2312 + =gb12345 + =iso-ir165))))) + (cond + ((find-charset (setq ret (intern (format "=%s" source)))) + (setq required-features + (cons ret required-features))) + (t (setq required-features + (cons source required-features))))) + (cond ((string-match "@JP" (symbol-name name)) + (setq required-features + (union required-features + '(=jis-x0208 + =jis-x0208@1990 + =jis-x0213-1-2000 + =jis-x0213-2-2000 + =jis-x0212 + =jis-x0208@1983 + =jis-x0208@1978)))) + ((string-match "@CN" (symbol-name name)) + (setq required-features + (union required-features + '(=gb2312 + =gb12345 + =iso-ir165))))) + (if separator + (insert lbs)) + (if readable + (insert (format "%S" cell)) + (char-db-insert-char-spec cell readable + nil + required-features)) + (setq separator lbs)) + ((consp cell) + (if separator + (insert lbs)) + (if (consp (car cell)) + (char-db-insert-char-spec cell readable) + (char-db-insert-char-reference cell readable)) + (setq separator lbs)) + (t + (if separator + (insert separator)) + (insert (prin1-to-string cell)) + (setq separator " "))) + (setq value (cdr value))) + (insert ")") + (insert line-breaking))) + (defun insert-char-attributes (char &optional readable attributes column) (unless column (setq column (current-column))) @@ -435,8 +555,7 @@ (concat "\n" (make-string (1+ column) ?\ ))) lbs cell separator ret key al cal - dest-ccss - sources required-features + dest-ccss ; sources required-features ccss) (let (atr-d) (setq attributes @@ -457,6 +576,19 @@ atr-d) #'char-attribute-name<))) (insert "(") + (when (memq '<-subsumptive attributes) + (when readable + (when (setq value (get-char-attribute char '<-subsumptive)) + (char-db-insert-relation-feature char '<-subsumptive value + line-breaking + ccss readable))) + (setq attributes (delq '<-subsumptive attributes))) + (when (and (memq '<-denotational attributes) + (setq value (get-char-attribute char '<-denotational))) + (char-db-insert-relation-feature char '<-denotational value + line-breaking + ccss readable) + (setq attributes (delq '<-denotational attributes))) (when (and (memq 'name attributes) (setq value (get-char-attribute char 'name))) (insert (format @@ -483,14 +615,6 @@ line-breaking)) (setq attributes (delq 'script attributes)) ) - ;; (when (and (memq '<-denotational attributes) - ;; (setq value (get-char-attribute char '<-denotational)) - ;; (null (cdr value)) - ;; (setq value (encode-char (car value) 'ucs 'defined-only))) - ;; (insert (format "(%-18s . #x%04X)\t; %c%s" - ;; '=>ucs value (decode-char 'ucs value) - ;; line-breaking)) - ;; (setq attributes (delq '<-denotational attributes))) (dolist (name '(=>ucs =>ucs*)) (when (and (memq name attributes) (setq value (get-char-attribute char name))) @@ -615,12 +739,32 @@ (setq radical value) (insert (format "(ideographic-radical . %S)\t; %c%s" radical - (aref ideographic-radicals radical) + (ideographic-radical radical) line-breaking)) (setq attributes (delq 'ideographic-radical attributes)) ) + (when (and (memq 'shuowen-radical attributes) + (setq value (get-char-attribute char 'shuowen-radical))) + (insert (format "(shuowen-radical\t. %S)\t; %c%s" + value + (shuowen-radical value) + line-breaking)) + (setq attributes (delq 'shuowen-radical attributes)) + ) (let (key) - (dolist (domain char-db-feature-domains) + (dolist (domain + (append + char-db-feature-domains + (let (dest domain) + (dolist (feature (char-attribute-list)) + (setq feature (symbol-name feature)) + (when (string-match + "\\(radical\\|strokes\\)@\\([^@*]+\\)\\(\\*\\|$\\)" + feature) + (setq domain (intern (match-string 2 feature))) + (unless (memq domain dest) + (setq dest (cons domain dest))))) + (sort dest #'string<)))) (setq key (intern (format "%s@%s" 'ideographic-radical domain))) (when (and (memq key attributes) (setq value (get-char-attribute char key))) @@ -628,7 +772,7 @@ (insert (format "(%s . %S)\t; %c%s" key radical - (aref ideographic-radicals radical) + (ideographic-radical radical) line-breaking)) (setq attributes (delq key attributes)) ) @@ -678,7 +822,7 @@ (unless (eq value radical) (insert (format "(kangxi-radical\t . %S)\t; %c%s" value - (aref ideographic-radicals value) + (ideographic-radical value) line-breaking)) (or radical (setq radical value))) @@ -699,7 +843,7 @@ (unless (eq value radical) (insert (format "(japanese-radical\t . %S)\t; %c%s" value - (aref ideographic-radicals value) + (ideographic-radical value) line-breaking)) (or radical (setq radical value))) @@ -719,7 +863,7 @@ (setq value (get-char-attribute char 'cns-radical))) (insert (format "(cns-radical\t . %S)\t; %c%s" value - (aref ideographic-radicals value) + (ideographic-radical value) line-breaking)) (setq attributes (delq 'cns-radical attributes)) ) @@ -738,7 +882,7 @@ (unless (eq value radical) (insert (format "(shinjigen-1-radical . %S)\t; %c%s" value - (aref ideographic-radicals value) + (ideographic-radical value) line-breaking)) (or radical (setq radical value))) @@ -798,26 +942,26 @@ line-breaking)) (setq attributes (delq '->ideograph attributes)) ) - (when (and (memq '->decomposition attributes) - (setq value (get-char-attribute char '->decomposition))) - (insert (format "(->decomposition\t%s)%s" - (mapconcat (lambda (code) - (cond ((symbolp code) - (symbol-name code)) - ((characterp code) - (if readable - (format "%S" code) - (format "#x%04X" - (char-int code)) - )) - ((integerp code) - (format "#x%04X" code)) - (t - (format "%s%S" line-breaking code)))) - value " ") - line-breaking)) - (setq attributes (delq '->decomposition attributes)) - ) + ;; (when (and (memq '->decomposition attributes) + ;; (setq value (get-char-attribute char '->decomposition))) + ;; (insert (format "(->decomposition\t%s)%s" + ;; (mapconcat (lambda (code) + ;; (cond ((symbolp code) + ;; (symbol-name code)) + ;; ((characterp code) + ;; (if readable + ;; (format "%S" code) + ;; (format "#x%04X" + ;; (char-int code)) + ;; )) + ;; ((integerp code) + ;; (format "#x%04X" code)) + ;; (t + ;; (format "%s%S" line-breaking code)))) + ;; value " ") + ;; line-breaking)) + ;; (setq attributes (delq '->decomposition attributes)) + ;; ) (if (equal (get-char-attribute char '->titlecase) (get-char-attribute char '->uppercase)) (setq attributes (delq '->titlecase attributes))) @@ -878,6 +1022,12 @@ (intern (format "%s*sources" name)))) (not (string-match "\\*sources$" (symbol-name name))) (or (eq name '<-identical) + (eq name '<-canonical) + (eq name '->superscript) + (eq name '->subscript) + (eq name '->circled) + (string-match "^->font" (symbol-name name)) + (string-match "^->compat" (symbol-name name)) (string-match "^->halfwidth" (symbol-name name)) (and (string-match "^->fullwidth" (symbol-name name)) @@ -890,108 +1040,21 @@ (string-match "^->vulgar" (symbol-name name)) (string-match "^->wrong" (symbol-name name)) (string-match "^->same" (symbol-name name)) + (string-match "^->formed" (symbol-name name)) (string-match "^->original" (symbol-name name)) (string-match "^->ancient" (symbol-name name)) + (string-match "^->Oracle-Bones" (symbol-name name)) )) ) ((or (eq name 'ideographic-structure) + (eq name 'ideographic-combination) (eq name 'ideographic-) + (eq name '=decomposition) + (string-match "^=>decomposition" (symbol-name name)) (string-match "^\\(->\\|<-\\)" (symbol-name name))) - (insert (format "(%-18s%s " name line-breaking)) - (setq lbs (concat "\n" (make-string (current-column) ?\ )) - separator nil) - (while (consp value) - (setq cell (car value)) - (if (integerp cell) - (setq cell (decode-char '=ucs cell))) - (cond ((eq name '->subsumptive) - (when (or (not - (some (lambda (atr) - (get-char-attribute cell atr)) - char-db-ignored-attributes)) - (some (lambda (ccs) - (encode-char cell ccs - 'defined-only)) - ccss)) - (if separator - (insert lbs)) - (let ((char-db-ignored-attributes - (cons '<-subsumptive - char-db-ignored-attributes))) - (insert-char-attributes cell readable)) - (setq separator lbs)) - ) - ((characterp cell) - (setq sources - (get-char-attribute - char - (intern (format "%s*sources" name)))) - (setq required-features nil) - (dolist (source sources) - (cond - ((memq source '(JP JP/Jouyou - shinjigen-1)) - (setq required-features - (union required-features - '(=jis-x0208 - =jis-x0208@1990 - =jis-x0213-1-2000 - =jis-x0213-2-2000 - =jis-x0212 - =jis-x0208@1983 - =jis-x0208@1978)))) - ((eq source 'CN) - (setq required-features - (union required-features - '(=gb2312 - =gb12345 - =iso-ir165))))) - (cond - ((find-charset - (setq ret (intern (format "=%s" source)))) - (setq required-features - (cons ret required-features))) - (t (setq required-features - (cons source required-features))))) - (cond ((string-match "@JP" (symbol-name name)) - (setq required-features - (union required-features - '(=jis-x0208 - =jis-x0208@1990 - =jis-x0213-1-2000 - =jis-x0213-2-2000 - =jis-x0212 - =jis-x0208@1983 - =jis-x0208@1978)))) - ((string-match "@CN" (symbol-name name)) - (setq required-features - (union required-features - '(=gb2312 - =gb12345 - =iso-ir165))))) - (if separator - (insert lbs)) - (if readable - (insert (format "%S" cell)) - (char-db-insert-char-spec cell readable - nil - required-features)) - (setq separator lbs)) - ((consp cell) - (if separator - (insert lbs)) - (if (consp (car cell)) - (char-db-insert-char-spec cell readable) - (char-db-insert-char-reference cell readable)) - (setq separator lbs)) - (t - (if separator - (insert separator)) - (insert (prin1-to-string cell)) - (setq separator " "))) - (setq value (cdr value))) - (insert ")") - (insert line-breaking)) + (char-db-insert-relation-feature char name value + line-breaking + ccss readable)) ((memq name '(ideograph= original-ideograph-of ancient-ideograph-of @@ -1094,24 +1157,30 @@ no-ucs-unified script excluded-script) (insert-char-data char printable) - (let ((variants (or (char-variants char) - (let ((ucs (get-char-attribute char '->ucs))) - (if ucs - (delete char (char-variants (int-char ucs))))))) - variant vs) + (let ((variants (char-variants char)) + rest + variant vs ret) (setq variants (sort variants #'<)) - (while variants - (setq variant (car variants)) - (if (and (or (null script) - (null (setq vs (get-char-attribute variant 'script))) - (memq script vs)) - (or (null excluded-script) - (null (setq vs (get-char-attribute variant 'script))) - (not (memq excluded-script vs)))) - (or (and no-ucs-unified (get-char-attribute variant '=ucs)) - (insert-char-data variant printable))) - (setq variants (cdr variants)) - ))) + (setq rest variants) + (setq variants (cons char variants)) + (while rest + (setq variant (car rest)) + (unless (get-char-attribute variant '<-subsumptive) + (if (and (or (null script) + (null (setq vs (get-char-attribute variant 'script))) + (memq script vs)) + (or (null excluded-script) + (null (setq vs (get-char-attribute variant 'script))) + (not (memq excluded-script vs)))) + (unless (and no-ucs-unified (get-char-attribute variant '=ucs)) + (insert-char-data variant printable) + (if (setq ret (char-variants variant)) + (while ret + (or (memq (car ret) variants) + ;; (get-char-attribute (car ret) '<-subsumptive) + (setq rest (nconc rest (list (car ret))))) + (setq ret (cdr ret))))))) + (setq rest (cdr rest))))) (defun insert-char-range-data (min max &optional script excluded-script) (let ((code min)