X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=lisp%2Futf-2000%2Fchar-db-util.el;h=907dff1f8ba2dbd31f7470a6443c445eab9e5003;hb=5ec3565dadfbfac8a809f588317728e9698bcf67;hp=e6d3b8132f89e824b82b49e6eeca7f4882c49c43;hpb=fccc97af2808c50fa982c76c03be661868adb6f8;p=chise%2Fxemacs-chise.git diff --git a/lisp/utf-2000/char-db-util.el b/lisp/utf-2000/char-db-util.el index e6d3b81..907dff1 100644 --- a/lisp/utf-2000/char-db-util.el +++ b/lisp/utf-2000/char-db-util.el @@ -1,9 +1,10 @@ -;;; char-db-util.el --- Character Database utility +;;; char-db-util.el --- Character Database utility -*- coding: utf-8-er; -*- -;; Copyright (C) 1998,1999,2000,2001,2002,2003,2004 MORIOKA Tomohiko. +;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, +;; 2007, 2008, 2009 MORIOKA Tomohiko. ;; Author: MORIOKA Tomohiko -;; Keywords: CHISE, Character Database, ISO/IEC 10646, Unicode, UCS-4, MULE. +;; Keywords: CHISE, Character Database, ISO/IEC 10646, UCS, Unicode, MULE. ;; This file is part of XEmacs CHISE. @@ -69,11 +70,51 @@ (setq i (1+ i))) v)) +(defun ideographic-radical (number) + (aref ideographic-radicals number)) + +(defconst shuowen-radicals + [?一 ?上 ?示 ?三 ?王 ?玉 ?玨 ?气 ?士 ?丨 ; 010 + ?屮 ?艸 ?蓐 ?茻 ?小 ?八 ?釆 ?半 ?牛 ?犛 ; 020 + ?告 ?口 ?凵 ?吅 ?哭 ?走 ?止 ?癶 ?步 ?此 ; 030 + ?正 ?是 ?辵 ?彳 ?廴 ?㢟 ?行 ?齒 ?牙 ?足 ; 040 + ?疋 ?品 ?龠 ?冊 ?㗊 ?舌 ?干 ?谷 ?只 ?㕯 ; 050 + ?句 ?丩 ?古 ?十 ?卅 ?言 ?誩 ?音 ?䇂 ?丵 ; 060 + ?菐 ?𠬞 ?廾 ?共 ?異 ?舁 ?𦥑 ?䢅 ?爨 ?革 ; 070 + ?鬲 ?䰜 ?爪 ?𠃨 ?鬥 ?又 ?𠂇 ?㕜 ?支 ?𦘒 ; 080 + ?聿 ?畫 ?隶 ?臤 ?臣 ?殳 ?殺 ?𠘧 ?寸 ?皮 ; 090 + ?㼱 ?攴 ?敎 ?卜 ?用 ?爻 ?㸚 ?𥄎 ?目 ?䀠 ; 100 + ?眉 ?盾 ?自 ?白 ?鼻 ?皕 ?習 ?羽 ?隹 ?奞 ; 110 + ?萑 ?𦫳 ?苜 ?羊 ?羴 ?瞿 ?雔 ?雥 ?鳥 ?烏 ; 120 + ?𠦒 ?冓 ?幺 ?𢆶 ?叀 ?玄 ?予 ?放 ?𠬪 ?𣦼 ; 130 + ?歺 ?死 ?冎 ?骨 ?肉 ?筋 ?刀 ?刃 ?㓞 ?丰 ; 140 + ?耒 ?𧢲 ?竹 ?箕 ?丌 ?左 ?工 ?㠭 ?巫 ?甘 ; 150 + ?曰 ?乃 ?丂 ?可 ?兮 ?号 ?亏 ?旨 ?喜 ?壴 ; 160 + ?鼓 ?豈 ?豆 ?豊 ?豐 ?䖒 ?虍 ?虎 ?虤 ?皿 ; 170 + ?𠙴 ?去 ?血 ?丶 ?丹 ?青 ?井 ?皀 ?鬯 ?食 ; 180 + ?亼 ?會 ?倉 ?入 ?缶 ?矢 ?高 ?冂 ?𩫏 ?京 ; 190 + ?亯 ?𣆪 ?畗 ?㐭 ?嗇 ?來 ?麥 ?夊 ?舛 ?䑞 ; 200 + ?韋 ?弟 ?夂 ?久 ?桀 ?木 ?東 ?林 ?才 ?叒 ; 210 + ?之 ?帀 ?出 ?𣎵 ?生 ?乇 ?𠂹 ?𠌶 ?華 ?𥝌 ; 220 + ?稽 ?巢 ?桼 ?束 ?㯻 ?囗 ?員 ?貝 ?邑 ?𨛜 ; 230 + ?日 ?旦 ?倝 ?㫃 ?冥 ?晶 ?月 ?有 ?明 ?囧 ; 240 + ?夕 ?多 ?毌 ?𢎘 ?𣐺 ?卣 ?齊 ?朿 ?片 ?鼎 ; 250 + ?克 ?彔 ?禾 ?秝 ?黍 ?香 ?米 ?毇 ?臼 ?凶 ; 260 + ?𣎳 ?林 ?麻 ?尗 ?耑 ?韭 ?瓜 ?瓠 ?宀 ?宮 ; 270 + ?呂 ?穴 ?㝱 ?𤕫 ?冖 ?𠔼 ?冃 ?㒳 ?网 ?襾 ; 280 + ?巾 ?巿 ?帛 ?白 ?㡀 ?黹 ?人 ?𠤎 ?匕 ?从 ; 290 + ]) + +(defun shuowen-radical (number) + (aref shuowen-radicals (1- number))) + +(defvar char-db-file-coding-system 'utf-8-mcs-er) + (defvar char-db-feature-domains - '(ucs daikanwa cns gt jis jis/alt jis/a jis/b - jis-x0213 misc unknown)) + '(ucs ucs/compat daikanwa cns gt jis jis/alt jis/a jis/b + jis-x0212 jis-x0213 cdp shinjigen misc unknown)) -(defvar char-db-ignored-attributes nil) +(defvar char-db-ignored-attributes '(ideographic-products)) (defun char-attribute-name< (ka kb) (cond @@ -85,25 +126,56 @@ nil) ((eq '->subsumptive ka) nil) + ((and (symbolp ka) + (string-match "^->" (symbol-name ka))) + (cond ((and (symbolp kb) + (string-match "^->" (symbol-name kb))) + (string< (symbol-name ka) + (symbol-name kb)) + )) + ) + ((and (symbolp kb) + (string-match "^->" (symbol-name kb))) + t) + ((and (symbolp ka) + (string-match "^<-" (symbol-name ka))) + (cond ((symbolp kb) + (cond ((string-match "^<-" (symbol-name kb)) + (string< (symbol-name ka) + (symbol-name kb)) + ) + ;; ((string-match "^->" (symbol-name kb)) + ;; t) + ))) + ) + ((and (symbolp kb) + (string-match "^<-" (symbol-name kb))) + t + ;; (not (string-match "^->" (symbol-name ka))) + ) ((find-charset ka) (if (find-charset kb) - (if (<= (charset-id ka) 0) - (if (<= (charset-id kb) 0) - (cond - ((= (charset-dimension ka) - (charset-dimension kb)) - (> (charset-id ka)(charset-id kb))) - (t - (> (charset-dimension ka) - (charset-dimension kb)) - )) - t) - (if (<= (charset-id kb) 0) - nil - (< (charset-id ka)(charset-id kb)))) - nil)) - ((find-charset kb) - t) + (let (a-ir b-ir) + (if (setq a-ir (charset-property ka 'iso-ir)) + (if (setq b-ir (charset-property kb 'iso-ir)) + (cond + ((= a-ir b-ir) + (< (charset-id ka)(charset-id kb)) + ) + ((= a-ir 177) + t) + ((= b-ir 177) + nil) + ((< a-ir + b-ir) + )) + t) + (if (charset-property kb 'iso-ir) + nil + (< (charset-id ka)(charset-id kb))))) + nil) + ) + ((find-charset kb)) ((symbolp ka) (cond ((symbolp kb) (string< (symbol-name ka) @@ -125,10 +197,11 @@ greek-iso8859-7 thai-tis620 =jis-x0208 - japanese-jisx0208 + =jis-x0208@1978 + =jis-x0208@1983 japanese-jisx0212 - japanese-jisx0208-1978 chinese-gb2312 + =jis-x0208@1990 chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3 @@ -136,7 +209,6 @@ chinese-cns11643-5 chinese-cns11643-6 chinese-cns11643-7 - =jis-x0208-1990 =jis-x0213-1-2000 =jis-x0213-2-2000 korean-ksc5601 @@ -163,9 +235,14 @@ ideograph-hanziku-10 ideograph-hanziku-11 ideograph-hanziku-12 + =gt-k + =ucs@iso + =ucs@unicode =big5 =big5-eten - =gt-k + =jis-x0208@1997 + =zinbun-oracle + =ruimoku-v6 =jef-china3)) (defun char-db-make-char-spec (char) @@ -198,9 +275,13 @@ (if (and (or (charset-iso-final-char ccs) (memq ccs '(=daikanwa - =daikanwa-rev2 + =daikanwa@rev2 ;; =gt-k - ))) + =jis-x0208@1997 + )) + (string-match "=ucs@" (symbol-name ccs))) + (setq ccs (charset-name ccs)) + (null (assq ccs char-spec)) (setq ret (encode-char char ccs 'defined-only))) (setq char-spec (cons (cons ccs ret) char-spec)))) (if (null char-spec) @@ -211,6 +292,12 @@ ((setq ret (get-char-attribute char 'name*)) (setq char-spec (cons (cons 'name* ret) char-spec)) )) + ) + ((setq ret (get-char-attribute + char 'ideographic-combination)) + (setq char-spec + (cons (cons 'ideographic-combination ret) + char-spec)) )) char-spec) ((consp char) @@ -220,7 +307,7 @@ required-features) (unless column (setq column (current-column))) - (let (char-spec al cal key temp-char) + (let (char-spec temp-char) (setq char-spec (char-db-make-char-spec char)) (unless (or (characterp char) ; char (condition-case nil @@ -232,40 +319,10 @@ char-spec))) (remove-char-attribute temp-char 'ideograph-daikanwa) (setq char temp-char)) - ;; (setq al nil - ;; cal nil) - ;; (while char-spec - ;; (setq key (car (car char-spec))) - ;; (unless (memq key char-db-ignored-attributes) - ;; (if (find-charset key) - ;; (if (encode-char char key 'defined-only) - ;; (setq cal (cons key cal))) - ;; (setq al (cons key al)))) - ;; (setq char-spec (cdr char-spec))) - ;; (unless cal - ;; (setq char-spec (char-db-make-char-spec char)) - ;; (while char-spec - ;; (setq key (car (car char-spec))) - ;; (unless (memq key char-db-ignored-attributes) - ;; (if (find-charset key) - ;; (setq cal (cons key cal)) - ;; (setq al (cons key al)))) - ;; (setq char-spec (cdr char-spec))) - ;; ) - ;; (unless (or cal - ;; (memq 'ideographic-structure al)) - ;; (push 'ideographic-structure al)) - ;; (dolist (feature required-features) - ;; (if (find-charset feature) - ;; (if (encode-char char feature 'defined-only) - ;; (setq cal (adjoin feature cal))) - ;; (setq al (adjoin feature al)))) (insert-char-attributes char readable - ;; (or al 'none) cal - (union (mapcar #'car char-spec) - required-features) - ) + (union (mapcar #'car char-spec) + required-features)) (when temp-char ;; undefine temporary character ;; Current implementation is dirty. @@ -294,7 +351,8 @@ (error nil))) (progn (setq al nil - cal nil) + ;; cal nil + ) (while value (setq key (car (car value))) ;; (if (find-charset key) @@ -323,7 +381,8 @@ (progn (setq rest cell al nil - cal nil) + ;; cal nil + ) (while rest (setq key (car (car rest))) ;; (if (find-charset key) @@ -379,7 +438,7 @@ (insert (format "%s%s\t%d ; %c%s" separator name value - (aref ideographic-radicals value) + (ideographic-radical value) line-breaking)) (setq separator "")) (t @@ -418,17 +477,154 @@ (defvar char-db-convert-obsolete-format t) +(defun char-db-insert-ccs-feature (name value line-breaking) + (insert + (format + (cond ((memq name '(=shinjigen + =shinjigen@1ed + =shinjigen@rev =shinjigen/+p@rev)) + "(%-18s . %04d)\t; %c") + ((eq name '=shinjigen@1ed/24pr) + "(%-18s . %04d)\t; %c") + ((or (memq name '(=daikanwa + =daikanwa@rev1 =daikanwa@rev2 + =gt =gt-k =cbeta =zinbun-oracle)) + (string-match "^=adobe-" (symbol-name name))) + "(%-18s . %05d)\t; %c") + ((eq name 'mojikyo) + "(%-18s . %06d)\t; %c") + ((>= (charset-dimension name) 2) + "(%-18s . #x%04X)\t; %c") + (t + "(%-18s . #x%02X)\t; %c")) + name + (if (= (charset-iso-graphic-plane name) 1) + (logior value + (cond ((= (charset-dimension name) 1) + #x80) + ((= (charset-dimension name) 2) + #x8080) + ((= (charset-dimension name) 3) + #x808080) + (t 0))) + value) + (char-db-decode-isolated-char name value))) + (if (and (= (charset-chars name) 94) + (= (charset-dimension name) 2)) + (insert (format " [%02d-%02d]" + (- (lsh value -8) 32) + (- (logand value 255) 32)))) + (insert line-breaking)) + +(defun char-db-insert-relation-feature (char name value line-breaking + ccss readable) + (insert (format "(%-18s%s " name line-breaking)) + (let ((lbs (concat "\n" (make-string (current-column) ?\ ))) + separator cell sources required-features + ret) + (while (consp value) + (setq cell (car value)) + (if (integerp cell) + (setq cell (decode-char '=ucs cell))) + (cond + ((eq name '->subsumptive) + (when (or (not (some (lambda (atr) + (get-char-attribute cell atr)) + char-db-ignored-attributes)) + (some (lambda (ccs) + (encode-char cell ccs 'defined-only)) + ccss)) + (if separator + (insert lbs)) + (let ((char-db-ignored-attributes + (cons '<-subsumptive + char-db-ignored-attributes))) + (insert-char-attributes cell readable)) + (setq separator lbs)) + ) + ((characterp cell) + (setq sources + (get-char-attribute + char (intern (format "%s*sources" name)))) + (setq required-features nil) + (dolist (source sources) + (cond + ((memq source '(JP + JP/Jouyou + shinjigen shinjigen@1ed shinjigen@rev)) + (setq required-features + (union required-features + '(=jis-x0208 + =jis-x0208@1990 + =jis-x0213-1-2000 + =jis-x0213-2-2000 + =jis-x0212 + =jis-x0208@1983 + =jis-x0208@1978 + =shinjigen)))) + ((eq source 'CN) + (setq required-features + (union required-features + '(=gb2312 + =gb12345 + =iso-ir165))))) + (cond + ((find-charset (setq ret (intern (format "=%s" source)))) + (setq required-features + (cons ret required-features))) + (t (setq required-features + (cons source required-features))))) + (cond ((string-match "@JP" (symbol-name name)) + (setq required-features + (union required-features + '(=jis-x0208 + =jis-x0208@1990 + =jis-x0213-1-2000 + =jis-x0213-2-2000 + =jis-x0212 + =jis-x0208@1983 + =jis-x0208@1978)))) + ((string-match "@CN" (symbol-name name)) + (setq required-features + (union required-features + '(=gb2312 + =gb12345 + =iso-ir165))))) + (if separator + (insert lbs)) + (if readable + (insert (format "%S" cell)) + (char-db-insert-char-spec cell readable + nil + required-features)) + (setq separator lbs)) + ((consp cell) + (if separator + (insert lbs)) + (if (consp (car cell)) + (char-db-insert-char-spec cell readable) + (char-db-insert-char-reference cell readable)) + (setq separator lbs)) + (t + (if separator + (insert separator)) + (insert (prin1-to-string cell)) + (setq separator " "))) + (setq value (cdr value))) + (insert ")") + (insert line-breaking))) + (defun insert-char-attributes (char &optional readable attributes column) (unless column (setq column (current-column))) - (let (name value has-long-ccs-name rest + (let (name value ; has-long-ccs-name + rest radical strokes (line-breaking (concat "\n" (make-string (1+ column) ?\ ))) lbs cell separator ret key al cal - dest-ccss - sources required-features + dest-ccss ; sources required-features ccss) (let (atr-d) (setq attributes @@ -449,6 +645,19 @@ atr-d) #'char-attribute-name<))) (insert "(") + (when (memq '<-subsumptive attributes) + (when readable + (when (setq value (get-char-attribute char '<-subsumptive)) + (char-db-insert-relation-feature char '<-subsumptive value + line-breaking + ccss readable))) + (setq attributes (delq '<-subsumptive attributes))) + (when (and (memq '<-denotational attributes) + (setq value (get-char-attribute char '<-denotational))) + (char-db-insert-relation-feature char '<-denotational value + line-breaking + ccss readable) + (setq attributes (delq '<-denotational attributes))) (when (and (memq 'name attributes) (setq value (get-char-attribute char 'name))) (insert (format @@ -495,30 +704,6 @@ line-breaking)) (setq attributes (delq name attributes)) )) - ;; (dolist (name '(=>ucs-gb =>ucs-cns =>ucs-jis =>ucs-ks =>ucs-big5)) - ;; (when (and (memq name attributes) - ;; (setq value (get-char-attribute char name))) - ;; (insert (format "(%-18s . #x%04X)\t; %c%s" - ;; (intern - ;; (concat "=>ucs@" - ;; (substring (symbol-name name) 6))) - ;; value - ;; (decode-char (intern - ;; (concat "=ucs@" - ;; (substring - ;; (symbol-name name) 6))) - ;; value) - ;; line-breaking)) - ;; (setq attributes (delq name attributes)))) - ;; (when (and (memq '->ucs attributes) - ;; (setq value (get-char-attribute char '->ucs))) - ;; (insert (format (if char-db-convert-obsolete-format - ;; "(=>ucs\t\t. #x%04X)\t; %c%s" - ;; "(->ucs\t\t. #x%04X)\t; %c%s") - ;; value (decode-char '=ucs value) - ;; line-breaking)) - ;; (setq attributes (delq '->ucs attributes)) - ;; ) (dolist (name '(=>daikanwa)) (when (and (memq name attributes) (setq value (get-char-attribute char name))) @@ -623,12 +808,32 @@ (setq radical value) (insert (format "(ideographic-radical . %S)\t; %c%s" radical - (aref ideographic-radicals radical) + (ideographic-radical radical) line-breaking)) (setq attributes (delq 'ideographic-radical attributes)) ) + (when (and (memq 'shuowen-radical attributes) + (setq value (get-char-attribute char 'shuowen-radical))) + (insert (format "(shuowen-radical\t. %S)\t; %c%s" + value + (shuowen-radical value) + line-breaking)) + (setq attributes (delq 'shuowen-radical attributes)) + ) (let (key) - (dolist (domain char-db-feature-domains) + (dolist (domain + (append + char-db-feature-domains + (let (dest domain) + (dolist (feature (char-attribute-list)) + (setq feature (symbol-name feature)) + (when (string-match + "\\(radical\\|strokes\\)@\\([^@*]+\\)\\(\\*\\|$\\)" + feature) + (setq domain (intern (match-string 2 feature))) + (unless (memq domain dest) + (setq dest (cons domain dest))))) + (sort dest #'string<)))) (setq key (intern (format "%s@%s" 'ideographic-radical domain))) (when (and (memq key attributes) (setq value (get-char-attribute char key))) @@ -636,7 +841,7 @@ (insert (format "(%s . %S)\t; %c%s" key radical - (aref ideographic-radicals radical) + (ideographic-radical radical) line-breaking)) (setq attributes (delq key attributes)) ) @@ -686,7 +891,7 @@ (unless (eq value radical) (insert (format "(kangxi-radical\t . %S)\t; %c%s" value - (aref ideographic-radicals value) + (ideographic-radical value) line-breaking)) (or radical (setq radical value))) @@ -707,7 +912,7 @@ (unless (eq value radical) (insert (format "(japanese-radical\t . %S)\t; %c%s" value - (aref ideographic-radicals value) + (ideographic-radical value) line-breaking)) (or radical (setq radical value))) @@ -727,7 +932,7 @@ (setq value (get-char-attribute char 'cns-radical))) (insert (format "(cns-radical\t . %S)\t; %c%s" value - (aref ideographic-radicals value) + (ideographic-radical value) line-breaking)) (setq attributes (delq 'cns-radical attributes)) ) @@ -746,7 +951,7 @@ (unless (eq value radical) (insert (format "(shinjigen-1-radical . %S)\t; %c%s" value - (aref ideographic-radicals value) + (ideographic-radical value) line-breaking)) (or radical (setq radical value))) @@ -806,26 +1011,26 @@ line-breaking)) (setq attributes (delq '->ideograph attributes)) ) - (when (and (memq '->decomposition attributes) - (setq value (get-char-attribute char '->decomposition))) - (insert (format "(->decomposition\t%s)%s" - (mapconcat (lambda (code) - (cond ((symbolp code) - (symbol-name code)) - ((characterp code) - (if readable - (format "%S" code) - (format "#x%04X" - (char-int code)) - )) - ((integerp code) - (format "#x%04X" code)) - (t - (format "%s%S" line-breaking code)))) - value " ") - line-breaking)) - (setq attributes (delq '->decomposition attributes)) - ) + ;; (when (and (memq '->decomposition attributes) + ;; (setq value (get-char-attribute char '->decomposition))) + ;; (insert (format "(->decomposition\t%s)%s" + ;; (mapconcat (lambda (code) + ;; (cond ((symbolp code) + ;; (symbol-name code)) + ;; ((characterp code) + ;; (if readable + ;; (format "%S" code) + ;; (format "#x%04X" + ;; (char-int code)) + ;; )) + ;; ((integerp code) + ;; (format "#x%04X" code)) + ;; (t + ;; (format "%s%S" line-breaking code)))) + ;; value " ") + ;; line-breaking)) + ;; (setq attributes (delq '->decomposition attributes)) + ;; ) (if (equal (get-char-attribute char '->titlecase) (get-char-attribute char '->uppercase)) (setq attributes (delq '->titlecase attributes))) @@ -856,20 +1061,9 @@ ) (unless readable (dolist (ignored '(composition - ->denotational <-subsumptive ->ucs-unified)) + ->denotational <-subsumptive ->ucs-unified + ->ideographic-component-forms)) (setq attributes (delq ignored attributes)))) - ;; (setq rest ccs-attributes) - ;; (while (and rest - ;; (progn - ;; (setq value (get-char-attribute char (car rest))) - ;; (if value - ;; (if (>= (length (symbol-name (car rest))) 19) - ;; (progn - ;; (setq has-long-ccs-name t) - ;; nil) - ;; t) - ;; t))) - ;; (setq rest (cdr rest))) (while attributes (setq name (car attributes)) (if (setq value (get-char-attribute char name)) @@ -879,39 +1073,7 @@ (prog1 (setq value (get-char-attribute char name)) (setq dest-ccss (cons name dest-ccss)))) - (insert - (format - (cond ((memq name '(=daikanwa - =daikanwa-rev1 =daikanwa-rev2 - =gt =gt-k =cbeta)) - (if has-long-ccs-name - "(%-26s . %05d)\t; %c%s" - "(%-18s . %05d)\t; %c%s")) - ((eq name 'mojikyo) - (if has-long-ccs-name - "(%-26s . %06d)\t; %c%s" - "(%-18s . %06d)\t; %c%s")) - ((>= (charset-dimension name) 2) - (if has-long-ccs-name - "(%-26s . #x%04X)\t; %c%s" - "(%-18s . #x%04X)\t; %c%s")) - (t - (if has-long-ccs-name - "(%-26s . #x%02X)\t; %c%s" - "(%-18s . #x%02X)\t; %c%s"))) - name - (if (= (charset-iso-graphic-plane name) 1) - (logior value - (cond ((= (charset-dimension name) 1) - #x80) - ((= (charset-dimension name) 2) - #x8080) - ((= (charset-dimension name) 3) - #x808080) - (t 0))) - value) - (char-db-decode-isolated-char name value) - line-breaking))) + (char-db-insert-ccs-feature name value line-breaking)) ) ((string-match "^=>ucs@" (symbol-name name)) (insert (format "(%-18s . #x%04X)\t; %c%s" @@ -923,84 +1085,54 @@ name value line-breaking)) ) - ((and (not readable) - (string-match "^->simplified" (symbol-name name))) + ((and + (not readable) + (not (eq name '->subsumptive)) + (not (eq name '->uppercase)) + (not (eq name '->lowercase)) + (not (eq name '->titlecase)) + (not (eq name '->canonical)) + (not (eq name '->Bopomofo)) + (not (eq name '->mistakable)) + (not (eq name '->ideographic-variants)) + (null (get-char-attribute + char (intern (format "%s*sources" name)))) + (not (string-match "\\*sources$" (symbol-name name))) + (null (get-char-attribute + char (intern (format "%s*note" name)))) + (not (string-match "\\*note$" (symbol-name name))) + (or (eq name '<-identical) + (eq name '<-uppercase) + (eq name '<-lowercase) + (eq name '<-titlecase) + (eq name '<-canonical) + (eq name '<-ideographic-variants) + ;; (eq name '<-synonyms) + (string-match "^<-synonyms" (symbol-name name)) + (eq name '<-mistakable) + (when (string-match "^->" (symbol-name name)) + (cond + ((string-match "^->fullwidth" (symbol-name name)) + (not (and (consp value) + (characterp (car value)) + (encode-char + (car value) '=ucs 'defined-only))) + ) + (t))) + )) ) ((or (eq name 'ideographic-structure) + (eq name 'ideographic-combination) (eq name 'ideographic-) - (string-match "^\\(->\\|<-\\)" (symbol-name name))) - (insert (format "(%-18s%s " name line-breaking)) - (setq lbs (concat "\n" (make-string (current-column) ?\ )) - separator nil) - (while (consp value) - (setq cell (car value)) - (if (integerp cell) - (setq cell (decode-char '=ucs cell))) - (cond ((eq name '->subsumptive) - (when (or (not - (some (lambda (atr) - (get-char-attribute cell atr)) - char-db-ignored-attributes)) - (some (lambda (ccs) - (encode-char cell ccs - 'defined-only)) - ccss)) - (if separator - (insert lbs)) - (let ((char-db-ignored-attributes - (cons '<-subsumptive - char-db-ignored-attributes))) - (insert-char-attributes cell readable)) - (setq separator lbs)) - ) - ((characterp cell) - (setq sources - (get-char-attribute - char - (intern (format "%s*sources" name)))) - (setq required-features nil) - (dolist (source sources) - (setq required-features - (cons - (if (find-charset - (setq ret (intern - (format "=%s" source)))) - ret - source) - required-features))) - (when (string-match "@JP" (symbol-name name)) - (setq required-features - (union required-features - '(=jis-x0208 - =jis-x0208-1990 - =jis-x0213-1-2000 - =jis-x0213-2-2000 - =jis-x0212 - =jis-x0208-1983 - =jis-x0208-1978)))) - (if separator - (insert lbs)) - (if readable - (insert (format "%S" cell)) - (char-db-insert-char-spec cell readable - nil - required-features)) - (setq separator lbs)) - ((consp cell) - (if separator - (insert lbs)) - (if (consp (car cell)) - (char-db-insert-char-spec cell readable) - (char-db-insert-char-reference cell readable)) - (setq separator lbs)) - (t - (if separator - (insert separator)) - (insert (prin1-to-string cell)) - (setq separator " "))) - (setq value (cdr value))) - (insert ")") - (insert line-breaking)) + (eq name '=decomposition) + (string-match "^=>decomposition" (symbol-name name)) + (string-match "^\\(->\\|<-\\)[^*]*$" (symbol-name name)) + (string-match "^\\(->\\|<-\\)[^*]*\\*sources$" + (symbol-name name)) + ) + (char-db-insert-relation-feature char name value + line-breaking + ccss readable)) ((memq name '(ideograph= original-ideograph-of ancient-ideograph-of @@ -1070,51 +1202,20 @@ (insert ")") (insert line-breaking)) (t - (insert (format "(%-18s . %S)%s" - name value - line-breaking))) + (insert (format "(%-18s" name)) + (setq ret (prin1-to-string value)) + (unless (< (+ (current-column) + (length ret) + 3) + 76) + (insert line-breaking)) + (insert " . " ret ")" line-breaking) + ;; (insert (format "(%-18s . %S)%s" + ;; name value + ;; line-breaking)) + ) )) (setq attributes (cdr attributes))) - ;; (while ccs-attributes - ;; (setq name (charset-name (car ccs-attributes))) - ;; (if (and (not (memq name dest-ccss)) - ;; (prog1 - ;; (setq value (get-char-attribute char name)) - ;; (setq dest-ccss (cons name dest-ccss)))) - ;; (insert - ;; (format - ;; (cond ((memq name '(=daikanwa - ;; =daikanwa-rev1 =daikanwa-rev2 - ;; =gt =gt-k =cbeta)) - ;; (if has-long-ccs-name - ;; "(%-26s . %05d)\t; %c%s" - ;; "(%-18s . %05d)\t; %c%s")) - ;; ((eq name 'mojikyo) - ;; (if has-long-ccs-name - ;; "(%-26s . %06d)\t; %c%s" - ;; "(%-18s . %06d)\t; %c%s")) - ;; ((>= (charset-dimension name) 2) - ;; (if has-long-ccs-name - ;; "(%-26s . #x%04X)\t; %c%s" - ;; "(%-18s . #x%04X)\t; %c%s")) - ;; (t - ;; (if has-long-ccs-name - ;; "(%-26s . #x%02X)\t; %c%s" - ;; "(%-18s . #x%02X)\t; %c%s"))) - ;; name - ;; (if (= (charset-iso-graphic-plane name) 1) - ;; (logior value - ;; (cond ((= (charset-dimension name) 1) - ;; #x80) - ;; ((= (charset-dimension name) 2) - ;; #x8080) - ;; ((= (charset-dimension name) 3) - ;; #x808080) - ;; (t 0))) - ;; value) - ;; (char-db-decode-isolated-char name value) - ;; line-breaking))) - ;; (setq ccs-attributes (cdr ccs-attributes))) (insert ")"))) (defun insert-char-data (char &optional readable @@ -1143,24 +1244,30 @@ no-ucs-unified script excluded-script) (insert-char-data char printable) - (let ((variants (or (char-variants char) - (let ((ucs (get-char-attribute char '->ucs))) - (if ucs - (delete char (char-variants (int-char ucs))))))) - variant vs) + (let ((variants (char-variants char)) + rest + variant vs ret) (setq variants (sort variants #'<)) - (while variants - (setq variant (car variants)) - (if (and (or (null script) - (null (setq vs (get-char-attribute variant 'script))) - (memq script vs)) - (or (null excluded-script) - (null (setq vs (get-char-attribute variant 'script))) - (not (memq excluded-script vs)))) - (or (and no-ucs-unified (get-char-attribute variant '=ucs)) - (insert-char-data variant printable))) - (setq variants (cdr variants)) - ))) + (setq rest variants) + (setq variants (cons char variants)) + (while rest + (setq variant (car rest)) + (unless (get-char-attribute variant '<-subsumptive) + (if (and (or (null script) + (null (setq vs (get-char-attribute variant 'script))) + (memq script vs)) + (or (null excluded-script) + (null (setq vs (get-char-attribute variant 'script))) + (not (memq excluded-script vs)))) + (unless (and no-ucs-unified (get-char-attribute variant '=ucs)) + (insert-char-data variant printable) + (if (setq ret (char-variants variant)) + (while ret + (or (memq (car ret) variants) + ;; (get-char-attribute (car ret) '<-subsumptive) + (setq rest (nconc rest (list (car ret))))) + (setq ret (cdr ret))))))) + (setq rest (cdr rest))))) (defun insert-char-range-data (min max &optional script excluded-script) (let ((code min) @@ -1174,9 +1281,10 @@ (defun write-char-range-data-to-file (min max file &optional script excluded-script) - (let ((coding-system-for-write 'utf-8-mcs)) + (let ((coding-system-for-write char-db-file-coding-system)) (with-temp-buffer - (insert ";; -*- coding: utf-8-mcs -*-\n") + (insert (format ";; -*- coding: %s -*-\n" + char-db-file-coding-system)) (insert-char-range-data min max script excluded-script) (write-region (point-min)(point-max) file))))