-;;; char-db-util.el --- Character Database utility
+;;; char-db-util.el --- Character Database utility -*- coding: utf-8-er; -*-
-;; Copyright (C) 1998,1999,2000,2001,2002,2003,2004 MORIOKA Tomohiko.
+;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+;; 2007, 2008, 2009, 2010, 2011 MORIOKA Tomohiko.
;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
-;; Keywords: CHISE, Character Database, ISO/IEC 10646, Unicode, UCS-4, MULE.
+;; Keywords: CHISE, Character Database, ISO/IEC 10646, UCS, Unicode, MULE.
;; This file is part of XEmacs CHISE.
;;; Code:
-(require 'alist)
+(require 'chise-subr)
+(require 'ideograph-subr)
(defconst unidata-normative-category-alist
'(("Lu" letter uppercase)
("So" symbol other)
))
-(defconst ideographic-radicals
- (let ((v (make-vector 215 nil))
- (i 1))
- (while (< i 215)
- (aset v i (decode-char '=ucs (+ #x2EFF i)))
- (setq i (1+ i)))
- v))
+(defconst shuowen-radicals ; Vector of radical characters, ten per row; each row's trailing number is the 1-based index of its last entry.
+ [?一 ?上 ?示 ?三 ?王 ?玉 ?玨 ?气 ?士 ?丨 ; 010
+ ?屮 ?艸 ?蓐 ?茻 ?小 ?八 ?釆 ?半 ?牛 ?犛 ; 020
+ ?告 ?口 ?凵 ?吅 ?哭 ?走 ?止 ?癶 ?步 ?此 ; 030
+ ?正 ?是 ?辵 ?彳 ?廴 ?㢟 ?行 ?齒 ?牙 ?足 ; 040
+ ?疋 ?品 ?龠 ?冊 ?㗊 ?舌 ?干 ?谷 ?只 ?㕯 ; 050
+ ?句 ?丩 ?古 ?十 ?卅 ?言 ?誩 ?音 ?䇂 ?丵 ; 060
+ ?菐 ?𠬞 ?𠬜 ?共 ?異 ?舁 ?𦥑 ?䢅 ?爨 ?革 ; 070
+ ?鬲 ?䰜 ?爪 ?𠃨 ?鬥 ?又 ?𠂇 ?㕜 ?支 ?𦘒 ; 080
+ ?聿 ?畫 ?隶 ?臤 ?臣 ?殳 ?殺 ?𠘧 ?寸 ?皮 ; 090
+ ?㼱 ?攴 ?敎 ?卜 ?用 ?爻 ?㸚 ?𥄎 ?目 ?䀠 ; 100
+ ?眉 ?盾 ?自 ?白 ?鼻 ?皕 ?習 ?羽 ?隹 ?奞 ; 110
+ ?萑 ?𦫳 ?苜 ?羊 ?羴 ?瞿 ?雔 ?雥 ?鳥 ?烏 ; 120
+ ?𠦒 ?冓 ?幺 ?𢆶 ?叀 ?玄 ?予 ?放 ?𠬪 ?𣦼 ; 130
+ ?歺 ?死 ?冎 ?骨 ?肉 ?筋 ?刀 ?刃 ?㓞 ?丰 ; 140
+ ?耒 ?𧢲 ?竹 ?箕 ?丌 ?左 ?工 ?㠭 ?巫 ?甘 ; 150
+ ?旨 ?曰 ?乃 ?丂 ?可 ?兮 ?号 ?亏 ?喜 ?壴 ; 160
+ ?鼓 ?豈 ?豆 ?豊 ?豐 ?䖒 ?虍 ?虎 ?虤 ?皿 ; 170
+ ?𠙴 ?去 ?血 ?丶 ?丹 ?青 ?井 ?皀 ?鬯 ?食 ; 180
+ ?亼 ?會 ?倉 ?入 ?缶 ?矢 ?高 ?冂 ?𩫏 ?京 ; 190
+ ?亯 ?𣆪 ?畗 ?㐭 ?嗇 ?來 ?麥 ?夊 ?舛 ?䑞 ; 200
+ ?韋 ?弟 ?夂 ?久 ?桀 ?木 ?東 ?林 ?才 ?叒 ; 210
+ ?之 ?帀 ?出 ?𣎵 ?生 ?乇 ?𠂹 ?𠌶 ?華 ?𥝌 ; 220
+ ?稽 ?巢 ?桼 ?束 ?㯻 ?囗 ?員 ?貝 ?邑 ?𨛜 ; 230
+ ?日 ?旦 ?倝 ?㫃 ?冥 ?晶 ?月 ?有 ?明 ?囧 ; 240
+ ?夕 ?多 ?毌 ?𢎘 ?𣐺 ?卣 ?齊 ?朿 ?片 ?鼎 ; 250
+ ?克 ?彔 ?禾 ?秝 ?黍 ?香 ?米 ?毇 ?臼 ?凶 ; 260
+ ?𣎳 ?林 ?麻 ?尗 ?耑 ?韭 ?瓜 ?瓠 ?宀 ?宮 ; 270  NOTE(review): ?林 (entry 262) also appears as entry 208 -- confirm the repeat is intended.
+ ?呂 ?穴 ?㝱 ?𤕫 ?冖 ?𠔼 ?冃 ?㒳 ?网 ?襾 ; 280
+ ?巾 ?巿 ?帛 ?白 ?㡀 ?黹 ?人 ?𠤎 ?匕 ?从 ; 290  NOTE(review): ?白 (entry 284) also appears as entry 104 -- confirm.
+ ]) ; As shown here the table has 290 entries, so `shuowen-radical' signals an out-of-range error for numbers above 290 -- TODO confirm the table is complete.
-(defvar char-db-file-coding-system 'utf-8-mcs-er)
-
-(defvar char-db-feature-domains
- '(ucs daikanwa cns gt jis jis/alt jis/a jis/b
- jis-x0213 misc unknown))
+(defun shuowen-radical (number)
+  "Return the entry of `shuowen-radicals' for radical NUMBER.
+NUMBER counts from 1; the vector itself is indexed from 0."
+  (let ((index (- number 1)))
+    (aref shuowen-radicals index)))
-(defvar char-db-ignored-attributes nil)
+(defvar char-db-file-coding-system 'utf-8-mcs-er)
-(defun char-attribute-name< (ka kb)
- (cond
- ((eq '->denotational kb)
- t)
- ((eq '->subsumptive kb)
- (not (eq '->denotational ka)))
- ((eq '->denotational ka)
- nil)
- ((eq '->subsumptive ka)
- nil)
- ((find-charset ka)
- (if (find-charset kb)
- (if (<= (charset-id ka) 1)
- (if (<= (charset-id kb) 1)
- (cond
- ((= (charset-dimension ka)
- (charset-dimension kb))
- (> (charset-id ka)(charset-id kb)))
- (t
- (> (charset-dimension ka)
- (charset-dimension kb))
- ))
- t)
- (if (<= (charset-id kb) 1)
- nil
- (< (charset-id ka)(charset-id kb))))
- nil))
- ((find-charset kb)
- t)
- ((symbolp ka)
- (cond ((symbolp kb)
- (string< (symbol-name ka)
- (symbol-name kb)))
- (t)))
- ((symbolp kb)
- nil)))
+(defvar char-db-ignored-attributes '(ideographic-products)) ; Features checked by `char-db-insert-relation-feature': a `->subsumptive' child that has any of them is written only if it is also encodable in one of the given CCSs.  Rebound with `<-subsumptive' added while such a child is being written.
(defvar char-db-coded-charset-priority-list
'(ascii
greek-iso8859-7
thai-tis620
=jis-x0208
- japanese-jisx0208
+ =jis-x0208@1978
+ =jis-x0208@1983
japanese-jisx0212
- japanese-jisx0208-1978
chinese-gb2312
+ =jis-x0208@1990
chinese-cns11643-1
chinese-cns11643-2
chinese-cns11643-3
chinese-cns11643-5
chinese-cns11643-6
chinese-cns11643-7
- =jis-x0208-1990
- =jis-x0213-1-2000
- =jis-x0213-2-2000
+ =jis-x0213-1
+ =jis-x0213-1@2000
+ =jis-x0213-1@2004
+ =jis-x0213-2
korean-ksc5601
chinese-isoir165
katakana-jisx0201
ethiopic-ucs
=big5-cdp
=gt
- ideograph-daikanwa-2
- ideograph-daikanwa
+ =adobe-japan1-0
+ =adobe-japan1-1
+ =adobe-japan1-2
+ =adobe-japan1-3
+ =adobe-japan1-4
+ =adobe-japan1-5
+ =adobe-japan1-6
+ =hanyo-denshi/ja
+ =hanyo-denshi/jb
+ =hanyo-denshi/jc
+ =hanyo-denshi/jd
+ =hanyo-denshi/ft
+ =hanyo-denshi/ia
+ =hanyo-denshi/ib
+ =hanyo-denshi/hg
+ =hanyo-denshi/jt
+ =hanyo-denshi/ks
+ =daikanwa
+ =daikanwa@rev2
+ =daikanwa@rev1
=cbeta
+ =gt-k
ideograph-hanziku-1
ideograph-hanziku-2
ideograph-hanziku-3
ideograph-hanziku-10
ideograph-hanziku-11
ideograph-hanziku-12
+ =>>>jis-x0208
+ =>>>jis-x0213-1
+ =>>>jis-x0213-2
+ =>>jis-x0208
+ =>>jis-x0213-1
+ =>>jis-x0213-1@2000
+ =>>jis-x0213-1@2004
+ =>>jis-x0213-2
+ =>>jis-x0208@1978
+ =>>hanyo-denshi/ft
+ =>>hanyo-denshi/ks
+ =>>gt
+ =>>daikanwa
+ =+>jis-x0208
+ =+>jis-x0213-1
+ =+>jis-x0213-2
+ =+>jis-x0208@1978
+ =>jis-x0208
+ =>jis-x0208@1997
+ =>jis-x0213-1
+ =>jis-x0213-1@2000
+ =>jis-x0213-1@2004
+ =>jis-x0213-2
+ ==>ucs@bucs
+ =>ucs@iso
+ =>ucs@unicode
+ =>ucs@jis
+ =>ucs@cns
+ =>ucs@ks
+ =+>ucs@unicode
+ =+>ucs@jis
+ =+>ucs@jis/1990
+ =+>ucs@cns
+ =+>ucs@ks
+ =>>ucs@iso
+ =>>ucs@unicode
+ =>>ucs@jis
+ =>>ucs@cns
+ =>>>ucs@iso
+ =>>>ucs@unicode
+ =ucs@iso
+ =ucs@unicode
+ =>>big5-cdp
+ =>>gt-k
+ =+>gt
+ =>gt
+ =>big5-cdp
+ =>daikanwa
=big5
=big5-eten
- =gt-k
- =jef-china3))
+ =>gt-k
+ =zinbun-oracle
+ =>zinbun-oracle
+ =ruimoku-v6
+ =>>ruimoku-v6
+ =jef-china3
+ =shinjigen))
+
+
+;;; @ char-db formatters
+;;;
(defun char-db-make-char-spec (char)
(let (ret char-spec)
(if (and (or (charset-iso-final-char ccs)
(memq ccs
'(=daikanwa
- =daikanwa-rev2
+ =daikanwa@rev2
;; =gt-k
- )))
+ =jis-x0208@1997
+ ))
+ (string-match "=ucs@" (symbol-name ccs)))
+ (setq ccs (charset-name ccs))
+ (null (assq ccs char-spec))
(setq ret (encode-char char ccs 'defined-only)))
(setq char-spec (cons (cons ccs ret) char-spec))))
(if (null char-spec)
((setq ret (get-char-attribute char 'name*))
(setq char-spec (cons (cons 'name* ret) char-spec))
))
+ )
+ ((setq ret (get-char-attribute
+ char 'ideographic-combination))
+ (setq char-spec
+ (cons (cons 'ideographic-combination ret)
+ char-spec))
))
char-spec)
((consp char)
(insert-char-attributes char
readable
(union (mapcar #'car char-spec)
- required-features))
+ required-features)
+ nil 'for-sub-node)
(when temp-char
;; undefine temporary character
;; Current implementation is dirty.
(insert-char-attributes ret
readable
(or al 'none) ; cal
- ))
+ nil 'for-sub-node))
(insert (prin1-to-string value)))
(insert ")")
(insert line-breaking))
(insert-char-attributes ret
readable
al ; cal
- )
+ nil 'for-sub-node)
(setq separator lbs))
(if separator
(insert separator))
(insert (format "%s%s\t%d ; %c%s"
separator
name value
- (aref ideographic-radicals value)
+ (ideographic-radical value)
line-breaking))
(setq separator ""))
(t
(defvar char-db-convert-obsolete-format t)
-(defun insert-char-attributes (char &optional readable attributes column)
+(defun char-db-insert-ccs-feature (name value line-breaking)
+  "Insert the coded-charset feature cell (NAME . VALUE) at point.
+An integer VALUE is written with a number format chosen from NAME and
+is followed by a comment holding the character that
+`char-db-decode-isolated-char' returns for NAME and VALUE.  When the
+graphic plane of NAME is 1, the written code has the high bit of each
+of its bytes set (for dimensions 1 to 3).  For a 94-character charset
+of dimension 2, \" [HH-LL]\" is appended, where HH and LL are the high
+and low bytes of VALUE, each minus 32.
+Any other VALUE is written with `%s'.
+LINE-BREAKING is inserted last in both cases."
+  (if (not (integerp value))
+      (insert (format "(%-18s . %s)" name value))
+    (let* ((template
+            (cond
+             ;; Four decimal digits.
+             ((memq name '(=shinjigen
+                           =shinjigen@1ed
+                           =shinjigen@rev =shinjigen/+p@rev
+                           =daikanwa/ho
+                           =shinjigen@1ed/24pr))
+              "(%-18s . %04d)\t; %c")
+             ;; Five decimal digits.
+             ((or (memq name '(=daikanwa =>>daikanwa =>daikanwa
+                               =daikanwa@rev1 =daikanwa@rev2
+                               =daikanwa/+p =daikanwa/+2p
+                               =gt =>>>gt =>>gt =+>gt =>gt
+                               =gt-k =>>gt-k =>gt-k
+                               =>>adobe-japan1
+                               =cbeta =>>cbeta
+                               =zinbun-oracle =>zinbun-oracle))
+                  (string-match "^=adobe-" (symbol-name name)))
+              "(%-18s . %05d)\t; %c")
+             ;; Six decimal digits.
+             ((memq name '(=hanyo-denshi/ks =>>hanyo-denshi/ks mojikyo))
+              "(%-18s . %06d)\t; %c")
+             ;; Everything else is hexadecimal, width by dimension.
+             ((>= (charset-dimension name) 2)
+              "(%-18s . #x%04X)\t; %c")
+             (t
+              "(%-18s . #x%02X)\t; %c")))
+           (code
+            (if (= (charset-iso-graphic-plane name) 1)
+                (let ((dimension (charset-dimension name)))
+                  (logior value
+                          (cond ((= dimension 1) #x80)
+                                ((= dimension 2) #x8080)
+                                ((= dimension 3) #x808080)
+                                (t 0))))
+              value))
+           (sample (char-db-decode-isolated-char name value)))
+      (insert (format template name code sample)))
+    ;; The bracketed pair is computed from the raw VALUE, not from CODE.
+    (when (and (= (charset-chars name) 94)
+               (= (charset-dimension name) 2))
+      (insert (format " [%02d-%02d]"
+                      (- (lsh value -8) 32)
+                      (- (logand value 255) 32)))))
+  (insert line-breaking))
+
+(defun char-db-relation-required-features (char name)
+  "Return the features to request when writing a member of relation NAME.
+The list is built from the `NAME*sources' feature of CHAR and from an
+\"@JP\" or \"@CN\" marker in NAME itself:
+- a Japanese source (JP, JP/Jouyou, shinjigen, shinjigen@1ed or
+  shinjigen@rev) adds the JIS coded-charsets and `=shinjigen';
+- the source CN adds the GB coded-charsets;
+- every source is also added itself, as `=SOURCE' when that names a
+  charset and as SOURCE otherwise;
+- \"@JP\" in NAME adds the JIS coded-charsets, \"@CN\" the GB ones."
+  (let ((jis-features '(=jis-x0208
+                        =jis-x0208@1990
+                        =jis-x0213-1@2000
+                        =jis-x0213-1@2004
+                        =jis-x0213-2
+                        =jis-x0212
+                        =jis-x0208@1983
+                        =jis-x0208@1978))
+        (cn-features '(=gb2312
+                       =gb12345
+                       =iso-ir165))
+        (feature-name (symbol-name name))
+        required-features ccs)
+    (dolist (source (get-char-attribute
+                     char (intern (format "%s*sources" name))))
+      (cond
+       ((memq source '(JP
+                       JP/Jouyou
+                       shinjigen shinjigen@1ed shinjigen@rev))
+        (setq required-features
+              (union required-features
+                     (append jis-features '(=shinjigen)))))
+       ((eq source 'CN)
+        (setq required-features
+              (union required-features cn-features))))
+      ;; The source itself is always requested too, preferring the
+      ;; coded-charset spelled `=SOURCE' when one exists.
+      (setq ccs (intern (format "=%s" source)))
+      (setq required-features
+            (cons (if (find-charset ccs) ccs source)
+                  required-features)))
+    ;; The "@JP" list uses the same CCS names as the Japanese-source list
+    ;; above, so the two stay in step.
+    (cond ((string-match "@JP" feature-name)
+           (setq required-features
+                 (union required-features jis-features)))
+          ((string-match "@CN" feature-name)
+           (setq required-features
+                 (union required-features cn-features))))
+    required-features))
+
+(defun char-db-insert-relation-feature (char name value line-breaking
+                                        ccss readable)
+  "Insert relation feature NAME of CHAR, whose members are the list VALUE.
+The output is \"(NAME\" padded to 18 columns, LINE-BREAKING, the members,
+\")\" and LINE-BREAKING again.  Each member of VALUE is handled as
+follows (an integer member is first decoded as a `=ucs' code point):
+- when NAME is `->subsumptive', the member is written with
+  `insert-char-attributes' as a sub node, but only if it has none of
+  `char-db-ignored-attributes' or is encodable in one of CCSS;
+- a character is written with `%S' when READABLE is non-nil, otherwise
+  as a char-spec that requests the features computed by
+  `char-db-relation-required-features';
+- a cons whose car is a cons is written with `char-db-insert-char-spec',
+  any other cons with `char-db-insert-char-reference';
+- anything else is printed with `prin1-to-string'.
+Members of the first three kinds each start on a new line aligned with
+the first member; members of the last kind are preceded by the separator
+left by the previous member."
+  (insert (format "(%-18s%s " name line-breaking))
+  (let ((lbs (concat "\n" (make-string (current-column) ?\ )))
+        separator cell)
+    (while (consp value)
+      (setq cell (car value))
+      (if (integerp cell)
+          (setq cell (decode-char '=ucs cell)))
+      (cond
+       ((eq name '->subsumptive)
+        (when (or (not (some (lambda (atr)
+                               (get-char-attribute cell atr))
+                             char-db-ignored-attributes))
+                  (some (lambda (ccs)
+                          (encode-char cell ccs 'defined-only))
+                        ccss))
+          (if separator
+              (insert lbs))
+          ;; Do not let the child print its back-pointer to CHAR.
+          (let ((char-db-ignored-attributes
+                 (cons '<-subsumptive
+                       char-db-ignored-attributes)))
+            (insert-char-attributes cell readable nil nil 'for-sub-node))
+          (setq separator lbs)))
+       ((characterp cell)
+        (if separator
+            (insert lbs))
+        (if readable
+            (insert (format "%S" cell))
+          ;; The required features are only needed for the char-spec form.
+          (char-db-insert-char-spec
+           cell readable
+           nil
+           (char-db-relation-required-features char name)))
+        (setq separator lbs))
+       ((consp cell)
+        (if separator
+            (insert lbs))
+        (if (consp (car cell))
+            (char-db-insert-char-spec cell readable)
+          (char-db-insert-char-reference cell readable))
+        (setq separator lbs))
+       (t
+        (if separator
+            (insert separator))
+        (insert (prin1-to-string cell))
+        (setq separator " ")))
+      (setq value (cdr value)))
+    (insert ")")
+    (insert line-breaking)))
+
+(defun insert-char-attributes (char &optional readable attributes column
+ for-sub-node)
(unless column
(setq column (current-column)))
- (let (name value has-long-ccs-name rest
+ (let (name value ; has-long-ccs-name
+ rest
radical strokes
(line-breaking
(concat "\n" (make-string (1+ column) ?\ )))
lbs cell separator ret
key al cal
- dest-ccss
- sources required-features
+ dest-ccss ; sources required-features
ccss)
(let (atr-d)
(setq attributes
atr-d)
#'char-attribute-name<)))
(insert "(")
+ (when (memq '<-subsumptive attributes)
+ (when (or readable (not for-sub-node))
+ (when (setq value (get-char-attribute char '<-subsumptive))
+ (char-db-insert-relation-feature char '<-subsumptive value
+ line-breaking
+ ccss readable)))
+ (setq attributes (delq '<-subsumptive attributes)))
+ (when (and (memq '<-denotational attributes)
+ (setq value (get-char-attribute char '<-denotational)))
+ (char-db-insert-relation-feature char '<-denotational value
+ line-breaking
+ ccss readable)
+ (setq attributes (delq '<-denotational attributes)))
(when (and (memq 'name attributes)
(setq value (get-char-attribute char 'name)))
(insert (format
line-breaking))
(setq attributes (delq 'script attributes))
)
- ;; (when (and (memq '<-denotational attributes)
- ;; (setq value (get-char-attribute char '<-denotational))
- ;; (null (cdr value))
- ;; (setq value (encode-char (car value) 'ucs 'defined-only)))
- ;; (insert (format "(%-18s . #x%04X)\t; %c%s"
- ;; '=>ucs value (decode-char 'ucs value)
- ;; line-breaking))
- ;; (setq attributes (delq '<-denotational attributes)))
(dolist (name '(=>ucs =>ucs*))
(when (and (memq name attributes)
(setq value (get-char-attribute char name)))
name value (decode-char '=ucs value)
line-breaking))
(setq attributes (delq name attributes))))
- (dolist (name '(=>ucs@gb =>ucs@cns =>ucs@jis =>ucs@ks =>ucs@big5))
+ (dolist (name '(=>ucs@gb =>ucs@big5))
(when (and (memq name attributes)
(setq value (get-char-attribute char name)))
(insert (format "(%-18s . #x%04X)\t; %c%s"
line-breaking))
(setq attributes (delq name attributes))
))
- ;; (dolist (name '(=>ucs-gb =>ucs-cns =>ucs-jis =>ucs-ks =>ucs-big5))
+ ;; (dolist (name '(=>daikanwa))
;; (when (and (memq name attributes)
;; (setq value (get-char-attribute char name)))
- ;; (insert (format "(%-18s . #x%04X)\t; %c%s"
- ;; (intern
- ;; (concat "=>ucs@"
- ;; (substring (symbol-name name) 6)))
- ;; value
- ;; (decode-char (intern
- ;; (concat "=ucs@"
- ;; (substring
- ;; (symbol-name name) 6)))
- ;; value)
- ;; line-breaking))
+ ;; (insert
+ ;; (if (integerp value)
+ ;; (format "(%-18s . %05d)\t; %c%s"
+ ;; name value (decode-char '=daikanwa value)
+ ;; line-breaking)
+ ;; (format "(%-18s %s)\t; %c%s"
+ ;; name
+ ;; (mapconcat (function prin1-to-string)
+ ;; value " ")
+ ;; (char-representative-of-daikanwa char)
+ ;; line-breaking)))
;; (setq attributes (delq name attributes))))
- ;; (when (and (memq '->ucs attributes)
- ;; (setq value (get-char-attribute char '->ucs)))
- ;; (insert (format (if char-db-convert-obsolete-format
- ;; "(=>ucs\t\t. #x%04X)\t; %c%s"
- ;; "(->ucs\t\t. #x%04X)\t; %c%s")
- ;; value (decode-char '=ucs value)
- ;; line-breaking))
- ;; (setq attributes (delq '->ucs attributes))
- ;; )
- (dolist (name '(=>daikanwa))
- (when (and (memq name attributes)
- (setq value (get-char-attribute char name)))
- (insert
- (if (integerp value)
- (format "(%-18s . %05d)\t; %c%s"
- name value (decode-char '=daikanwa value)
- line-breaking)
- (format "(%-18s %s)\t; %c%s"
- name
- (mapconcat (function prin1-to-string)
- value " ")
- (char-representative-of-daikanwa char)
- line-breaking)))
- (setq attributes (delq name attributes))))
(when (and (memq 'general-category attributes)
(setq value (get-char-attribute char 'general-category)))
(insert (format
(setq radical value)
(insert (format "(ideographic-radical . %S)\t; %c%s"
radical
- (aref ideographic-radicals radical)
+ (ideographic-radical radical)
line-breaking))
(setq attributes (delq 'ideographic-radical attributes))
)
+ (when (and (memq 'shuowen-radical attributes)
+ (setq value (get-char-attribute char 'shuowen-radical)))
+ (insert (format "(shuowen-radical\t. %S)\t; %c%s"
+ value
+ (shuowen-radical value)
+ line-breaking))
+ (setq attributes (delq 'shuowen-radical attributes))
+ )
(let (key)
- (dolist (domain char-db-feature-domains)
+ (dolist (domain
+ (append
+ char-db-feature-domains
+ (let (dest domain)
+ (dolist (feature (char-attribute-list))
+ (setq feature (symbol-name feature))
+ (when (string-match
+ "\\(radical\\|strokes\\)@\\([^@*]+\\)\\(\\*\\|$\\)"
+ feature)
+ (setq domain (intern (match-string 2 feature)))
+ (unless (memq domain dest)
+ (setq dest (cons domain dest)))))
+ (sort dest #'string<))))
(setq key (intern (format "%s@%s" 'ideographic-radical domain)))
(when (and (memq key attributes)
(setq value (get-char-attribute char key)))
(insert (format "(%s . %S)\t; %c%s"
key
radical
- (aref ideographic-radicals radical)
+ (ideographic-radical radical)
line-breaking))
(setq attributes (delq key attributes))
)
(unless (eq value radical)
(insert (format "(kangxi-radical\t . %S)\t; %c%s"
value
- (aref ideographic-radicals value)
+ (ideographic-radical value)
line-breaking))
(or radical
(setq radical value)))
(unless (eq value radical)
(insert (format "(japanese-radical\t . %S)\t; %c%s"
value
- (aref ideographic-radicals value)
+ (ideographic-radical value)
line-breaking))
(or radical
(setq radical value)))
(setq value (get-char-attribute char 'cns-radical)))
(insert (format "(cns-radical\t . %S)\t; %c%s"
value
- (aref ideographic-radicals value)
+ (ideographic-radical value)
line-breaking))
(setq attributes (delq 'cns-radical attributes))
)
(unless (eq value radical)
(insert (format "(shinjigen-1-radical . %S)\t; %c%s"
value
- (aref ideographic-radicals value)
+ (ideographic-radical value)
line-breaking))
(or radical
(setq radical value)))
line-breaking))
(setq attributes (delq '->ideograph attributes))
)
- (when (and (memq '->decomposition attributes)
- (setq value (get-char-attribute char '->decomposition)))
- (insert (format "(->decomposition\t%s)%s"
- (mapconcat (lambda (code)
- (cond ((symbolp code)
- (symbol-name code))
- ((characterp code)
- (if readable
- (format "%S" code)
- (format "#x%04X"
- (char-int code))
- ))
- ((integerp code)
- (format "#x%04X" code))
- (t
- (format "%s%S" line-breaking code))))
- value " ")
- line-breaking))
- (setq attributes (delq '->decomposition attributes))
- )
+ ;; (when (and (memq '->decomposition attributes)
+ ;; (setq value (get-char-attribute char '->decomposition)))
+ ;; (insert (format "(->decomposition\t%s)%s"
+ ;; (mapconcat (lambda (code)
+ ;; (cond ((symbolp code)
+ ;; (symbol-name code))
+ ;; ((characterp code)
+ ;; (if readable
+ ;; (format "%S" code)
+ ;; (format "#x%04X"
+ ;; (char-int code))
+ ;; ))
+ ;; ((integerp code)
+ ;; (format "#x%04X" code))
+ ;; (t
+ ;; (format "%s%S" line-breaking code))))
+ ;; value " ")
+ ;; line-breaking))
+ ;; (setq attributes (delq '->decomposition attributes))
+ ;; )
(if (equal (get-char-attribute char '->titlecase)
(get-char-attribute char '->uppercase))
(setq attributes (delq '->titlecase attributes)))
(unless readable
(dolist (ignored '(composition
->denotational <-subsumptive ->ucs-unified
- ->ideographic-component-forms
- <-same))
+ ->ideographic-component-forms))
(setq attributes (delq ignored attributes))))
- ;; (setq rest ccs-attributes)
- ;; (while (and rest
- ;; (progn
- ;; (setq value (get-char-attribute char (car rest)))
- ;; (if value
- ;; (if (>= (length (symbol-name (car rest))) 19)
- ;; (progn
- ;; (setq has-long-ccs-name t)
- ;; nil)
- ;; t)
- ;; t)))
- ;; (setq rest (cdr rest)))
(while attributes
(setq name (car attributes))
- (if (setq value (get-char-attribute char name))
- (cond ((setq ret (find-charset name))
- (setq name (charset-name ret))
- (if (and (not (memq name dest-ccss))
- (prog1
- (setq value (get-char-attribute char name))
- (setq dest-ccss (cons name dest-ccss))))
- (insert
- (format
- (cond ((memq name '(=daikanwa
- =daikanwa-rev1 =daikanwa-rev2
- =gt =gt-k =cbeta))
- (if has-long-ccs-name
- "(%-26s . %05d)\t; %c%s"
- "(%-18s . %05d)\t; %c%s"))
- ((eq name 'mojikyo)
- (if has-long-ccs-name
- "(%-26s . %06d)\t; %c%s"
- "(%-18s . %06d)\t; %c%s"))
- ((>= (charset-dimension name) 2)
- (if has-long-ccs-name
- "(%-26s . #x%04X)\t; %c%s"
- "(%-18s . #x%04X)\t; %c%s"))
- (t
- (if has-long-ccs-name
- "(%-26s . #x%02X)\t; %c%s"
- "(%-18s . #x%02X)\t; %c%s")))
- name
- (if (= (charset-iso-graphic-plane name) 1)
- (logior value
- (cond ((= (charset-dimension name) 1)
- #x80)
- ((= (charset-dimension name) 2)
- #x8080)
- ((= (charset-dimension name) 3)
- #x808080)
- (t 0)))
- value)
- (char-db-decode-isolated-char name value)
- line-breaking)))
- )
- ((string-match "^=>ucs@" (symbol-name name))
- (insert (format "(%-18s . #x%04X)\t; %c%s"
- name value (decode-char '=ucs value)
- line-breaking))
- )
- ((eq name 'jisx0208-1978/4X)
- (insert (format "(%-18s . #x%04X)%s"
- name value
- line-breaking))
- )
- ((and (not readable)
- (or (eq name '<-identical)
- (string-match "^->simplified" (symbol-name name))
- (string-match "^->vulgar" (symbol-name name))
- ))
- )
- ((or (eq name 'ideographic-structure)
- (eq name 'ideographic-)
- (string-match "^\\(->\\|<-\\)" (symbol-name name)))
- (insert (format "(%-18s%s " name line-breaking))
- (setq lbs (concat "\n" (make-string (current-column) ?\ ))
- separator nil)
- (while (consp value)
- (setq cell (car value))
- (if (integerp cell)
- (setq cell (decode-char '=ucs cell)))
- (cond ((eq name '->subsumptive)
- (when (or (not
- (some (lambda (atr)
- (get-char-attribute cell atr))
- char-db-ignored-attributes))
- (some (lambda (ccs)
- (encode-char cell ccs
- 'defined-only))
- ccss))
- (if separator
- (insert lbs))
- (let ((char-db-ignored-attributes
- (cons '<-subsumptive
- char-db-ignored-attributes)))
- (insert-char-attributes cell readable))
- (setq separator lbs))
- )
- ((characterp cell)
- (setq sources
- (get-char-attribute
- char
- (intern (format "%s*sources" name))))
- (setq required-features nil)
- (dolist (source sources)
- (cond
- ((memq source '(JP JP/Jouyou
- shinjigen-1))
- (setq required-features
- (union required-features
- '(=jis-x0208
- =jis-x0208-1990
- =jis-x0213-1-2000
- =jis-x0213-2-2000
- =jis-x0212
- =jis-x0208-1983
- =jis-x0208-1978))))
- ((eq source 'CN)
- (setq required-features
- (union required-features
- '(=gb2312
- =gb12345
- =iso-ir165)))))
- (cond
- ((find-charset
- (setq ret (intern (format "=%s" source))))
- (setq required-features
- (cons ret required-features)))
- (t (setq required-features
- (cons source required-features)))))
- (cond ((string-match "@JP" (symbol-name name))
- (setq required-features
- (union required-features
- '(=jis-x0208
- =jis-x0208-1990
- =jis-x0213-1-2000
- =jis-x0213-2-2000
- =jis-x0212
- =jis-x0208-1983
- =jis-x0208-1978))))
- ((string-match "@CN" (symbol-name name))
- (setq required-features
- (union required-features
- '(=gb2312
- =gb12345
- =iso-ir165)))))
- (if separator
- (insert lbs))
- (if readable
- (insert (format "%S" cell))
- (char-db-insert-char-spec cell readable
- nil
- required-features))
- (setq separator lbs))
- ((consp cell)
- (if separator
- (insert lbs))
- (if (consp (car cell))
- (char-db-insert-char-spec cell readable)
- (char-db-insert-char-reference cell readable))
- (setq separator lbs))
- (t
- (if separator
- (insert separator))
- (insert (prin1-to-string cell))
- (setq separator " ")))
- (setq value (cdr value)))
- (insert ")")
- (insert line-breaking))
- ((memq name '(ideograph=
- original-ideograph-of
- ancient-ideograph-of
- vulgar-ideograph-of
- wrong-ideograph-of
- ;; simplified-ideograph-of
- ideographic-variants
- ;; ideographic-different-form-of
- ))
- (insert (format "(%-18s%s " name line-breaking))
- (setq lbs (concat "\n" (make-string (current-column) ?\ ))
- separator nil)
- (while (consp value)
- (setq cell (car value))
- (if (and (consp cell)
- (consp (car cell)))
- (progn
- (if separator
- (insert lbs))
- (char-db-insert-alist cell readable)
- (setq separator lbs))
- (if separator
- (insert separator))
- (insert (prin1-to-string cell))
- (setq separator " "))
- (setq value (cdr value)))
- (insert ")")
- (insert line-breaking))
- ((consp value)
- (insert (format "(%-18s " name))
- (setq lbs (concat "\n" (make-string (current-column) ?\ ))
- separator nil)
- (while (consp value)
- (setq cell (car value))
- (if (and (consp cell)
- (consp (car cell))
- (setq ret (condition-case nil
- (find-char cell)
- (error nil))))
- (progn
- (setq rest cell
- al nil
- cal nil)
- (while rest
- (setq key (car (car rest)))
- (if (find-charset key)
- (setq cal (cons key cal))
- (setq al (cons key al)))
- (setq rest (cdr rest)))
- (if separator
- (insert lbs))
- (insert-char-attributes ret
- readable
- al cal)
- (setq separator lbs))
- (setq ret (prin1-to-string cell))
- (if separator
- (if (< (+ (current-column)
- (length ret)
- (length separator))
- 76)
- (insert separator)
- (insert lbs)))
- (insert ret)
- (setq separator " "))
- (setq value (cdr value)))
- (insert ")")
+ (unless (eq (setq value (get-char-attribute char name 'value-is-empty))
+ 'value-is-empty)
+ (cond ((setq ret (find-charset name))
+ (setq name (charset-name ret))
+ (when (not (memq name dest-ccss))
+ (setq dest-ccss (cons name dest-ccss))
+ (char-db-insert-ccs-feature name value line-breaking))
+ )
+ ((string-match "^=>ucs@" (symbol-name name))
+ (insert (format "(%-18s . #x%04X)\t; %c%s"
+ name value (decode-char '=ucs value)
+ line-breaking))
+ )
+ ((eq name 'jisx0208-1978/4X)
+ (insert (format "(%-18s . #x%04X)%s"
+ name value
+ line-breaking))
+ )
+ ((and
+ (not readable)
+ (not (eq name '->subsumptive))
+ (not (eq name '->uppercase))
+ (not (eq name '->lowercase))
+ (not (eq name '->titlecase))
+ (not (eq name '->canonical))
+ (not (eq name '->Bopomofo))
+ (not (eq name '->mistakable))
+ (not (eq name '->ideographic-variants))
+ (null (get-char-attribute
+ char (intern (format "%s*sources" name))))
+ (not (string-match "\\*sources$" (symbol-name name)))
+ (null (get-char-attribute
+ char (intern (format "%s*note" name))))
+ (not (string-match "\\*note$" (symbol-name name)))
+ (or (eq name '<-identical)
+ (eq name '<-uppercase)
+ (eq name '<-lowercase)
+ (eq name '<-titlecase)
+ (eq name '<-canonical)
+ (eq name '<-ideographic-variants)
+ ;; (eq name '<-synonyms)
+ (string-match "^<-synonyms" (symbol-name name))
+ (eq name '<-mistakable)
+ (when (string-match "^->" (symbol-name name))
+ (cond
+ ((string-match "^->fullwidth" (symbol-name name))
+ (not (and (consp value)
+ (characterp (car value))
+ (encode-char
+ (car value) '=ucs 'defined-only)))
+ )
+ (t)))
+ ))
+ )
+ ((or (eq name 'ideographic-structure)
+ (eq name 'ideographic-combination)
+ (eq name 'ideographic-)
+ (eq name '=decomposition)
+ (char-feature-base-name= '=decomposition name)
+ (char-feature-base-name= '=>decomposition name)
+ ;; (string-match "^=>*decomposition\\(@[^*]+\\)?$"
+ ;; (symbol-name name))
+ (string-match "^\\(->\\|<-\\)[^*]*$" (symbol-name name))
+ (string-match "^\\(->\\|<-\\)[^*]*\\*sources$"
+ (symbol-name name))
+ )
+ (char-db-insert-relation-feature char name value
+ line-breaking
+ ccss readable))
+ ((memq name '(ideograph=
+ original-ideograph-of
+ ancient-ideograph-of
+ vulgar-ideograph-of
+ wrong-ideograph-of
+ ;; simplified-ideograph-of
+ ideographic-variants
+ ;; ideographic-different-form-of
+ ))
+ (insert (format "(%-18s%s " name line-breaking))
+ (setq lbs (concat "\n" (make-string (current-column) ?\ ))
+ separator nil)
+ (while (consp value)
+ (setq cell (car value))
+ (if (and (consp cell)
+ (consp (car cell)))
+ (progn
+ (if separator
+ (insert lbs))
+ (char-db-insert-alist cell readable)
+ (setq separator lbs))
+ (if separator
+ (insert separator))
+ (insert (prin1-to-string cell))
+ (setq separator " "))
+ (setq value (cdr value)))
+ (insert ")")
+ (insert line-breaking))
+ ((consp value)
+ (insert (format "(%-18s " name))
+ (setq lbs (concat "\n" (make-string (current-column) ?\ ))
+ separator nil)
+ (while (consp value)
+ (setq cell (car value))
+ (if (and (consp cell)
+ (consp (car cell))
+ (setq ret (condition-case nil
+ (find-char cell)
+ (error nil))))
+ (progn
+ (setq rest cell
+ al nil
+ cal nil)
+ (while rest
+ (setq key (car (car rest)))
+ (if (find-charset key)
+ (setq cal (cons key cal))
+ (setq al (cons key al)))
+ (setq rest (cdr rest)))
+ (if separator
+ (insert lbs))
+ (insert-char-attributes ret
+ readable
+ al ; cal
+ nil 'for-sub-node)
+ (setq separator lbs))
+ (setq ret (prin1-to-string cell))
+ (if separator
+ (if (< (+ (current-column)
+ (length ret)
+ (length separator))
+ 76)
+ (insert separator)
+ (insert lbs)))
+ (insert ret)
+ (setq separator " "))
+ (setq value (cdr value)))
+ (insert ")")
+ (insert line-breaking))
+ (t
+ (insert (format "(%-18s" name))
+ (setq ret (prin1-to-string value))
+ (unless (< (+ (current-column)
+ (length ret)
+ 3)
+ 76)
(insert line-breaking))
- (t
- (insert (format "(%-18s . %S)%s"
- name value
- line-breaking)))
- ))
+ (insert " . " ret ")" line-breaking)
+ ;; (insert (format "(%-18s . %S)%s"
+ ;; name value
+ ;; line-breaking))
+ )
+ ))
(setq attributes (cdr attributes)))
- ;; (while ccs-attributes
- ;; (setq name (charset-name (car ccs-attributes)))
- ;; (if (and (not (memq name dest-ccss))
- ;; (prog1
- ;; (setq value (get-char-attribute char name))
- ;; (setq dest-ccss (cons name dest-ccss))))
- ;; (insert
- ;; (format
- ;; (cond ((memq name '(=daikanwa
- ;; =daikanwa-rev1 =daikanwa-rev2
- ;; =gt =gt-k =cbeta))
- ;; (if has-long-ccs-name
- ;; "(%-26s . %05d)\t; %c%s"
- ;; "(%-18s . %05d)\t; %c%s"))
- ;; ((eq name 'mojikyo)
- ;; (if has-long-ccs-name
- ;; "(%-26s . %06d)\t; %c%s"
- ;; "(%-18s . %06d)\t; %c%s"))
- ;; ((>= (charset-dimension name) 2)
- ;; (if has-long-ccs-name
- ;; "(%-26s . #x%04X)\t; %c%s"
- ;; "(%-18s . #x%04X)\t; %c%s"))
- ;; (t
- ;; (if has-long-ccs-name
- ;; "(%-26s . #x%02X)\t; %c%s"
- ;; "(%-18s . #x%02X)\t; %c%s")))
- ;; name
- ;; (if (= (charset-iso-graphic-plane name) 1)
- ;; (logior value
- ;; (cond ((= (charset-dimension name) 1)
- ;; #x80)
- ;; ((= (charset-dimension name) 2)
- ;; #x8080)
- ;; ((= (charset-dimension name) 3)
- ;; #x808080)
- ;; (t 0)))
- ;; value)
- ;; (char-db-decode-isolated-char name value)
- ;; line-breaking)))
- ;; (setq ccs-attributes (cdr ccs-attributes)))
(insert ")")))
(defun insert-char-data (char &optional readable
no-ucs-unified
script excluded-script)
(insert-char-data char printable)
- (let ((variants (or (char-variants char)
- (let ((ucs (get-char-attribute char '->ucs)))
- (if ucs
- (delete char (char-variants (int-char ucs)))))))
- variant vs)
+ (let ((variants (char-variants char))
+ rest
+ variant vs ret)
(setq variants (sort variants #'<))
- (while variants
- (setq variant (car variants))
- (if (and (or (null script)
- (null (setq vs (get-char-attribute variant 'script)))
- (memq script vs))
- (or (null excluded-script)
- (null (setq vs (get-char-attribute variant 'script)))
- (not (memq excluded-script vs))))
- (or (and no-ucs-unified (get-char-attribute variant '=ucs))
- (insert-char-data variant printable)))
- (setq variants (cdr variants))
- )))
+ (setq rest variants)
+ (setq variants (cons char variants))
+ (while rest
+ (setq variant (car rest))
+ (unless (get-char-attribute variant '<-subsumptive)
+ (if (and (or (null script)
+ (null (setq vs (get-char-attribute variant 'script)))
+ (memq script vs))
+ (or (null excluded-script)
+ (null (setq vs (get-char-attribute variant 'script)))
+ (not (memq excluded-script vs))))
+ (unless (and no-ucs-unified (get-char-attribute variant '=ucs))
+ (insert-char-data variant printable)
+ (if (setq ret (char-variants variant))
+ (while ret
+ (or (memq (car ret) variants)
+ ;; (get-char-attribute (car ret) '<-subsumptive)
+ (setq rest (nconc rest (list (car ret)))))
+ (setq ret (cdr ret)))))))
+ (setq rest (cdr rest)))))
(defun insert-char-range-data (min max &optional script excluded-script)
(let ((code min)
what-character-original-window-configuration)
(signal (car err) (cdr err)))))))
+
+;;; @ end
+;;;
+
(provide 'char-db-util)
;;; char-db-util.el ends here