;;; char-db-util.el --- Character Database utility -*- coding: utf-8-er; -*-
;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
-;; 2007, 2008, 2009, 2010 MORIOKA Tomohiko.
+;; 2007, 2008, 2009, 2010, 2011 MORIOKA Tomohiko.
;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
;; Keywords: CHISE, Character Database, ISO/IEC 10646, UCS, Unicode, MULE.
;;; Code:
-(require 'alist)
+(require 'chise-subr)
+(require 'ideograph-subr)
;; Alist keyed by two-letter category strings (e.g. "Lu", "So").  Each
;; entry maps the key to a list of symbols spelling the category out,
;; e.g. "Lu" -> (letter uppercase), "So" -> (symbol other).
;; NOTE(review): presumably these are the Unicode general-category
;; abbreviations found in UnicodeData -- confirm against the consumer,
;; which is not visible in this excerpt.
(defconst unidata-normative-category-alist
'(("Lu" letter uppercase)
("So" symbol other)
))
-(defconst ideographic-radicals
- (let ((v (make-vector 215 nil))
- (i 1))
- (while (< i 215)
- (aset v i (decode-char '=ucs (+ #x2EFF i)))
- (setq i (1+ i)))
- v))
-
-(defun ideographic-radical (number)
- (aref ideographic-radicals number))
-
(defconst shuowen-radicals
[?一 ?上 ?示 ?三 ?王 ?玉 ?玨 ?气 ?士 ?丨 ; 010
?屮 ?艸 ?蓐 ?茻 ?小 ?八 ?釆 ?半 ?牛 ?犛 ; 020
?正 ?是 ?辵 ?彳 ?廴 ?㢟 ?行 ?齒 ?牙 ?足 ; 040
?疋 ?品 ?龠 ?冊 ?㗊 ?舌 ?干 ?谷 ?只 ?㕯 ; 050
?句 ?丩 ?古 ?十 ?卅 ?言 ?誩 ?音 ?䇂 ?丵 ; 060
- ?菐 ?𠬞 ?廾 ?共 ?異 ?舁 ?𦥑 ?䢅 ?爨 ?革 ; 070
+ ?菐 ?𠬞 ?𠬜 ?共 ?異 ?舁 ?𦥑 ?䢅 ?爨 ?革 ; 070
?鬲 ?䰜 ?爪 ?𠃨 ?鬥 ?又 ?𠂇 ?㕜 ?支 ?𦘒 ; 080
?聿 ?畫 ?隶 ?臤 ?臣 ?殳 ?殺 ?𠘧 ?寸 ?皮 ; 090
?㼱 ?攴 ?敎 ?卜 ?用 ?爻 ?㸚 ?𥄎 ?目 ?䀠 ; 100
?𠦒 ?冓 ?幺 ?𢆶 ?叀 ?玄 ?予 ?放 ?𠬪 ?𣦼 ; 130
?歺 ?死 ?冎 ?骨 ?肉 ?筋 ?刀 ?刃 ?㓞 ?丰 ; 140
?耒 ?𧢲 ?竹 ?箕 ?丌 ?左 ?工 ?㠭 ?巫 ?甘 ; 150
- ?曰 ?乃 ?丂 ?可 ?兮 ?号 ?亏 ?旨 ?喜 ?壴 ; 160
+ ?旨 ?曰 ?乃 ?丂 ?可 ?兮 ?号 ?亏 ?喜 ?壴 ; 160
?鼓 ?豈 ?豆 ?豊 ?豐 ?䖒 ?虍 ?虎 ?虤 ?皿 ; 170
?𠙴 ?去 ?血 ?丶 ?丹 ?青 ?井 ?皀 ?鬯 ?食 ; 180
?亼 ?會 ?倉 ?入 ?缶 ?矢 ?高 ?冂 ?𩫏 ?京 ; 190
;; Coding-system symbol, defaulting to `utf-8-mcs-er'.
;; NOTE(review): going by the name, this is the coding system used when
;; reading/writing char-db files; the code that uses it is not visible
;; in this excerpt -- confirm.
(defvar char-db-file-coding-system 'utf-8-mcs-er)
-(defvar char-db-feature-domains
- '(ucs ucs/compat daikanwa cns gt jis jis/alt jis/a jis/b
- jis-x0212 jis-x0213 cdp shinjigen misc unknown))
-
;; List of attribute (feature) name symbols; the default contains only
;; `ideographic-products'.  Elsewhere in this file the variable is
;; dynamically rebound with `<-subsumptive' consed onto it around a
;; call to `insert-char-attributes'.
;; NOTE(review): presumably attributes listed here are skipped when
;; character attributes are written out -- the code that tests
;; membership is not visible in this excerpt; confirm.
(defvar char-db-ignored-attributes '(ideographic-products))
-(defun char-attribute-name< (ka kb)
- (cond
- ((eq '->denotational kb)
- t)
- ((eq '->subsumptive kb)
- (not (eq '->denotational ka)))
- ((eq '->denotational ka)
- nil)
- ((eq '->subsumptive ka)
- nil)
- ((and (symbolp ka)
- (string-match "^->" (symbol-name ka)))
- (cond ((and (symbolp kb)
- (string-match "^->" (symbol-name kb)))
- (string< (symbol-name ka)
- (symbol-name kb))
- ))
- )
- ((and (symbolp kb)
- (string-match "^->" (symbol-name kb)))
- t)
- ((and (symbolp ka)
- (string-match "^<-" (symbol-name ka)))
- (cond ((symbolp kb)
- (cond ((string-match "^<-" (symbol-name kb))
- (string< (symbol-name ka)
- (symbol-name kb))
- )
- ;; ((string-match "^->" (symbol-name kb))
- ;; t)
- )))
- )
- ((and (symbolp kb)
- (string-match "^<-" (symbol-name kb)))
- t
- ;; (not (string-match "^->" (symbol-name ka)))
- )
- ((find-charset ka)
- (if (find-charset kb)
- (let (a-ir b-ir)
- (if (setq a-ir (charset-property ka 'iso-ir))
- (if (setq b-ir (charset-property kb 'iso-ir))
- (cond
- ((= a-ir b-ir)
- (< (charset-id ka)(charset-id kb))
- )
- ((= a-ir 177)
- t)
- ((= b-ir 177)
- nil)
- ((< a-ir
- b-ir)
- ))
- t)
- (if (charset-property kb 'iso-ir)
- nil
- (< (charset-id ka)(charset-id kb)))))
- nil)
- )
- ((find-charset kb))
- ((symbolp ka)
- (cond ((symbolp kb)
- (string< (symbol-name ka)
- (symbol-name kb)))
- (t)))
- ((symbolp kb)
- nil)))
-
;; Quoted list of coded-charset name symbols (ascii, control-1, =gt,
;; =daikanwa, =>jis-x0208, =ucs@iso, ...).
;; NOTE(review): going by the variable name, the order of the elements
;; expresses a priority among coded charsets; the code that consumes
;; this list is not visible in this excerpt, so confirm how the
;; ordering is interpreted before reordering or inserting entries.
(defvar char-db-coded-charset-priority-list
'(ascii
control-1
ethiopic-ucs
=big5-cdp
=gt
- =>>gt
- ideograph-daikanwa-2
- ideograph-daikanwa
+ =adobe-japan1-0
+ =adobe-japan1-1
+ =adobe-japan1-2
+ =adobe-japan1-3
+ =adobe-japan1-4
+ =adobe-japan1-5
+ =adobe-japan1-6
+ =hanyo-denshi/ja
+ =hanyo-denshi/jb
+ =hanyo-denshi/jc
+ =hanyo-denshi/jd
+ =hanyo-denshi/ft
+ =hanyo-denshi/ia
+ =hanyo-denshi/ib
+ =hanyo-denshi/hg
+ =hanyo-denshi/ks
+ =daikanwa
+ =daikanwa@rev2
+ =daikanwa@rev1
=cbeta
+ =gt-k
ideograph-hanziku-1
ideograph-hanziku-2
ideograph-hanziku-3
ideograph-hanziku-10
ideograph-hanziku-11
ideograph-hanziku-12
- =gt-k
- =ucs@iso
- =ucs@unicode
+ =>>>jis-x0208
+ =>>>jis-x0213-1
=>>jis-x0208
=>>jis-x0213-1
=>>jis-x0213-1@2000
=>>jis-x0213-1@2004
=>>jis-x0213-2
=>>jis-x0208@1978
+ =>>hanyo-denshi/ft
+ =>>hanyo-denshi/ks
+ =>>gt
+ =>>daikanwa
+ =>jis-x0208@usual
=>jis-x0208
=>jis-x0208@1997
=>jis-x0213-1
=>jis-x0213-1@2000
=>jis-x0213-1@2004
+ =>jis-x0213-2@usual
=>jis-x0213-2
+ ==>ucs@bucs
+ =>ucs@iso
+ =>ucs@unicode
+ =>ucs@jis
+ =>ucs@JP
+ =>ucs@cns
+ =>ucs@ks
+ =>>ucs@iso
+ =>>ucs@unicode
+ =>>ucs@jis
+ =>>ucs@cns
+ =>>>ucs@iso
+ =>>>ucs@unicode
+ =ucs@iso
+ =ucs@unicode
+ =>>big5-cdp
+ =>>gt-k
+ =>gt
+ =>big5-cdp
+ =>daikanwa
=big5
=big5-eten
+ =>gt-k
=zinbun-oracle
+ =>zinbun-oracle
=ruimoku-v6
+ =>>ruimoku-v6
=jef-china3
=shinjigen))
+
+;;; @ char-db formatters
+;;;
+
(defun char-db-make-char-spec (char)
(let (ret char-spec)
(cond ((characterp char)
(insert-char-attributes char
readable
(union (mapcar #'car char-spec)
- required-features))
+ required-features)
+ nil 'for-sub-node)
(when temp-char
;; undefine temporary character
;; Current implementation is dirty.
(insert-char-attributes ret
readable
(or al 'none) ; cal
- ))
+ nil 'for-sub-node))
(insert (prin1-to-string value)))
(insert ")")
(insert line-breaking))
(insert-char-attributes ret
readable
al ; cal
- )
+ nil 'for-sub-node)
(setq separator lbs))
(if separator
(insert separator))
(format
(cond ((memq name '(=shinjigen
=shinjigen@1ed
- =shinjigen@rev =shinjigen/+p@rev))
+ =shinjigen@rev =shinjigen/+p@rev
+ =daikanwa/ho))
"(%-18s . %04d)\t; %c")
((eq name '=shinjigen@1ed/24pr)
"(%-18s . %04d)\t; %c")
- ((or (memq name '(=daikanwa
+ ((or (memq name '(=daikanwa =>>daikanwa =>daikanwa
=daikanwa@rev1 =daikanwa@rev2
- =gt =>>gt =>gt =gt-k =cbeta =zinbun-oracle))
+ =daikanwa/+p =daikanwa/+2p
+ =gt =>>>gt =>>gt =>gt
+ =gt-k =>>gt-k =>gt-k
+ =>>adobe-japan1
+ =cbeta =>>cbeta
+ =zinbun-oracle =>zinbun-oracle))
(string-match "^=adobe-" (symbol-name name)))
"(%-18s . %05d)\t; %c")
- ((eq name 'mojikyo)
+ ((memq name '(=hanyo-denshi/ks =>>hanyo-denshi/ks mojikyo))
"(%-18s . %06d)\t; %c")
((>= (charset-dimension name) 2)
"(%-18s . #x%04X)\t; %c")
(let ((char-db-ignored-attributes
(cons '<-subsumptive
char-db-ignored-attributes)))
- (insert-char-attributes cell readable))
+ (insert-char-attributes cell readable nil nil 'for-sub-node))
(setq separator lbs))
)
((characterp cell)
(union required-features
'(=jis-x0208
=jis-x0208@1990
- =jis-x0213-1-2000
- =jis-x0213-2-2000
+ =jis-x0213-1@2000
+ =jis-x0213-1@2004
+ =jis-x0213-2
=jis-x0212
=jis-x0208@1983
=jis-x0208@1978
(insert ")")
(insert line-breaking)))
-(defun insert-char-attributes (char &optional readable attributes column)
+(defun insert-char-attributes (char &optional readable attributes column
+ for-sub-node)
(unless column
(setq column (current-column)))
(let (name value ; has-long-ccs-name
#'char-attribute-name<)))
(insert "(")
(when (memq '<-subsumptive attributes)
- (when readable
+ (when (or readable (not for-sub-node))
(when (setq value (get-char-attribute char '<-subsumptive))
(char-db-insert-relation-feature char '<-subsumptive value
line-breaking
name value (decode-char '=ucs value)
line-breaking))
(setq attributes (delq name attributes))))
- (dolist (name '(=>ucs@gb =>ucs@cns =>ucs@jis =>ucs@ks =>ucs@big5))
+ (dolist (name '(=>ucs@gb =>ucs@big5))
(when (and (memq name attributes)
(setq value (get-char-attribute char name)))
(insert (format "(%-18s . #x%04X)\t; %c%s"
line-breaking))
(setq attributes (delq name attributes))
))
- (dolist (name '(=>daikanwa))
- (when (and (memq name attributes)
- (setq value (get-char-attribute char name)))
- (insert
- (if (integerp value)
- (format "(%-18s . %05d)\t; %c%s"
- name value (decode-char '=daikanwa value)
- line-breaking)
- (format "(%-18s %s)\t; %c%s"
- name
- (mapconcat (function prin1-to-string)
- value " ")
- (char-representative-of-daikanwa char)
- line-breaking)))
- (setq attributes (delq name attributes))))
+ ;; (dolist (name '(=>daikanwa))
+ ;; (when (and (memq name attributes)
+ ;; (setq value (get-char-attribute char name)))
+ ;; (insert
+ ;; (if (integerp value)
+ ;; (format "(%-18s . %05d)\t; %c%s"
+ ;; name value (decode-char '=daikanwa value)
+ ;; line-breaking)
+ ;; (format "(%-18s %s)\t; %c%s"
+ ;; name
+ ;; (mapconcat (function prin1-to-string)
+ ;; value " ")
+ ;; (char-representative-of-daikanwa char)
+ ;; line-breaking)))
+ ;; (setq attributes (delq name attributes))))
(when (and (memq 'general-category attributes)
(setq value (get-char-attribute char 'general-category)))
(insert (format
(eq name 'ideographic-combination)
(eq name 'ideographic-)
(eq name '=decomposition)
- (string-match "^=>decomposition" (symbol-name name))
+ (char-feature-base-name= '=decomposition name)
+ (char-feature-base-name= '=>decomposition name)
+ ;; (string-match "^=>*decomposition\\(@[^*]+\\)?$"
+ ;; (symbol-name name))
(string-match "^\\(->\\|<-\\)[^*]*$" (symbol-name name))
(string-match "^\\(->\\|<-\\)[^*]*\\*sources$"
(symbol-name name))
(insert lbs))
(insert-char-attributes ret
readable
- al cal)
+ al ; cal
+ nil 'for-sub-node)
(setq separator lbs))
(setq ret (prin1-to-string cell))
(if separator
what-character-original-window-configuration)
(signal (car err) (cdr err)))))))
+
+;;; @ end
+;;;
+
(provide 'char-db-util)
;;; char-db-util.el ends here