;;; char-db-util.el --- Character Database utility -*- coding: utf-8-er; -*-
;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
-;; 2007 MORIOKA Tomohiko.
+;; 2007, 2008, 2009, 2010, 2011 MORIOKA Tomohiko.
;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
;; Keywords: CHISE, Character Database, ISO/IEC 10646, UCS, Unicode, MULE.
;;; Code:
-(require 'alist)
+(require 'chise-subr)
+(require 'ideograph-subr)
;; Alist keyed by two-letter category abbreviation strings such as
;; "Lu" and "So".  The rest of each entry is a list of symbols that
;; spells the abbreviation out (e.g. `letter' `uppercase').
(defconst unidata-normative-category-alist
'(("Lu" letter uppercase)
("So" symbol other)
))
-(defconst ideographic-radicals
- (let ((v (make-vector 215 nil))
- (i 1))
- (while (< i 215)
- (aset v i (decode-char '=ucs (+ #x2EFF i)))
- (setq i (1+ i)))
- v))
-
-(defun ideographic-radical (number)
- (aref ideographic-radicals number))
-
;; Shuowen radicals as a vector of characters, ten per row.  The
;; trailing `; NNN' comment on a row is the ordinal of that row's
;; last radical.  Each radical occurs exactly once: 林 is no. 208,
;; while no. 262 is the distinct (look-alike) character 𣏟.
(defconst shuowen-radicals
[?一 ?上 ?示 ?三 ?王 ?玉 ?玨 ?气 ?士 ?丨 ; 010
?屮 ?艸 ?蓐 ?茻 ?小 ?八 ?釆 ?半 ?牛 ?犛 ; 020
?𠙴 ?去 ?血 ?丶 ?丹 ?青 ?井 ?皀 ?鬯 ?食 ; 180
?亼 ?會 ?倉 ?入 ?缶 ?矢 ?高 ?冂 ?𩫏 ?京 ; 190
?亯 ?𣆪 ?畗 ?㐭 ?嗇 ?來 ?麥 ?夊 ?舛 ?䑞 ; 200
- ?韋 ?弟 ?夂 ?久 ?桀
+ ?韋 ?弟 ?夂 ?久 ?桀 ?木 ?東 ?林 ?才 ?叒 ; 210
+ ?之 ?帀 ?出 ?𣎵 ?生 ?乇 ?𠂹 ?𠌶 ?華 ?𥝌 ; 220
+ ?稽 ?巢 ?桼 ?束 ?㯻 ?囗 ?員 ?貝 ?邑 ?𨛜 ; 230
+ ?日 ?旦 ?倝 ?㫃 ?冥 ?晶 ?月 ?有 ?明 ?囧 ; 240
+ ?夕 ?多 ?毌 ?𢎘 ?𣐺 ?卣 ?齊 ?朿 ?片 ?鼎 ; 250
+ ?克 ?彔 ?禾 ?秝 ?黍 ?香 ?米 ?毇 ?臼 ?凶 ; 260
+ ?𣎳 ?𣏟 ?麻 ?尗 ?耑 ?韭 ?瓜 ?瓠 ?宀 ?宮 ; 270
+ ?呂 ?穴 ?㝱 ?𤕫 ?冖 ?𠔼 ?冃 ?㒳 ?网 ?襾 ; 280
+ ?巾 ?巿 ?帛 ?白 ?㡀 ?黹 ?人 ?𠤎 ?匕 ?从 ; 290
])
(defun shuowen-radical (number)
(defvar char-db-file-coding-system 'utf-8-mcs-er) ; coding-system symbol; NOTE(review): presumably applied to char-db files -- confirm at use sites
-(defvar char-db-feature-domains
- '(ucs ucs/compat daikanwa cns gt jis jis/alt jis/a jis/b
- jis-x0212 jis-x0213 cdp shinjigen misc unknown))
-
;; List of character-attribute names, initially just
;; `ideographic-products'.  NOTE(review): presumably these attributes
;; are left out when a character's attributes are written -- confirm
;; at the use sites, which are not visible here.
(defvar char-db-ignored-attributes '(ideographic-products))
-(defun char-attribute-name< (ka kb)
- (cond
- ((eq '->denotational kb)
- t)
- ((eq '->subsumptive kb)
- (not (eq '->denotational ka)))
- ((eq '->denotational ka)
- nil)
- ((eq '->subsumptive ka)
- nil)
- ((and (symbolp ka)
- (string-match "^->" (symbol-name ka)))
- (cond ((and (symbolp kb)
- (string-match "^->" (symbol-name kb)))
- (string< (symbol-name ka)
- (symbol-name kb))
- ))
- )
- ((and (symbolp kb)
- (string-match "^->" (symbol-name kb)))
- t)
- ((and (symbolp ka)
- (string-match "^<-" (symbol-name ka)))
- (cond ((symbolp kb)
- (cond ((string-match "^<-" (symbol-name kb))
- (string< (symbol-name ka)
- (symbol-name kb))
- )
- ;; ((string-match "^->" (symbol-name kb))
- ;; t)
- )))
- )
- ((and (symbolp kb)
- (string-match "^<-" (symbol-name kb)))
- t
- ;; (not (string-match "^->" (symbol-name ka)))
- )
- ((find-charset ka)
- (if (find-charset kb)
- (if (<= (charset-id ka) 1)
- (if (<= (charset-id kb) 1)
- (cond
- ((= (charset-dimension ka)
- (charset-dimension kb))
- (> (charset-id ka)(charset-id kb)))
- (t
- (> (charset-dimension ka)
- (charset-dimension kb))
- ))
- t)
- (if (<= (charset-id kb) 1)
- nil
- (< (charset-id ka)(charset-id kb))))
- nil))
- ((find-charset kb)
- t)
- ((symbolp ka)
- (cond ((symbolp kb)
- (string< (symbol-name ka)
- (symbol-name kb)))
- (t)))
- ((symbolp kb)
- nil)))
-
;; List of coded-charset (CCS) feature names in a fixed order.
;; NOTE(review): the name suggests that earlier entries are preferred
;; over later ones by whatever consumes this list -- confirm at the
;; use sites, which are not visible here.
(defvar char-db-coded-charset-priority-list
'(ascii
control-1
chinese-cns11643-5
chinese-cns11643-6
chinese-cns11643-7
- =jis-x0213-1-2000
- =jis-x0213-2-2000
+ =jis-x0213-1
+ =jis-x0213-1@2000
+ =jis-x0213-1@2004
+ =jis-x0213-2
korean-ksc5601
chinese-isoir165
katakana-jisx0201
ideograph-daikanwa-2
ideograph-daikanwa
=cbeta
+ =gt-k
ideograph-hanziku-1
ideograph-hanziku-2
ideograph-hanziku-3
ideograph-hanziku-10
ideograph-hanziku-11
ideograph-hanziku-12
- =gt-k
+ =>>jis-x0208
+ =>>jis-x0213-1
+ =>>jis-x0213-1@2000
+ =>>jis-x0213-1@2004
+ =>>jis-x0213-2
+ =>>jis-x0208@1978
+ =>>gt
+ =>jis-x0208@usual
+ =>jis-x0208
+ =>jis-x0208@1997
+ =>jis-x0213-1
+ =>jis-x0213-1@2000
+ =>jis-x0213-1@2004
+ =>jis-x0213-2@usual
+ =>jis-x0213-2
+ ==>ucs@bucs
+ =>ucs@iso
+ =>ucs@unicode
+ =>ucs@jis
+ =>ucs@JP
+ =>ucs@cns
+ =>ucs@ks
+ =>>ucs@unicode
+ =>>ucs@jis
+ =>>ucs@cns
=ucs@iso
=ucs@unicode
+ =>>big5-cdp
+ =>>gt-k
+ =>gt
+ =>big5-cdp
+ =>daikanwa
=big5
=big5-eten
- =jis-x0208@1997
=zinbun-oracle
+ =>zinbun-oracle
=ruimoku-v6
- =jef-china3))
+ =>>ruimoku-v6
+ =jef-china3
+ =shinjigen))
+
+
+;;; @ char-db formatters
+;;;
(defun char-db-make-char-spec (char)
(let (ret char-spec)
=daikanwa@rev2
;; =gt-k
=jis-x0208@1997
- )))
+ ))
+ (string-match "=ucs@" (symbol-name ccs)))
(setq ccs (charset-name ccs))
(null (assq ccs char-spec))
(setq ret (encode-char char ccs 'defined-only)))
((setq ret (get-char-attribute char 'name*))
(setq char-spec (cons (cons 'name* ret) char-spec))
))
+ )
+ ((setq ret (get-char-attribute
+ char 'ideographic-combination))
+ (setq char-spec
+ (cons (cons 'ideographic-combination ret)
+ char-spec))
))
char-spec)
((consp char)
(defun char-db-insert-ccs-feature (name value line-breaking)
(insert
(format
- (cond ((or (memq name '(=daikanwa
+ (cond ((memq name '(=shinjigen
+ =shinjigen@1ed
+ =shinjigen@rev =shinjigen/+p@rev))
+ "(%-18s . %04d)\t; %c")
+ ((eq name '=shinjigen@1ed/24pr)
+ "(%-18s . %04d)\t; %c")
+ ((or (memq name '(=daikanwa
=daikanwa@rev1 =daikanwa@rev2
- =gt =gt-k =cbeta =zinbun-oracle))
+ =>>daikanwa =>daikanwa
+ =gt =>>gt =>gt =gt-k =>>gt-k =cbeta
+ =zinbun-oracle =>zinbun-oracle))
(string-match "^=adobe-" (symbol-name name)))
"(%-18s . %05d)\t; %c")
((eq name 'mojikyo)
(setq required-features nil)
(dolist (source sources)
(cond
- ((memq source '(JP JP/Jouyou shinjigen-1))
+ ((memq source '(JP
+ JP/Jouyou
+ shinjigen shinjigen@1ed shinjigen@rev))
(setq required-features
(union required-features
'(=jis-x0208
=jis-x0208@1990
- =jis-x0213-1-2000
- =jis-x0213-2-2000
+ =jis-x0213-1@2000
+ =jis-x0213-1@2004
+ =jis-x0213-2
=jis-x0212
=jis-x0208@1983
- =jis-x0208@1978))))
+ =jis-x0208@1978
+ =shinjigen))))
((eq source 'CN)
(setq required-features
(union required-features
name value (decode-char '=ucs value)
line-breaking))
(setq attributes (delq name attributes))))
- (dolist (name '(=>ucs@gb =>ucs@cns =>ucs@jis =>ucs@ks =>ucs@big5))
+ (dolist (name '(=>ucs@gb =>ucs@big5))
(when (and (memq name attributes)
(setq value (get-char-attribute char name)))
(insert (format "(%-18s . #x%04X)\t; %c%s"
line-breaking))
(setq attributes (delq name attributes))
))
- (dolist (name '(=>daikanwa))
- (when (and (memq name attributes)
- (setq value (get-char-attribute char name)))
- (insert
- (if (integerp value)
- (format "(%-18s . %05d)\t; %c%s"
- name value (decode-char '=daikanwa value)
- line-breaking)
- (format "(%-18s %s)\t; %c%s"
- name
- (mapconcat (function prin1-to-string)
- value " ")
- (char-representative-of-daikanwa char)
- line-breaking)))
- (setq attributes (delq name attributes))))
+ ;; (dolist (name '(=>daikanwa))
+ ;; (when (and (memq name attributes)
+ ;; (setq value (get-char-attribute char name)))
+ ;; (insert
+ ;; (if (integerp value)
+ ;; (format "(%-18s . %05d)\t; %c%s"
+ ;; name value (decode-char '=daikanwa value)
+ ;; line-breaking)
+ ;; (format "(%-18s %s)\t; %c%s"
+ ;; name
+ ;; (mapconcat (function prin1-to-string)
+ ;; value " ")
+ ;; (char-representative-of-daikanwa char)
+ ;; line-breaking)))
+ ;; (setq attributes (delq name attributes))))
(when (and (memq 'general-category attributes)
(setq value (get-char-attribute char 'general-category)))
(insert (format
what-character-original-window-configuration)
(signal (car err) (cdr err)))))))
+
+;;; @ end
+;;;
+
(provide 'char-db-util)
;;; char-db-util.el ends here