;;; char-db-util.el --- Character Database utility -*- coding: utf-8-er; -*-
-;; Copyright (C) 1998,1999,2000,2001,2002,2003,2004,2005 MORIOKA Tomohiko.
+;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+;; 2007 MORIOKA Tomohiko.
;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
;; Keywords: CHISE, Character Database, ISO/IEC 10646, UCS, Unicode, MULE.
(aref ideographic-radicals number))
(defconst shuowen-radicals
- [?一 ?上 ?示 ?三 ?王 ?玉 ?玨 ?气 ?士 ?丨 ?屮 ?艸 ?茻])
+ [?一 ?上 ?示 ?三 ?王 ?玉 ?玨 ?气 ?士 ?丨
+ ?屮 ?艸 ?蓐 ?茻 ?小 ?八 ?釆 ?半 ?牛 ?犛
+ ?告 ?口 ?凵 ?吅 ?哭 ?走 ?止 ?癶 ?步 ?此
+ ?正 ?是 ?辵 ?彳 ?廴 ?𢓊 ?行 ?齒 ?牙 ?足
+ ?疋 ?品 ?龠 ?冊 ?㗊 ?舌 ?干 ?谷 ?只 ?㕯
+ ?句 ?丩 ?古 ?十 ?卅 ?言 ?誩 ?音 ?䇂 ?丵
+ ?菐 ?𠬞 ?廾 ?共 ?異 ?舁 ?𦥑 ?䢅 ?爨 ?革
+ ?鬲 ?䰜 ?爪 ?𠃨 ?鬥 ?又 ?𠂇 ?㕜 ?支 ?𦘒
+ ?聿 ?畫 ?隶 ?堅 ?臣 ?殳 ?殺 ?𠘧 ?寸 ?皮
+ ?㼱 ?攴 ?敎 ?卜 ?用 ?爻 ?㸚 ?𥄎 ?目 ?䀠
+ ?眉 ?盾 ?自 ?白 ?鼻 ?皕 ?習 ?羽 ?隹 ?奞
+ ?萑 ?𦫳 ?苜 ?羊 ?羴 ?瞿 ?雔 ?雥 ?鳥])
(defun shuowen-radical (number)
(aref shuowen-radicals (1- number)))
(defvar char-db-file-coding-system 'utf-8-mcs-er)
(defvar char-db-feature-domains
- '(ucs daikanwa cns gt jis jis/alt jis/a jis/b
+ '(ucs ucs/compat daikanwa cns gt jis jis/alt jis/a jis/b
jis-x0212 jis-x0213 cdp shinjigen misc unknown))
(defvar char-db-ignored-attributes '(ideographic-products))
nil)
((eq '->subsumptive ka)
nil)
+ ((and (symbolp ka)
+ (string-match "^->" (symbol-name ka)))
+ (cond ((and (symbolp kb)
+ (string-match "^->" (symbol-name kb)))
+ (string< (symbol-name ka)
+ (symbol-name kb))
+ ))
+ )
+ ((and (symbolp kb)
+ (string-match "^->" (symbol-name kb)))
+ t)
+ ((and (symbolp ka)
+ (string-match "^<-" (symbol-name ka)))
+ (cond ((symbolp kb)
+ (cond ((string-match "^<-" (symbol-name kb))
+ (string< (symbol-name ka)
+ (symbol-name kb))
+ )
+ ;; ((string-match "^->" (symbol-name kb))
+ ;; t)
+ )))
+ )
+ ((and (symbolp kb)
+ (string-match "^<-" (symbol-name kb)))
+ t
+ ;; (not (string-match "^->" (symbol-name ka)))
+ )
((find-charset ka)
(if (find-charset kb)
(if (<= (charset-id ka) 1)
=jis-x0208@1983
japanese-jisx0212
chinese-gb2312
+ =jis-x0208@1990
chinese-cns11643-1
chinese-cns11643-2
chinese-cns11643-3
chinese-cns11643-5
chinese-cns11643-6
chinese-cns11643-7
- =jis-x0208@1990
=jis-x0213-1-2000
=jis-x0213-2-2000
korean-ksc5601
=big5-eten
=jis-x0208@1997
=zinbun-oracle
+ =ruimoku-v6
=jef-china3))
(defun char-db-make-char-spec (char)
'(=daikanwa
=daikanwa@rev2
;; =gt-k
+ =jis-x0208@1997
)))
(setq ccs (charset-name ccs))
(null (assq ccs char-spec))
(defun char-db-insert-ccs-feature (name value line-breaking)
(insert
(format
- (cond ((memq name '(=daikanwa
- =daikanwa@rev1 =daikanwa@rev2
- =gt =gt-k =cbeta =zinbun-oracle))
+ (cond ((or (memq name '(=daikanwa
+ =daikanwa@rev1 =daikanwa@rev2
+ =gt =gt-k =cbeta =zinbun-oracle))
+ (string-match "^=adobe-" (symbol-name name)))
"(%-18s . %05d)\t; %c")
((eq name 'mojikyo)
"(%-18s . %06d)\t; %c")
#'char-attribute-name<)))
(insert "(")
(when (memq '<-subsumptive attributes)
- (unless readable
+ (when readable
(when (setq value (get-char-attribute char '<-subsumptive))
(char-db-insert-relation-feature char '<-subsumptive value
line-breaking
name value
line-breaking))
)
- ((and (not readable)
- (null (get-char-attribute
- char
- (intern (format "%s*sources" name))))
- (not (string-match "\\*sources$" (symbol-name name)))
- (or (eq name '<-identical)
- (string-match "^->compat" (symbol-name name))
- (string-match "^->halfwidth" (symbol-name name))
- (and
- (string-match "^->fullwidth" (symbol-name name))
- (not
- (and (consp value)
- (characterp (car value))
- (encode-char
- (car value) '=ucs 'defined-only))))
- (string-match "^->simplified" (symbol-name name))
- (string-match "^->vulgar" (symbol-name name))
- (string-match "^->wrong" (symbol-name name))
- (string-match "^->same" (symbol-name name))
- (string-match "^->formed" (symbol-name name))
- (string-match "^->original" (symbol-name name))
- (string-match "^->ancient" (symbol-name name))
- (string-match "^->Oracle-Bones" (symbol-name name))
- ))
+ ((and
+ (not readable)
+ (not (eq name '->subsumptive))
+ (not (eq name '->uppercase))
+ (not (eq name '->lowercase))
+ (not (eq name '->titlecase))
+ (not (eq name '->canonical))
+ (not (eq name '->Bopomofo))
+ (not (eq name '->mistakable))
+ (not (eq name '->ideographic-variants))
+ (null (get-char-attribute
+ char (intern (format "%s*sources" name))))
+ (not (string-match "\\*sources$" (symbol-name name)))
+ (null (get-char-attribute
+ char (intern (format "%s*note" name))))
+ (not (string-match "\\*note$" (symbol-name name)))
+ (or (eq name '<-identical)
+ (eq name '<-uppercase)
+ (eq name '<-lowercase)
+ (eq name '<-titlecase)
+ (eq name '<-canonical)
+ (eq name '<-ideographic-variants)
+ ;; (eq name '<-synonyms)
+ (string-match "^<-synonyms" (symbol-name name))
+ (eq name '<-mistakable)
+ (when (string-match "^->" (symbol-name name))
+ (cond
+ ((string-match "^->fullwidth" (symbol-name name))
+ (not (and (consp value)
+ (characterp (car value))
+ (encode-char
+ (car value) '=ucs 'defined-only)))
+ )
+ (t)))
+ ))
)
((or (eq name 'ideographic-structure)
(eq name 'ideographic-combination)
(eq name 'ideographic-)
(eq name '=decomposition)
(string-match "^=>decomposition" (symbol-name name))
- (string-match "^\\(->\\|<-\\)" (symbol-name name)))
+ (string-match "^\\(->\\|<-\\)[^*]*$" (symbol-name name))
+ (string-match "^\\(->\\|<-\\)[^*]*\\*sources$"
+ (symbol-name name))
+ )
(char-db-insert-relation-feature char name value
line-breaking
ccss readable))
(insert ")")
(insert line-breaking))
(t
- (insert (format "(%-18s . %S)%s"
- name value
- line-breaking)))
+ (insert (format "(%-18s" name))
+ (setq ret (prin1-to-string value))
+ (unless (< (+ (current-column)
+ (length ret)
+ 3)
+ 76)
+ (insert line-breaking))
+ (insert " . " ret ")" line-breaking)
+ ;; (insert (format "(%-18s . %S)%s"
+ ;; name value
+ ;; line-breaking))
+ )
))
(setq attributes (cdr attributes)))
(insert ")")))