;;; char-db-util.el --- Character Database utility -*- coding: utf-8-er; -*-
-;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
-;; 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 MORIOKA Tomohiko.
+;; Copyright (C) 1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,
+;; 2008,2009,2010,2011,2012,2013,2014,2015,2016,2017 MORIOKA Tomohiko.
;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
;; Keywords: CHISE, Character Database, ISO/IEC 10646, UCS, Unicode, MULE.
?儿 ?兄 ?兂 ?皃 ?𠑹 ?先 ?秃 ?見 ?覞 ?欠 ; 320
?㱃 ?㳄 ?旡 ?頁 ?𦣻 ?面 ?丏 ?首 ?𥄉 ?須 ; 330
?彡 ?彣 ?文 ?髟 ?后 ?司 ?卮 ?卩 ?印 ?色 ; 340
+ ?𠨍 ?辟 ?勹 ?包 ?茍 ?鬼 ?甶 ?厶 ?嵬 ?山 ; 350
+ ?屾 ?屵 ?广 ?厂 ?丸 ?危 ?石 ?長 ?勿 ?冄 ; 360
+ ?而 ?豕 ?㣇 ?彑 ?豚 ?豸 ?𤉡 ?易 ?象 ?馬 ; 370
+ ?𢊁 ?鹿 ?麤 ?㲋 ?兔 ?萈 ?犬 ?㹜 ?鼠 ?能 ; 380
+ ?熊 ?火 ?炎 ?黑 ?囪 ?焱 ?炙 ?赤 ?大 ?亦 ; 390
])
(defun shuowen-radical (number)
=hanyo-denshi/hg
=hanyo-denshi/jt
=hanyo-denshi/ks
+ =hanyo-denshi/tk
;; ==mj
;; ==adobe-japan1-0
;; ==adobe-japan1-1
==jis-x0208
==jis-x0213-1
==jis-x0213-2
+ ==jis-x0212
==hanyo-denshi/ja
==hanyo-denshi/jb
==hanyo-denshi/jc
==hanyo-denshi/ft
+ ==hanyo-denshi/ia
==hanyo-denshi/ib
==hanyo-denshi/hg
==hanyo-denshi/jt
==hanyo-denshi/ks
+ ==hanyo-denshi/tk
=gt
=gt-k
=daikanwa
=daikanwa@rev2
=daikanwa@rev1
+ =daikanwa/+p
==daikanwa
=cbeta
ideograph-hanziku-1
=>jis-x0213-1@2004
=>jis-x0213-2
==>ucs@bucs
+ =>iwds-1
=>ucs@hanyo-denshi
=>ucs@iso
=>ucs@unicode
==gt
==jis-x0208@1990
;; ==jis-x0208@1983
- ;; ==jis-x0208@1978
+ ==jis-x0208@1978
==gt-k
=ucs@iso
=ucs@unicode
=>>gt-k
=+>gt
=>gt
+ =>mj
=>big5-cdp
=>daikanwa
=>daikanwa/ho
+ =>cns11643-5
=>cns11643-7
=big5
=big5-eten
=>zinbun-oracle
=ruimoku-v6
=>>ruimoku-v6
+ ==ruimoku-v6
=jef-china3
+ =>cbeta
=shinjigen
- =big5-cdp-var-3
- =big5-cdp-var-5))
+ =ucs-var-001
+ =ucs-var-002
+ =ucs-var-003
+ =ucs-var-004
+ =ucs-var-005
+ =ucs-var-006
+ =ucs-var-008
+ =ucs-var-010
+ =ucs-itaiji-001
+ =ucs-itaiji-002
+ =ucs-itaiji-003
+ =ucs-itaiji-004
+ =ucs-itaiji-005
+ =ucs-itaiji-006
+ =ucs-itaiji-007
+ =ucs-itaiji-008
+ =ucs-itaiji-009
+ =ucs-itaiji-011
+ =>ucs-itaiji-001
+ =>ucs-itaiji-002
+ =>ucs-itaiji-003
+ =>ucs-itaiji-005
+ =>ucs-itaiji-006
+ =>ucs-itaiji-007
+ =big5-cdp-var-001
+ =big5-cdp-var-002
+ =big5-cdp-var-003
+ =big5-cdp-var-004
+ =big5-cdp-var-005
+ =big5-cdp-itaiji-001
+ =big5-cdp-itaiji-002
+ =big5-cdp-itaiji-003
+ =>big5-cdp-itaiji-001
+ =>ucs@iwds-1
+ =>ucs@component
+ ==ucs@gb
+ =ucs@gb
+ =ucs@JP/hanazono))
;;; @ char-db formatters
(decode-builtin-char '=gt ret))
(t
(decode-builtin-char ccs code-point))))
- (cond ((and (<= 0 (char-int ret))
+ (cond ((null ret)
+ (or (decode-char ccs code-point)
+ (define-char (list (cons ccs code-point)))))
+ ((and (<= 0 (char-int ret))
(<= (char-int ret) #x1F))
(decode-char '=ucs (+ #x2400 (char-int ret))))
((= (char-int ret) #x7F)
(insert
(format
(cond
- ((memq name '(==shinjigen
+ ((memq name '(=>iwds-1
+ ==shinjigen
=shinjigen
=shinjigen@1ed ==shinjigen@1ed
=shinjigen@rev ==shinjigen@rev
"(%-18s . %04d)\t; %c")
((eq name '=shinjigen@1ed/24pr)
"(%-18s . %04d)\t; %c")
- ((or (memq name '(===daikanwa
- ==daikanwa =daikanwa =>>daikanwa =>daikanwa
- =daikanwa@rev1 =daikanwa@rev2
- =daikanwa/+p ==daikanwa/+p =>>daikanwa/+p
- =daikanwa/+2p =>>daikanwa/+2p
- =gt ==gt ===gt
- =>>gt =+>gt =>gt
- =gt-k ==gt-k ===gt-k
- =>>gt-k =>gt-k
- =adobe-japan1-0 ==adobe-japan1-0 ===adobe-japan1-0
- =adobe-japan1-1 ==adobe-japan1-1 ===adobe-japan1-1
- =adobe-japan1-2 ==adobe-japan1-2 ===adobe-japan1-2
- =adobe-japan1-3 ==adobe-japan1-3 ===adobe-japan1-3
- =adobe-japan1-4 ==adobe-japan1-4 ===adobe-japan1-4
- =adobe-japan1-5 ==adobe-japan1-5 ===adobe-japan1-5
- =adobe-japan1-6 ==adobe-japan1-6 ===adobe-japan1-6
- =>>adobe-japan1-0 =+>adobe-japan1-0
- =>>adobe-japan1-1 =+>adobe-japan1-1
- =>>adobe-japan1-2 =+>adobe-japan1-2
- =>>adobe-japan1-3 =+>adobe-japan1-3
- =>>adobe-japan1-4 =+>adobe-japan1-4
- =>>adobe-japan1-5 =+>adobe-japan1-5
- =>>adobe-japan1-6 =+>adobe-japan1-6
- =cbeta ==cbeta ===cbeta =>>cbeta
- =zinbun-oracle =>zinbun-oracle))
- ;; (string-match "^=adobe-" (symbol-name name))
- )
+ ((or
+ (memq name
+ '(===daikanwa
+ ==daikanwa =daikanwa =>>daikanwa =>daikanwa
+ =daikanwa@rev1 =daikanwa@rev2
+ =daikanwa/+p ==daikanwa/+p ===daikanwa/+p
+ =>>daikanwa/+p
+ =daikanwa/+2p =>>daikanwa/+2p
+ =gt ==gt ===gt
+ =>>gt =+>gt =>gt
+ =gt-k ==gt-k ===gt-k
+ =>>gt-k =>gt-k
+ =adobe-japan1-0 ==adobe-japan1-0 ===adobe-japan1-0
+ =adobe-japan1-1 ==adobe-japan1-1 ===adobe-japan1-1
+ =adobe-japan1-2 ==adobe-japan1-2 ===adobe-japan1-2
+ =adobe-japan1-3 ==adobe-japan1-3 ===adobe-japan1-3
+ =adobe-japan1-4 ==adobe-japan1-4 ===adobe-japan1-4
+ =adobe-japan1-5 ==adobe-japan1-5 ===adobe-japan1-5
+ =adobe-japan1-6 ==adobe-japan1-6 ===adobe-japan1-6
+ =>>adobe-japan1-0 =+>adobe-japan1-0
+ =>>adobe-japan1-1 =+>adobe-japan1-1
+ =>>adobe-japan1-2 =+>adobe-japan1-2
+ =>>adobe-japan1-3 =+>adobe-japan1-3
+ =>>adobe-japan1-4 =+>adobe-japan1-4
+ =>>adobe-japan1-5 =+>adobe-japan1-5
+ =>>adobe-japan1-6 =+>adobe-japan1-6
+ =>cbeta =cbeta =>>cbeta ==cbeta ===cbeta
+ =zinbun-oracle =>zinbun-oracle
+ ===hng-jou ===hng-keg ===hng-dng ===hng-mam
+ ===hng-drt ===hng-kgk ===hng-myz ===hng-kda
+ ===hng-khi ===hng-khm ===hng-hok ===hng-kyd ===hng-sok
+ ===hng-yhk ===hng-kak ===hng-kar ===hng-kae
+ ===hng-sys ===hng-tsu ===hng-tzj
+ ===hng-hos ===hng-nak ===hng-jhk
+ ===hng-hod ===hng-gok ===hng-ink ===hng-nto
+ ===hng-nkm ===hng-k24 ===hng-nkk
+ ===hng-kcc ===hng-kcj ===hng-kbk ===hng-sik
+ ===hng-skk ===hng-kyu ===hng-ksk ===hng-wan
+ ===hng-okd ===hng-wad ===hng-kmi
+ ===hng-zkd ===hng-doh ===hng-jyu
+ ===hng-tzs ===hng-kss ===hng-kyo
+ ===hng-smk))
+ ;; (string-match "^=adobe-" (symbol-name name))
+ )
"(%-18s . %05d)\t; %c")
((memq name '(=hanyo-denshi/ks
==hanyo-denshi/ks ===hanyo-denshi/ks
=>>hanyo-denshi/ks
- =koseki
- =mj ==mj ===mj =>>mj
+ =koseki ==koseki
+ =mj ==mj ===mj =>>mj =>mj
=zihai mojikyo))
"(%-18s . %06d)\t; %c")
+ ((memq name '(=hanyo-denshi/tk ==hanyo-denshi/tk))
+ "(%-18s . %08d)\t; %c")
((>= (charset-dimension name) 2)
"(%-18s . #x%04X)\t; %c")
(t
line-breaking
ccss readable)
(setq attributes (delq '<-denotational attributes)))
+ (when (and (memq '<-denotational@component attributes)
+ (setq value (get-char-attribute char '<-denotational@component)))
+ (char-db-insert-relation-feature char '<-denotational@component value
+ line-breaking
+ ccss readable)
+ (setq attributes (delq '<-denotational@component attributes)))
(when (and (memq 'name attributes)
(setq value (get-char-attribute char 'name)))
(insert (format
(setq strokes value)))
(setq attributes (delq 'kangxi-strokes attributes))
)
- (when (and (memq 'japanese-radical attributes)
- (setq value (get-char-attribute char 'japanese-radical)))
- (unless (eq value radical)
- (insert (format "(japanese-radical\t . %S)\t; %c%s"
- value
- (ideographic-radical value)
- line-breaking))
- (or radical
- (setq radical value)))
- (setq attributes (delq 'japanese-radical attributes))
- )
+ ;; (when (and (memq 'japanese-radical attributes)
+ ;; (setq value (get-char-attribute char 'japanese-radical)))
+ ;; (unless (eq value radical)
+ ;; (insert (format "(japanese-radical\t . %S)\t; %c%s"
+ ;; value
+ ;; (ideographic-radical value)
+ ;; line-breaking))
+ ;; (or radical
+ ;; (setq radical value)))
+ ;; (setq attributes (delq 'japanese-radical attributes))
+ ;; )
(when (and (memq 'japanese-strokes attributes)
(setq value (get-char-attribute char 'japanese-strokes)))
(unless (eq value strokes)
;; (setq radical value)))
;; (setq attributes (delq 'shinjigen-1-radical attributes))
;; )
- (when (and (memq 'ideographic- attributes)
- (setq value (get-char-attribute char 'ideographic-)))
- (insert "(ideographic- ")
- (setq lbs (concat "\n" (make-string (current-column) ?\ ))
- separator nil)
- (while (consp value)
- (setq cell (car value))
- (if (integerp cell)
- (setq cell (decode-char '=ucs cell)))
- (cond ((characterp cell)
- (if separator
- (insert lbs))
- (if readable
- (insert (format "%S" cell))
- (char-db-insert-char-spec cell readable))
- (setq separator lbs))
- ((consp cell)
- (if separator
- (insert lbs))
- (if (consp (car cell))
- (char-db-insert-char-spec cell readable)
- (char-db-insert-char-reference cell readable))
- (setq separator lbs))
- (t
- (if separator
- (insert separator))
- (insert (prin1-to-string cell))
- (setq separator " ")))
- (setq value (cdr value)))
- (insert ")")
- (insert line-breaking)
- (setq attributes (delq 'ideographic- attributes)))
+ ;; (when (and (memq 'ideographic- attributes)
+ ;; (setq value (get-char-attribute char 'ideographic-)))
+ ;; (insert "(ideographic- ")
+ ;; (setq lbs (concat "\n" (make-string (current-column) ?\ ))
+ ;; separator nil)
+ ;; (while (consp value)
+ ;; (setq cell (car value))
+ ;; (if (integerp cell)
+ ;; (setq cell (decode-char '=ucs cell)))
+ ;; (cond ((characterp cell)
+ ;; (if separator
+ ;; (insert lbs))
+ ;; (if readable
+ ;; (insert (format "%S" cell))
+ ;; (char-db-insert-char-spec cell readable))
+ ;; (setq separator lbs))
+ ;; ((consp cell)
+ ;; (if separator
+ ;; (insert lbs))
+ ;; (if (consp (car cell))
+ ;; (char-db-insert-char-spec cell readable)
+ ;; (char-db-insert-char-reference cell readable))
+ ;; (setq separator lbs))
+ ;; (t
+ ;; (if separator
+ ;; (insert separator))
+ ;; (insert (prin1-to-string cell))
+ ;; (setq separator " ")))
+ ;; (setq value (cdr value)))
+ ;; (insert ")")
+ ;; (insert line-breaking)
+ ;; (setq attributes (delq 'ideographic- attributes)))
(when (and (memq 'total-strokes attributes)
(setq value (get-char-attribute char 'total-strokes)))
(insert (format "(total-strokes . %S)%s"
line-breaking))
(setq attributes (delq 'total-strokes attributes))
)
- (when (and (memq '->ideograph attributes)
- (setq value (get-char-attribute char '->ideograph)))
- (insert (format "(->ideograph\t%s)%s"
- (mapconcat (lambda (code)
- (cond ((symbolp code)
- (symbol-name code))
- ((integerp code)
- (format "#x%04X" code))
- (t
- (format "%s %S"
- line-breaking code))))
- value " ")
- line-breaking))
- (setq attributes (delq '->ideograph attributes))
- )
+ ;; (when (and (memq '->ideograph attributes)
+ ;; (setq value (get-char-attribute char '->ideograph)))
+ ;; (insert (format "(->ideograph\t%s)%s"
+ ;; (mapconcat (lambda (code)
+ ;; (cond ((symbolp code)
+ ;; (symbol-name code))
+ ;; ((integerp code)
+ ;; (format "#x%04X" code))
+ ;; (t
+ ;; (format "%s %S"
+ ;; line-breaking code))))
+ ;; value " ")
+ ;; line-breaking))
+ ;; (setq attributes (delq '->ideograph attributes))
+ ;; )
;; (when (and (memq '->decomposition attributes)
;; (setq value (get-char-attribute char '->decomposition)))
;; (insert (format "(->decomposition\t%s)%s"
line-breaking))
(setq attributes (delq '->mojikyo attributes))
)
- (when (and (memq 'hanyu-dazidian-vol attributes)
- (setq value (get-char-attribute char 'hanyu-dazidian-vol)))
- (insert (format "(hanyu-dazidian-vol . %d)%s"
- value line-breaking))
- (setq attributes (delq 'hanyu-dazidian-vol attributes))
- )
- (when (and (memq 'hanyu-dazidian-page attributes)
- (setq value (get-char-attribute char 'hanyu-dazidian-page)))
- (insert (format "(hanyu-dazidian-page . %d)%s"
- value line-breaking))
- (setq attributes (delq 'hanyu-dazidian-page attributes))
- )
- (when (and (memq 'hanyu-dazidian-char attributes)
- (setq value (get-char-attribute char 'hanyu-dazidian-char)))
- (insert (format "(hanyu-dazidian-char . %d)%s"
- value line-breaking))
- (setq attributes (delq 'hanyu-dazidian-char attributes))
- )
+ ;; (when (and (memq 'hanyu-dazidian-vol attributes)
+ ;; (setq value (get-char-attribute char 'hanyu-dazidian-vol)))
+ ;; (insert (format "(hanyu-dazidian-vol . %d)%s"
+ ;; value line-breaking))
+ ;; (setq attributes (delq 'hanyu-dazidian-vol attributes))
+ ;; )
+ ;; (when (and (memq 'hanyu-dazidian-page attributes)
+ ;; (setq value (get-char-attribute char 'hanyu-dazidian-page)))
+ ;; (insert (format "(hanyu-dazidian-page . %d)%s"
+ ;; value line-breaking))
+ ;; (setq attributes (delq 'hanyu-dazidian-page attributes))
+ ;; )
+ ;; (when (and (memq 'hanyu-dazidian-char attributes)
+ ;; (setq value (get-char-attribute char 'hanyu-dazidian-char)))
+ ;; (insert (format "(hanyu-dazidian-char . %d)%s"
+ ;; value line-breaking))
+ ;; (setq attributes (delq 'hanyu-dazidian-char attributes))
+ ;; )
(unless readable
(dolist (ignored '(composition
->denotational <-subsumptive ->ucs-unified
)
((or (eq name 'ideographic-structure)
(eq name 'ideographic-combination)
- (eq name 'ideographic-)
+ ;; (eq name 'ideographic-)
(eq name '=decomposition)
(char-feature-base-name= '=decomposition name)
(char-feature-base-name= '=>decomposition name)
(char-db-insert-relation-feature char name value
line-breaking
ccss readable))
- ((memq name '(ideograph=
- original-ideograph-of
- ancient-ideograph-of
- vulgar-ideograph-of
- wrong-ideograph-of
- ;; simplified-ideograph-of
- ideographic-variants
- ;; ideographic-different-form-of
- ))
- (insert (format "(%-18s%s " name line-breaking))
- (setq lbs (concat "\n" (make-string (current-column) ?\ ))
- separator nil)
- (while (consp value)
- (setq cell (car value))
- (if (and (consp cell)
- (consp (car cell)))
- (progn
- (if separator
- (insert lbs))
- (char-db-insert-alist cell readable)
- (setq separator lbs))
- (if separator
- (insert separator))
- (insert (prin1-to-string cell))
- (setq separator " "))
- (setq value (cdr value)))
- (insert ")")
- (insert line-breaking))
+ ;; ((memq name '(ideograph=
+ ;; original-ideograph-of
+ ;; ancient-ideograph-of
+ ;; vulgar-ideograph-of
+ ;; wrong-ideograph-of
+ ;; ;; simplified-ideograph-of
+ ;; ideographic-variants
+ ;; ;; ideographic-different-form-of
+ ;; ))
+ ;; (insert (format "(%-18s%s " name line-breaking))
+ ;; (setq lbs (concat "\n" (make-string (current-column) ?\ ))
+ ;; separator nil)
+ ;; (while (consp value)
+ ;; (setq cell (car value))
+ ;; (if (and (consp cell)
+ ;; (consp (car cell)))
+ ;; (progn
+ ;; (if separator
+ ;; (insert lbs))
+ ;; (char-db-insert-alist cell readable)
+ ;; (setq separator lbs))
+ ;; (if separator
+ ;; (insert separator))
+ ;; (insert (prin1-to-string cell))
+ ;; (setq separator " "))
+ ;; (setq value (cdr value)))
+ ;; (insert ")")
+ ;; (insert line-breaking))
((consp value)
(insert (format "(%-18s " name))
(setq lbs (concat "\n" (make-string (current-column) ?\ ))