Reformatted.
[chise/xemacs-chise.git.1] / lisp / utf-2000 / char-db-util.el
index dea1a15..d25d65b 100644 (file)
@@ -1,7 +1,7 @@
 ;;; char-db-util.el --- Character Database utility -*- coding: utf-8-er; -*-
 
 ;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
-;;   2007, 2008, 2009 MORIOKA Tomohiko.
+;;   2007, 2008, 2009, 2010, 2011 MORIOKA Tomohiko.
 
 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
 ;; Keywords: CHISE, Character Database, ISO/IEC 10646, UCS, Unicode, MULE.
@@ -25,7 +25,8 @@
 
 ;;; Code:
 
-(require 'alist)
+(require 'chise-subr)
+(require 'ideograph-subr)
 
 (defconst unidata-normative-category-alist
   '(("Lu" letter       uppercase)
     ("So" symbol       other)
     ))
 
-(defconst ideographic-radicals
-  (let ((v (make-vector 215 nil))
-       (i 1))
-    (while (< i 215)
-      (aset v i (decode-char '=ucs (+ #x2EFF i)))
-      (setq i (1+ i)))
-    v))
-
-(defun ideographic-radical (number)
-  (aref ideographic-radicals number))
-
 (defconst shuowen-radicals
   [?一 ?上 ?示 ?三 ?王 ?玉 ?玨 ?气 ?士 ?丨 ; 010
    ?屮 ?艸 ?蓐 ?茻 ?小 ?八 ?釆 ?半 ?牛 ?犛 ; 020
 
 (defvar char-db-file-coding-system 'utf-8-mcs-er)
 
-(defvar char-db-feature-domains
-  '(ucs ucs/compat daikanwa cns gt jis jis/alt jis/a jis/b
-       jis-x0212 jis-x0213 cdp shinjigen misc unknown))
-
 (defvar char-db-ignored-attributes '(ideographic-products))
 
-(defun char-attribute-name< (ka kb)
-  (cond
-   ((eq '->denotational kb)
-    t)
-   ((eq '->subsumptive kb)
-    (not (eq '->denotational ka)))
-   ((eq '->denotational ka)
-    nil)
-   ((eq '->subsumptive ka)
-    nil)
-   ((and (symbolp ka)
-        (string-match "^->" (symbol-name ka)))
-    (cond ((and (symbolp kb)
-               (string-match "^->" (symbol-name kb)))
-          (string< (symbol-name ka)
-                   (symbol-name kb))
-          ))
-    )
-   ((and (symbolp kb)
-        (string-match "^->" (symbol-name kb)))
-    t)
-   ((and (symbolp ka)
-        (string-match "^<-" (symbol-name ka)))
-    (cond ((symbolp kb)
-          (cond ((string-match "^<-" (symbol-name kb))
-                 (string< (symbol-name ka)
-                          (symbol-name kb))
-                 )
-                 ;; ((string-match "^->" (symbol-name kb))
-                 ;;  t)
-                )))
-    )
-   ((and (symbolp kb)
-        (string-match "^<-" (symbol-name kb)))
-    t
-    ;; (not (string-match "^->" (symbol-name ka)))
-    )
-   ((find-charset ka)
-    (if (find-charset kb)
-       (let (a-ir b-ir)
-         (if (and (setq a-ir (charset-property ka 'iso-ir))
-                  (if (= a-ir 177)
-                      (if (= (charset-id ka) -177)
-                          t
-                        (setq a-ir nil))
-                    t))
-             (if (and (setq b-ir (charset-property kb 'iso-ir))
-                      (if (= b-ir 177)
-                          (if (= (charset-id kb) -177)
-                              t
-                            (setq b-ir nil))
-                        t))
-                 (cond
-                  ((= a-ir b-ir)
-                   (< (charset-id ka)(charset-id kb))
-                   )
-                   ;; ((= a-ir 177)
-                   ;;  t)
-                   ;; ((= b-ir 177)
-                   ;;  nil)
-                  ((= (charset-dimension ka)
-                      (charset-dimension kb))
-                   (< a-ir b-ir)
-                   )
-                  ((> (charset-dimension ka)
-                      (charset-dimension kb))
-                   ))
-               t)
-           (if (and (setq b-ir (charset-property kb 'iso-ir))
-                    (if (= b-ir 177)
-                        (if (= (charset-id kb) -177)
-                            t
-                          (setq b-ir nil))
-                      t))
-               nil
-             (< (charset-id ka)(charset-id kb)))))
-      nil)
-    )
-   ((find-charset kb))
-   ((symbolp ka)
-    (cond ((symbolp kb)
-          (string< (symbol-name ka)
-                   (symbol-name kb)))
-         (t)))
-   ((symbolp kb)
-    nil)))
-
 (defvar char-db-coded-charset-priority-list
   '(ascii
     control-1
     chinese-cns11643-5
     chinese-cns11643-6
     chinese-cns11643-7
-    =jis-x0213-1-2000
-    =jis-x0213-2-2000
+    =jis-x0213-1
+    =jis-x0213-1@2000
+    =jis-x0213-1@2004
+    =jis-x0213-2
     korean-ksc5601
     chinese-isoir165
     katakana-jisx0201
     ethiopic-ucs
     =big5-cdp
     =gt
+    =adobe-japan1-0
+    =adobe-japan1-1
+    =adobe-japan1-2
+    =adobe-japan1-3
+    =adobe-japan1-4
+    =adobe-japan1-5
+    =adobe-japan1-6
+    =hanyo-denshi/ja
+    =hanyo-denshi/jb
+    =hanyo-denshi/jc
+    =hanyo-denshi/jd
+    =hanyo-denshi/ft
+    =hanyo-denshi/ia
+    =hanyo-denshi/ib
+    =hanyo-denshi/hg
     ideograph-daikanwa-2
     ideograph-daikanwa
     =cbeta
+    =gt-k
     ideograph-hanziku-1
     ideograph-hanziku-2
     ideograph-hanziku-3
     ideograph-hanziku-10
     ideograph-hanziku-11
     ideograph-hanziku-12
-    =gt-k
+    =>>jis-x0208
+    =>>jis-x0213-1
+    =>>jis-x0213-1@2000
+    =>>jis-x0213-1@2004
+    =>>jis-x0213-2
+    =>>jis-x0208@1978
+    =>>hanyo-denshi/ft
+    =>>gt
+    =>jis-x0208@usual
+    =>jis-x0208
+    =>jis-x0208@1997
+    =>jis-x0213-1
+    =>jis-x0213-1@2000
+    =>jis-x0213-1@2004
+    =>jis-x0213-2@usual
+    =>jis-x0213-2
+    ==>ucs@bucs
+    =>ucs@iso
+    =>ucs@unicode
+    =>ucs@jis
+    =>ucs@JP
+    =>ucs@cns
+    =>ucs@ks
+    =>>ucs@unicode
+    =>>ucs@jis
+    =>>ucs@cns
     =ucs@iso
     =ucs@unicode
+    =>>big5-cdp
+    =>>gt-k
+    =>gt
+    =>big5-cdp
+    =>daikanwa
     =big5
     =big5-eten
-    =jis-x0208@1997
     =zinbun-oracle
+    =>zinbun-oracle
     =ruimoku-v6
-    =jef-china3))
+    =>>ruimoku-v6
+    =jef-china3
+    =shinjigen))
+
+
+;;; @ char-db formatters
+;;;
 
 (defun char-db-make-char-spec (char)
   (let (ret char-spec)
                                         ;; =gt-k
                                         =jis-x0208@1997
                                         ))
-                                 ;; (string-match "=ucs@" (symbol-name ccs))
-                                )
+                                (string-match "=ucs@" (symbol-name ccs)))
                             (setq ccs (charset-name ccs))
                             (null (assq ccs char-spec))
                             (setq ret (encode-char char ccs 'defined-only)))
    (format
     (cond ((memq name '(=shinjigen
                        =shinjigen@1ed
-                       =shinjigen@rev =shinjigen/+p@rev))
+                       =shinjigen@rev =shinjigen/+p@rev
+                       =daikanwa/ho))
           "(%-18s .  %04d)\t; %c")
          ((eq name '=shinjigen@1ed/24pr)
           "(%-18s . %04d)\t; %c")
          ((or (memq name '(=daikanwa
                            =daikanwa@rev1 =daikanwa@rev2
-                           =gt =gt-k =cbeta =zinbun-oracle))
+                           =daikanwa/+p =daikanwa/+2p
+                           =>>daikanwa =>daikanwa
+                           =gt =>>gt =>gt =gt-k =>>gt-k =cbeta
+                           =zinbun-oracle =>zinbun-oracle))
               (string-match "^=adobe-" (symbol-name name)))
           "(%-18s . %05d)\t; %c")
-         ((eq name 'mojikyo)
+         ((memq name '(=hanyo-denshi/ks mojikyo))
           "(%-18s . %06d)\t; %c")
          ((>= (charset-dimension name) 2)
           "(%-18s . #x%04X)\t; %c")
                  (union required-features
                         '(=jis-x0208
                           =jis-x0208@1990
-                          =jis-x0213-1-2000
-                          =jis-x0213-2-2000
+                          =jis-x0213-1@2000
+                          =jis-x0213-1@2004
+                          =jis-x0213-2
                           =jis-x0212
                           =jis-x0208@1983
                           =jis-x0208@1978
                        name value (decode-char '=ucs value)
                        line-breaking))
        (setq attributes (delq name attributes))))
-    (dolist (name '(=>ucs@gb =>ucs@cns =>ucs@jis =>ucs@ks =>ucs@big5))
+    (dolist (name '(=>ucs@gb =>ucs@big5))
       (when (and (memq name attributes)
                 (setq value (get-char-attribute char name)))
        (insert (format "(%-18s . #x%04X)\t; %c%s"
                        line-breaking))
        (setq attributes (delq name attributes))
        ))
-    (dolist (name '(=>daikanwa))
-      (when (and (memq name attributes)
-                (setq value (get-char-attribute char name)))
-       (insert
-        (if (integerp value)
-            (format "(%-18s . %05d)\t; %c%s"
-                    name value (decode-char '=daikanwa value)
-                    line-breaking)
-          (format "(%-18s %s)\t; %c%s"
-                  name
-                  (mapconcat (function prin1-to-string)
-                             value " ")
-                  (char-representative-of-daikanwa char)
-                  line-breaking)))
-       (setq attributes (delq name attributes))))
+    ;; (dolist (name '(=>daikanwa))
+    ;;   (when (and (memq name attributes)
+    ;;              (setq value (get-char-attribute char name)))
+    ;;     (insert
+    ;;      (if (integerp value)
+    ;;          (format "(%-18s . %05d)\t; %c%s"
+    ;;                  name value (decode-char '=daikanwa value)
+    ;;                  line-breaking)
+    ;;        (format "(%-18s %s)\t; %c%s"
+    ;;                name
+    ;;                (mapconcat (function prin1-to-string)
+    ;;                           value " ")
+    ;;                (char-representative-of-daikanwa char)
+    ;;                line-breaking)))
+    ;;     (setq attributes (delq name attributes))))
     (when (and (memq 'general-category attributes)
               (setq value (get-char-attribute char 'general-category)))
       (insert (format
                     (eq name 'ideographic-combination)
                     (eq name 'ideographic-)
                     (eq name '=decomposition)
-                    (string-match "^=>decomposition" (symbol-name name))
+                    (string-match "^=>*decomposition\\(@[^*]+\\)?$"
+                                  (symbol-name name))
                     (string-match "^\\(->\\|<-\\)[^*]*$" (symbol-name name))
                     (string-match "^\\(->\\|<-\\)[^*]*\\*sources$"
                                   (symbol-name name))
                what-character-original-window-configuration)
               (signal (car err) (cdr err)))))))
 
+
+;;; @ end
+;;;
+
 (provide 'char-db-util)
 
 ;;; char-db-util.el ends here