Reformatted.
[chise/xemacs-chise.git.1] / lisp / utf-2000 / char-db-util.el
index 7e9a87d..56da7a9 100644 (file)
@@ -1,6 +1,7 @@
 ;;; char-db-util.el --- Character Database utility -*- coding: utf-8-er; -*-
 
-;; Copyright (C) 1998,1999,2000,2001,2002,2003,2004,2005 MORIOKA Tomohiko.
+;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+;;   2007, 2008 MORIOKA Tomohiko.
 
 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
 ;; Keywords: CHISE, Character Database, ISO/IEC 10646, UCS, Unicode, MULE.
   (aref ideographic-radicals number))
 
 (defconst shuowen-radicals
-  [?一 ?上 ?示 ?三 ?王 ?玉 ?玨 ?气 ?士 ?丨 ?屮 ?艸 ?茻])
+  [?一 ?上 ?示 ?三 ?王 ?玉 ?玨 ?气 ?士 ?丨 ; 010
+   ?屮 ?艸 ?蓐 ?茻 ?小 ?八 ?釆 ?半 ?牛 ?犛 ; 020
+   ?告 ?口 ?凵 ?吅 ?哭 ?走 ?止 ?癶 ?步 ?此 ; 030
+   ?正 ?是 ?辵 ?彳 ?廴 ?㢟 ?行 ?齒 ?牙 ?足 ; 040
+   ?疋 ?品 ?龠 ?冊 ?㗊 ?舌 ?干 ?谷 ?只 ?㕯 ; 050
+   ?句 ?丩 ?古 ?十 ?卅 ?言 ?誩 ?音 ?䇂 ?丵 ; 060
+   ?菐 ?𠬞 ?廾 ?共 ?異 ?舁 ?𦥑 ?䢅 ?爨 ?革 ; 070
+   ?鬲 ?䰜 ?爪 ?𠃨 ?鬥 ?又 ?𠂇 ?㕜 ?支 ?𦘒 ; 080
+   ?聿 ?畫 ?隶 ?臤 ?臣 ?殳 ?殺 ?𠘧 ?寸 ?皮 ; 090
+   ?㼱 ?攴 ?敎 ?卜 ?用 ?爻 ?㸚 ?𥄎 ?目 ?䀠 ; 100
+   ?眉 ?盾 ?自 ?白 ?鼻 ?皕 ?習 ?羽 ?隹 ?奞 ; 110
+   ?萑 ?𦫳 ?苜 ?羊 ?羴 ?瞿 ?雔 ?雥 ?鳥 ?烏 ; 120
+   ?𠦒 ?冓 ?幺 ?𢆶 ?叀 ?玄 ?予 ?放 ?𠬪 ?𣦼 ; 130
+   ?歺 ?死 ?冎 ?骨 ?肉 ?筋 ?刀 ?刃 ?㓞 ?丰 ; 140
+   ?耒 ?𧢲 ?竹 ?箕 ?丌 ?左 ?工 ?㠭 ?巫 ?甘 ; 150
+   ?曰 ?乃 ?丂 ?可 ?兮 ?号 ?亏 ?旨 ?喜 ?壴 ; 160
+   ?鼓 ?豈 ?豆 ?豊 ?豐 ?䖒 ?虍 ?虎 ?虤 ?皿 ; 170
+   ?𠙴 ?去 ?血 ?丶 ?丹 ?青 ?井 ?皀 ?鬯 ?食 ; 180
+   ?亼 ?會 ?倉 ?入 ?缶 ?矢 ?高 ?冂 ?𩫏 ?京 ; 190
+   ?亯 ?𣆪 ?畗 ?㐭 ?嗇 ?來 ?麥 ?夊 ?舛 ?䑞 ; 200
+   ?韋 ?弟 ?夂 ?久 ?桀 ?木 ?東 ?林 ?才 ?叒 ; 210
+   ?之 ?帀 ?出 ?𣎵 ?生 ?乇 ?𠂹 ?𠌶 ?華 ?𥝌 ; 220
+   ?稽 ?巢 ?桼 ?束 ?㯻 ?囗 ?員 ?貝 ?邑 ?𨛜 ; 230
+   ?日 ?旦 ?倝 ?㫃 ?冥 ?晶 ?月 ?有 ?明 ?囧 ; 240
+   ?夕 ?多 ?毌 ?𢎘 ?𣐺 ?卣 ?齊 ?朿 ?片 ?鼎 ; 250
+   ?克 ?彔 ?禾 ?秝 ?黍 ?香 ?米 ?毇 ?臼 ?凶 ; 260
+   ?𣎳 ?林 ?麻 ?尗 ?耑 ?韭 ?瓜 ?瓠 ?宀 ?宮 ; 270
+   ?呂 ?穴 ?㝱 ?𤕫 ?冖 ?𠔼 ?冃 ?㒳 ?网 ?襾 ; 280
+   ?巾 ?巿 ?帛 ?白 ?㡀 ?黹 ?人 ?𠤎 ?匕 ?从 ; 290
+   ])
 
 (defun shuowen-radical (number)
   (aref shuowen-radicals (1- number)))
 (defvar char-db-file-coding-system 'utf-8-mcs-er) ; symbol naming a coding system; the code that consumes it is not visible in this excerpt
 
 (defvar char-db-feature-domains ; list of feature-domain symbols; this change inserts ucs/compat right after ucs
-  '(ucs daikanwa cns gt jis jis/alt jis/a jis/b
+  '(ucs ucs/compat daikanwa cns gt jis jis/alt jis/a jis/b
         jis-x0212 jis-x0213 cdp shinjigen misc unknown))
 
 (defvar char-db-ignored-attributes '(ideographic-products)) ; attribute names; char-db-insert-relation-feature skips a ->subsumptive member that has one of these unless a requested CCS encodes it
     nil)
    ((eq '->subsumptive ka)
     nil)
+   ((and (symbolp ka)
+        (string-match "^->" (symbol-name ka)))
+    (cond ((and (symbolp kb)
+               (string-match "^->" (symbol-name kb)))
+          (string< (symbol-name ka)
+                   (symbol-name kb))
+          ))
+    )
+   ((and (symbolp kb)
+        (string-match "^->" (symbol-name kb)))
+    t)
+   ((and (symbolp ka)
+        (string-match "^<-" (symbol-name ka)))
+    (cond ((symbolp kb)
+          (cond ((string-match "^<-" (symbol-name kb))
+                 (string< (symbol-name ka)
+                          (symbol-name kb))
+                 )
+                 ;; ((string-match "^->" (symbol-name kb))
+                 ;;  t)
+                )))
+    )
+   ((and (symbolp kb)
+        (string-match "^<-" (symbol-name kb)))
+    t
+    ;; (not (string-match "^->" (symbol-name ka)))
+    )
    ((find-charset ka)
     (if (find-charset kb)
        (if (<= (charset-id ka) 1)
     =jis-x0208@1983
     japanese-jisx0212
     chinese-gb2312
+    =jis-x0208@1990
     chinese-cns11643-1
     chinese-cns11643-2
     chinese-cns11643-3
     chinese-cns11643-5
     chinese-cns11643-6
     chinese-cns11643-7
-    =jis-x0208@1990
     =jis-x0213-1-2000
     =jis-x0213-2-2000
     korean-ksc5601
     =big5-eten
     =jis-x0208@1997
     =zinbun-oracle
+    =ruimoku-v6
     =jef-china3))
 
 (defun char-db-make-char-spec (char)
                                       '(=daikanwa
                                         =daikanwa@rev2
                                         ;; =gt-k
+                                        =jis-x0208@1997
                                         )))
                             (setq ccs (charset-name ccs))
                             (null (assq ccs char-spec))
 (defun char-db-insert-ccs-feature (name value line-breaking)
   (insert
    (format
-    (cond ((memq name '(=daikanwa
-                       =daikanwa@rev1 =daikanwa@rev2
-                       =gt =gt-k =cbeta =zinbun-oracle))
+    (cond ((memq name '(=shinjigen
+                       =shinjigen@1ed
+                       =shinjigen@rev =shinjigen/+p@rev))
+          "(%-18s .  %04d)\t; %c")
+         ((eq name '=shinjigen@1ed/24pr)
+          "(%-18s . %04d)\t; %c")
+         ((or (memq name '(=daikanwa
+                           =daikanwa@rev1 =daikanwa@rev2
+                           =gt =gt-k =cbeta =zinbun-oracle))
+              (string-match "^=adobe-" (symbol-name name)))
           "(%-18s . %05d)\t; %c")
          ((eq name 'mojikyo)
           "(%-18s . %06d)\t; %c")
                      (- (logand value 255) 32))))
   (insert line-breaking))
 
+(defun char-db-insert-relation-feature (char name value line-breaking ; Insert at point "(NAME cell ...)" for feature NAME of CHAR, where VALUE is the list of cells.
+                                            ccss readable) ; LINE-BREAKING is emitted after the name and after the closing paren; CCSS and READABLE are used as noted below.
+  (insert (format "(%-18s%s " name line-breaking)) ; open the form: NAME left-justified in 18 columns, then LINE-BREAKING and one space
+  (let ((lbs (concat "\n" (make-string (current-column) ?\ ))) ; lbs = newline plus padding up to the column where the first cell starts
+       separator cell sources required-features
+       ret)
+    (while (consp value) ; walk VALUE; a non-cons tail ends the loop
+      (setq cell (car value))
+      (if (integerp cell)
+         (setq cell (decode-char '=ucs cell))) ; an integer cell is decoded through the =ucs charset
+      (cond
+       ((eq name '->subsumptive) ; members of ->subsumptive are written as nested attribute lists
+       (when (or (not (some (lambda (atr) ; write the member if it has none of the ignored attributes ...
+                              (get-char-attribute cell atr))
+                            char-db-ignored-attributes))
+                 (some (lambda (ccs) ; ... or if some charset in CCSS encodes it
+                         (encode-char cell ccs 'defined-only))
+                       ccss))
+         (if separator
+             (insert lbs))
+         (let ((char-db-ignored-attributes ; rebound with <-subsumptive prepended for the nested call only
+                (cons '<-subsumptive
+                      char-db-ignored-attributes)))
+           (insert-char-attributes cell readable)) ; nested dump of the member character
+         (setq separator lbs))
+       )
+       ((characterp cell) ; plain character cell
+       (setq sources ; value of CHAR's "NAME*sources" attribute; re-read on every character cell
+             (get-char-attribute
+              char (intern (format "%s*sources" name))))
+       (setq required-features nil) ; rebuilt from scratch for each cell
+       (dolist (source sources)
+         (cond
+          ((memq source '(JP ; these sources add the JIS charsets and =shinjigen
+                          JP/Jouyou
+                          shinjigen shinjigen@1ed shinjigen@rev))
+           (setq required-features
+                 (union required-features
+                        '(=jis-x0208
+                          =jis-x0208@1990
+                          =jis-x0213-1-2000
+                          =jis-x0213-2-2000
+                          =jis-x0212
+                          =jis-x0208@1983
+                          =jis-x0208@1978
+                          =shinjigen))))
+          ((eq source 'CN) ; source CN adds the GB charsets
+           (setq required-features
+                 (union required-features
+                        '(=gb2312
+                          =gb12345
+                          =iso-ir165)))))
+         (cond ; independently of the cond above, every source pushes one more entry
+          ((find-charset (setq ret (intern (format "=%s" source)))) ; ret = symbol "=SOURCE"; pushed when it names a charset
+           (setq required-features
+                 (cons ret required-features)))
+          (t (setq required-features ; otherwise the source symbol itself is pushed
+                   (cons source required-features)))))
+       (cond ((string-match "@JP" (symbol-name name)) ; a feature name containing @JP also adds the JIS charsets
+              (setq required-features
+                    (union required-features
+                           '(=jis-x0208
+                             =jis-x0208@1990
+                             =jis-x0213-1-2000
+                             =jis-x0213-2-2000
+                             =jis-x0212
+                             =jis-x0208@1983
+                             =jis-x0208@1978))))
+             ((string-match "@CN" (symbol-name name)) ; a feature name containing @CN also adds the GB charsets
+              (setq required-features
+                    (union required-features
+                           '(=gb2312
+                             =gb12345
+                             =iso-ir165)))))
+       (if separator
+           (insert lbs))
+       (if readable
+           (insert (format "%S" cell)) ; READABLE non-nil: print the character object with %S
+         (char-db-insert-char-spec cell readable ; otherwise write a char-spec, handing over required-features as the fourth argument
+                                   nil
+                                   required-features))
+       (setq separator lbs))
+       ((consp cell) ; cons cell
+       (if separator
+           (insert lbs))
+       (if (consp (car cell)) ; a cell whose car is itself a cons goes to char-db-insert-char-spec, any other cons to char-db-insert-char-reference
+           (char-db-insert-char-spec cell readable)
+         (char-db-insert-char-reference cell readable))
+       (setq separator lbs))
+       (t ; any other cell is printed with prin1-to-string
+       (if separator
+           (insert separator))
+       (insert (prin1-to-string cell))
+       (setq separator " "))) ; the next cell of this kind follows on the same line after one space
+      (setq value (cdr value)))
+    (insert ")") ; close the form opened at the top
+    (insert line-breaking)))
+
 (defun insert-char-attributes (char &optional readable attributes column)
   (unless column
     (setq column (current-column)))
         (concat "\n" (make-string (1+ column) ?\ )))
        lbs cell separator ret
        key al cal
-       dest-ccss
-       sources required-features
+       dest-ccss ; sources required-features
        ccss)
     (let (atr-d)
       (setq attributes
                    atr-d)
                  #'char-attribute-name<)))
     (insert "(")
+    (when (memq '<-subsumptive attributes)
+      (when readable
+       (when (setq value (get-char-attribute char '<-subsumptive))
+         (char-db-insert-relation-feature char '<-subsumptive value
+                                          line-breaking
+                                          ccss readable)))
+      (setq attributes (delq '<-subsumptive attributes)))
+    (when (and (memq '<-denotational attributes)
+              (setq value (get-char-attribute char '<-denotational)))
+      (char-db-insert-relation-feature char '<-denotational value
+                                      line-breaking
+                                      ccss readable)
+      (setq attributes (delq '<-denotational attributes)))
     (when (and (memq 'name attributes)
               (setq value (get-char-attribute char 'name)))
       (insert (format
                      line-breaking))
       (setq attributes (delq 'script attributes))
       )
-    ;; (when (and (memq '<-denotational attributes)
-    ;;            (setq value (get-char-attribute char '<-denotational))
-    ;;            (null (cdr value))
-    ;;            (setq value (encode-char (car value) 'ucs 'defined-only)))
-    ;;   (insert (format "(%-18s . #x%04X)\t; %c%s"
-    ;;                   '=>ucs value (decode-char 'ucs value)
-    ;;                   line-breaking))
-    ;;   (setq attributes (delq '<-denotational attributes)))
     (dolist (name '(=>ucs =>ucs*))
       (when (and (memq name attributes)
                 (setq value (get-char-attribute char name)))
                      line-breaking))
       (setq attributes (delq '->ideograph attributes))
       )
-    (when (and (memq '->decomposition attributes)
-              (setq value (get-char-attribute char '->decomposition)))
-      (insert (format "(->decomposition\t%s)%s"
-                     (mapconcat (lambda (code)
-                                  (cond ((symbolp code)
-                                         (symbol-name code))
-                                        ((characterp code)
-                                         (if readable
-                                             (format "%S" code)
-                                           (format "#x%04X"
-                                                   (char-int code))
-                                           ))
-                                        ((integerp code)
-                                         (format "#x%04X" code))
-                                        (t
-                                         (format "%s%S" line-breaking code))))
-                                value " ")
-                     line-breaking))
-      (setq attributes (delq '->decomposition attributes))
-      )
+    ;; (when (and (memq '->decomposition attributes)
+    ;;            (setq value (get-char-attribute char '->decomposition)))
+    ;;   (insert (format "(->decomposition\t%s)%s"
+    ;;                   (mapconcat (lambda (code)
+    ;;                                (cond ((symbolp code)
+    ;;                                       (symbol-name code))
+    ;;                                      ((characterp code)
+    ;;                                       (if readable
+    ;;                                           (format "%S" code)
+    ;;                                         (format "#x%04X"
+    ;;                                                 (char-int code))
+    ;;                                         ))
+    ;;                                      ((integerp code)
+    ;;                                       (format "#x%04X" code))
+    ;;                                      (t
+    ;;                                       (format "%s%S" line-breaking code))))
+    ;;                              value " ")
+    ;;                   line-breaking))
+    ;;   (setq attributes (delq '->decomposition attributes))
+    ;;   )
     (if (equal (get-char-attribute char '->titlecase)
               (get-char-attribute char '->uppercase))
        (setq attributes (delq '->titlecase attributes)))
                                 name value
                                 line-breaking))
                 )
-               ((and (not readable)
-                     (null (get-char-attribute
-                            char
-                            (intern (format "%s*sources" name))))
-                     (not (string-match "\\*sources$" (symbol-name name)))
-                     (or (eq name '<-identical)
-                         (string-match "^->halfwidth" (symbol-name name))
-                         (and
-                          (string-match "^->fullwidth" (symbol-name name))
-                          (not
-                           (and (consp value)
-                                (characterp (car value))
-                                (encode-char
-                                 (car value) '=ucs 'defined-only))))
-                         (string-match "^->simplified" (symbol-name name))
-                         (string-match "^->vulgar" (symbol-name name))
-                         (string-match "^->wrong" (symbol-name name))
-                         (string-match "^->same" (symbol-name name))
-                         (string-match "^->formed" (symbol-name name))
-                         (string-match "^->original" (symbol-name name))
-                         (string-match "^->ancient" (symbol-name name))
-                         (string-match "^->Oracle-Bones" (symbol-name name))
-                         ))
+               ((and
+                 (not readable)
+                 (not (eq name '->subsumptive))
+                 (not (eq name '->uppercase))
+                 (not (eq name '->lowercase))
+                 (not (eq name '->titlecase))
+                 (not (eq name '->canonical))
+                 (not (eq name '->Bopomofo))
+                 (not (eq name '->mistakable))
+                 (not (eq name '->ideographic-variants))
+                 (null (get-char-attribute
+                        char (intern (format "%s*sources" name))))
+                 (not (string-match "\\*sources$" (symbol-name name)))
+                 (null (get-char-attribute
+                        char (intern (format "%s*note" name))))
+                 (not (string-match "\\*note$" (symbol-name name)))
+                 (or (eq name '<-identical)
+                     (eq name '<-uppercase)
+                     (eq name '<-lowercase)
+                     (eq name '<-titlecase)
+                     (eq name '<-canonical)
+                     (eq name '<-ideographic-variants)
+                      ;; (eq name '<-synonyms)
+                     (string-match "^<-synonyms" (symbol-name name))
+                     (eq name '<-mistakable)
+                     (when (string-match "^->" (symbol-name name))
+                       (cond
+                        ((string-match "^->fullwidth" (symbol-name name))
+                         (not (and (consp value)
+                                   (characterp (car value))
+                                   (encode-char
+                                    (car value) '=ucs 'defined-only)))
+                         )
+                        (t)))
+                     ))
                 )
                ((or (eq name 'ideographic-structure)
                     (eq name 'ideographic-combination)
                     (eq name 'ideographic-)
-                    (string-match "^\\(->\\|<-\\)" (symbol-name name)))
-                (insert (format "(%-18s%s " name line-breaking))
-                (setq lbs (concat "\n" (make-string (current-column) ?\ ))
-                      separator nil)
-                (while (consp value)
-                  (setq cell (car value))
-                   (if (integerp cell)
-                      (setq cell (decode-char '=ucs cell)))
-                  (cond ((eq name '->subsumptive)
-                         (when (or (not
-                                    (some (lambda (atr)
-                                            (get-char-attribute cell atr))
-                                          char-db-ignored-attributes))
-                                   (some (lambda (ccs)
-                                           (encode-char cell ccs
-                                                        'defined-only))
-                                         ccss))
-                           (if separator
-                               (insert lbs))
-                           (let ((char-db-ignored-attributes
-                                  (cons '<-subsumptive
-                                        char-db-ignored-attributes)))
-                             (insert-char-attributes cell readable))
-                           (setq separator lbs))
-                         )
-                        ((characterp cell)
-                         (setq sources
-                               (get-char-attribute
-                                char
-                                (intern (format "%s*sources" name))))
-                         (setq required-features nil)
-                         (dolist (source sources)
-                           (cond
-                            ((memq source '(JP JP/Jouyou
-                                               shinjigen-1))
-                             (setq required-features
-                                   (union required-features
-                                          '(=jis-x0208
-                                            =jis-x0208@1990
-                                            =jis-x0213-1-2000
-                                            =jis-x0213-2-2000
-                                            =jis-x0212
-                                            =jis-x0208@1983
-                                            =jis-x0208@1978))))
-                            ((eq source 'CN)
-                             (setq required-features
-                                   (union required-features
-                                          '(=gb2312
-                                            =gb12345
-                                            =iso-ir165)))))
-                           (cond
-                            ((find-charset
-                              (setq ret (intern (format "=%s" source))))
-                             (setq required-features
-                                   (cons ret required-features)))
-                            (t (setq required-features
-                                     (cons source required-features)))))
-                         (cond ((string-match "@JP" (symbol-name name))
-                                (setq required-features
-                                      (union required-features
-                                             '(=jis-x0208
-                                               =jis-x0208@1990
-                                               =jis-x0213-1-2000
-                                               =jis-x0213-2-2000
-                                               =jis-x0212
-                                               =jis-x0208@1983
-                                               =jis-x0208@1978))))
-                               ((string-match "@CN" (symbol-name name))
-                                (setq required-features
-                                      (union required-features
-                                             '(=gb2312
-                                               =gb12345
-                                               =iso-ir165)))))
-                         (if separator
-                             (insert lbs))
-                         (if readable
-                             (insert (format "%S" cell))
-                           (char-db-insert-char-spec cell readable
-                                                     nil
-                                                     required-features))
-                         (setq separator lbs))
-                        ((consp cell)
-                         (if separator
-                             (insert lbs))
-                         (if (consp (car cell))
-                             (char-db-insert-char-spec cell readable)
-                           (char-db-insert-char-reference cell readable))
-                         (setq separator lbs))
-                        (t
-                         (if separator
-                             (insert separator))
-                         (insert (prin1-to-string cell))
-                         (setq separator " ")))
-                  (setq value (cdr value)))
-                (insert ")")
-                (insert line-breaking))
+                    (eq name '=decomposition)
+                    (string-match "^=>decomposition" (symbol-name name))
+                    (string-match "^\\(->\\|<-\\)[^*]*$" (symbol-name name))
+                    (string-match "^\\(->\\|<-\\)[^*]*\\*sources$"
+                                  (symbol-name name))
+                    )
+                (char-db-insert-relation-feature char name value
+                                                 line-breaking
+                                                 ccss readable))
                ((memq name '(ideograph=
                              original-ideograph-of
                              ancient-ideograph-of
                 (insert ")")
                 (insert line-breaking))
                (t
-                (insert (format "(%-18s . %S)%s"
-                                name value
-                                line-breaking)))
+                 (insert (format "(%-18s" name))
+                (setq ret (prin1-to-string value))
+                (unless (< (+ (current-column)
+                              (length ret)
+                              3)
+                           76)
+                  (insert line-breaking))
+                (insert " . " ret ")" line-breaking)
+                ;; (insert (format "(%-18s . %S)%s"
+                 ;;                 name value
+                 ;;                 line-breaking))
+                )
                ))
       (setq attributes (cdr attributes)))
     (insert ")")))