(U+6C17): Add `<-simplified@JP/Jouyou'.
[chise/xemacs-chise.git.1] / lisp / utf-2000 / char-db-util.el
index 1cf7476..43d3d92 100644 (file)
@@ -1,24 +1,24 @@
 ;;; char-db-util.el --- Character Database utility
 
-;; Copyright (C) 1998,1999,2000,2001,2002,2003 MORIOKA Tomohiko.
+;; Copyright (C) 1998,1999,2000,2001,2002,2003,2004 MORIOKA Tomohiko.
 
 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
-;; Keywords: UTF-2000, ISO/IEC 10646, Unicode, UCS-4, MULE.
+;; Keywords: CHISE, Character Database, ISO/IEC 10646, Unicode, UCS-4, MULE.
 
-;; This file is part of XEmacs UTF-2000.
+;; This file is part of XEmacs CHISE.
 
-;; XEmacs UTF-2000 is free software; you can redistribute it and/or
+;; XEmacs CHISE is free software; you can redistribute it and/or
 ;; modify it under the terms of the GNU General Public License as
 ;; published by the Free Software Foundation; either version 2, or (at
 ;; your option) any later version.
 
-;; XEmacs UTF-2000 is distributed in the hope that it will be useful,
+;; XEmacs CHISE is distributed in the hope that it will be useful,
 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;; General Public License for more details.
 
 ;; You should have received a copy of the GNU General Public License
-;; along with XEmacs UTF-2000; see the file COPYING.  If not, write to
+;; along with XEmacs CHISE; see the file COPYING.  If not, write to
 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 ;; Boston, MA 02111-1307, USA.
 
       (setq i (1+ i)))
     v))
 
+(defvar char-db-feature-domains
+  '(ucs daikanwa cns gt jis jis/alt jis/a jis/b
+       jis-x0213 misc unknown))
+
 (defvar char-db-ignored-attributes nil)
 
 (defun char-attribute-name< (ka kb)
    ((find-charset ka)
     (cond
      ((find-charset kb)
-      (cond
-       ((= (charset-dimension ka)
-          (charset-dimension kb))
-       (cond ((= (charset-chars ka)(charset-chars kb))
-              (if (charset-iso-final-char ka)
-                  (cond
-                   ((>= (charset-iso-final-char ka) ?@)
-                    (if (and (charset-iso-final-char kb)
-                             (>= (charset-iso-final-char kb) ?@))
-                        (< (charset-iso-final-char ka)
-                           (charset-iso-final-char kb))
-                      t))
-                   (t
-                    (if (charset-iso-final-char kb)
-                        (if (>= (charset-iso-final-char kb) ?@)
-                            nil
-                          (< (charset-iso-final-char ka)
-                             (charset-iso-final-char kb)))
-                      t)))
-                (if (charset-iso-final-char kb)
-                    nil
-                  (< (charset-id ka)(charset-id kb)))))
-             ((<= (charset-chars ka)(charset-chars kb)))))
-       (t
-       (< (charset-dimension ka)
-          (charset-dimension kb))
-       )))
+      (if (<= (charset-id ka) 0)
+         (if (<= (charset-id kb) 0)
+             (cond
+              ((= (charset-dimension ka)
+                  (charset-dimension kb))
+               (> (charset-id ka)(charset-id kb)))
+              (t
+               (> (charset-dimension ka)
+                  (charset-dimension kb))
+               ))
+           t)
+       (if (<= (charset-id kb) 0)
+           nil
+         (< (charset-id ka)(charset-id kb)))))
      ((symbolp kb)
       nil)
      (t
     chinese-gb12345
     latin-viscii
     ethiopic-ucs
+    =big5-cdp
     =gt
     ideograph-daikanwa-2
     ideograph-daikanwa
     ideograph-hanziku-12
     =big5
     =big5-eten
-    =big5-cdp
     =gt-k
     =jef-china3))
 
          ((consp char)
           char))))
     
-(defun char-db-insert-char-spec (char &optional readable column)
+(defun char-db-insert-char-spec (char &optional readable column
+                                     required-features)
   (unless column
     (setq column (current-column)))
-  (let (char-spec ret al cal key temp-char)
+  (let (char-spec al cal key temp-char)
     (setq char-spec (char-db-make-char-spec char))
     (unless (or (characterp char) ; char
                (condition-case nil
     (unless (or cal
                (memq 'ideographic-structure al))
       (push 'ideographic-structure al))
+    (dolist (feature required-features)
+      (if (find-charset feature)
+         (if (encode-char char feature 'defined-only)
+             (setq cal (adjoin feature cal)))
+       (setq al (adjoin feature al))))
     (insert-char-attributes char
                            readable
                            (or al 'none) cal)
         (concat "\n" (make-string (1+ column) ?\ )))
        lbs cell separator ret
        key al cal
-       dest-ccss)
+       dest-ccss
+       sources required-features)
     (insert "(")
     (when (and (memq 'name attributes)
               (setq value (get-char-attribute char 'name)))
       )
     (setq radical nil
          strokes nil)
+    (when (and (memq 'ideographic-radical attributes)
+              (setq value (get-char-attribute char 'ideographic-radical)))
+      (setq radical value)
+      (insert (format "(ideographic-radical . %S)\t; %c%s"
+                     radical
+                     (aref ideographic-radicals radical)
+                     line-breaking))
+      (setq attributes (delq 'ideographic-radical attributes))
+      )
     (let (key)
-      (dolist (domain '(ucs daikanwa cns))
+      (dolist (domain char-db-feature-domains)
        (setq key (intern (format "%s@%s" 'ideographic-radical domain)))
        (when (and (memq key attributes)
                   (setq value (get-char-attribute char key)))
                          line-breaking))
          (setq attributes (delq key attributes))
          )
-       (setq key (intern (format "%s@%s*sources"
-                                 'ideographic-radical domain)))
+       (setq key (intern (format "%s@%s" 'total-strokes domain)))
        (when (and (memq key attributes)
                   (setq value (get-char-attribute char key)))
-         (insert (format "(%s%s" key line-breaking))
-         (dolist (cell value)
-           (insert (format " %s" cell)))
-         (insert ")")
-         (insert line-breaking)
+         (insert (format "(%s       . %S)%s"
+                         key
+                         value
+                         line-breaking))
          (setq attributes (delq key attributes))
          )
+       (dolist (feature '(ideographic-radical
+                          ideographic-strokes
+                          total-strokes))
+         (setq key (intern (format "%s@%s*sources" feature domain)))
+         (when (and (memq key attributes)
+                    (setq value (get-char-attribute char key)))
+           (insert (format "(%s%s" key line-breaking))
+           (dolist (cell value)
+             (insert (format " %s" cell)))
+           (insert ")")
+           (insert line-breaking)
+           (setq attributes (delq key attributes))
+           ))
        ))
-    (when (and (memq 'ideographic-radical attributes)
-              (setq value (get-char-attribute char 'ideographic-radical)))
-      (setq radical value)
-      (insert (format "(ideographic-radical . %S)\t; %c%s"
-                     radical
-                     (aref ideographic-radicals radical)
-                     line-breaking))
-      (setq attributes (delq 'ideographic-radical attributes))
-      )
     (when (and (memq 'ideographic-strokes attributes)
               (setq value (get-char-attribute char 'ideographic-strokes)))
       (setq strokes value)
       (setq attributes (delq 'hanyu-dazidian-char attributes))
       )
     (unless readable
-      (when (memq '->ucs-variants attributes)
-       (setq attributes (delq '->ucs-variants attributes))
+      (when (memq '->ucs-unified attributes)
+       (setq attributes (delq '->ucs-unified attributes))
        )
       (when (memq 'composition attributes)
        (setq attributes (delq 'composition attributes))
                    (if (integerp cell)
                       (setq cell (decode-char '=ucs cell)))
                   (cond ((characterp cell)
+                         (setq sources
+                               (get-char-attribute
+                                char
+                                (intern (format "%s*sources" name))))
+                         (setq required-features nil)
+                         (dolist (source sources)
+                           (setq required-features
+                                 (cons
+                                  (if (find-charset
+                                       (setq ret (intern
+                                                  (format "=%s" source))))
+                                      ret
+                                    source)
+                                  required-features)))
+                         (when (string-match "@JP" (symbol-name name))
+                           (setq required-features
+                                 (union required-features
+                                        '(=jis-x0208
+                                          =jis-x0208-1990
+                                          =jis-x0213-1-2000
+                                          =jis-x0213-2-2000
+                                          =jis-x0212
+                                          =jis-x0208-1983
+                                          =jis-x0208-1978))))
                          (if separator
                              (insert lbs))
                          (if readable
                              (insert (format "%S" cell))
-                           (char-db-insert-char-spec cell readable))
+                           (char-db-insert-char-spec cell readable
+                                                     nil
+                                                     required-features))
                          (setq separator lbs))
                         ((consp cell)
                          (if separator
                              ancient-ideograph-of
                              vulgar-ideograph-of
                              wrong-ideograph-of
-                             simplified-ideograph-of
+                              ;; simplified-ideograph-of
                              ideographic-variants
-                             ideographic-different-form-of))
+                              ;; ideographic-different-form-of
+                             ))
                 (insert (format "(%-18s%s " name line-breaking))
                 (setq lbs (concat "\n" (make-string (current-column) ?\ ))
                       separator nil)
                   (setq value (cdr value)))
                 (insert ")")
                 (insert line-breaking))
-                ;; ((string-match "^->" (symbol-name name))
-                ;;  (insert
-                ;;   (format "(%-18s %s)%s"
-                ;;           name
-                ;;           (mapconcat (lambda (code)
-                ;;                        (cond ((symbolp code)
-                ;;                               (symbol-name code))
-                ;;                              ((integerp code)
-                ;;                               (format "#x%04X" code))
-                ;;                              (t
-                ;;                               (format "%s%S"
-                ;;                                       line-breaking code))))
-                ;;                      value " ")
-                ;;           line-breaking)))
-               ((consp value)
+                ((consp value)
                 (insert (format "(%-18s " name))
                 (setq lbs (concat "\n" (make-string (current-column) ?\ ))
                       separator nil)
     ))
 
 (defun insert-char-data-with-variant (char &optional printable
-                                          no-ucs-variant
+                                          no-ucs-unified
                                           script excluded-script)
   (insert-char-data char printable)
   (let ((variants (or (char-variants char)
               (or (null excluded-script)
                   (null (setq vs (get-char-attribute variant 'script)))
                   (not (memq excluded-script vs))))
-         (or (and no-ucs-variant (get-char-attribute variant '=ucs))
+         (or (and no-ucs-unified (get-char-attribute variant '=ucs))
              (insert-char-data variant printable)))
       (setq variants (cdr variants))
       )))
     (while (<= code max)
       (setq char (decode-char '=ucs code))
       (if (encode-char char '=ucs 'defined-only)
-         (insert-char-data-with-variant char nil 'no-ucs-variant
+         (insert-char-data-with-variant char nil 'no-ucs-unified
                                         script excluded-script))
       (setq code (1+ code)))))
 
 (defun write-char-range-data-to-file (min max file
                                          &optional script excluded-script)
-  (let ((coding-system-for-write 'utf-8))
+  (let ((coding-system-for-write 'utf-8-mcs))
     (with-temp-buffer
+      (insert ";; -*- coding: utf-8-mcs -*-\n")
       (insert-char-range-data min max script excluded-script)
       (write-region (point-min)(point-max) file))))