update.
[chise/xemacs-chise.git] / lisp / utf-2000 / read-maps.el
index d9974c3..fe02723 100644 (file)
@@ -1,6 +1,7 @@
 ;;; read-maps.el --- Read mapping-tables.
 
-;; Copyright (C) 2002,2003,2004 MORIOKA Tomohiko
+;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2008, 2012, 2014, 2015, 2017
+;;   MORIOKA Tomohiko
 
 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
 ;; Keywords: mapping table, character, CCS, multiscript, multilingual
 
 ;;; Code:
 
+(defvar mapping-table-ccs-setting-alist
+  '((=jis-x0208@1990
+     "^J90-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
+     =ucs@jis/1990
+     "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
+    (=jis-x0212
+     "^JSP-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
+     =ucs@jis/1990
+     "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
+    (=jis-x0213-1@2000
+     "^JX1-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
+     =ucs@jis/2000
+     "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
+    (=jis-x0213-2
+     "^JX2-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
+     =ucs@jis/2000
+     "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
+    (=gb2312
+     "^G0-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
+     =ucs@gb
+     "\tGU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
+    (=cns11643-1
+     "^C1-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
+     =ucs@cns
+     "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
+    (=cns11643-2     
+     "^C2-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
+     =ucs@cns
+     "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
+    (=cns11643-3
+     "^C3-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
+     =ucs@cns
+     "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
+    (=cns11643-4
+     "^C4-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
+     =ucs@cns
+     "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
+    (=cns11643-5
+     "^C5-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
+     =ucs@cns
+     "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
+    (=cns11643-6
+     "^C6-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
+     =ucs@cns
+     "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
+    (=cns11643-7
+     "^C7-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
+     =ucs@cns
+     "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
+    (=big5     
+     "^B-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
+     =ucs@big5
+     "\tBU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
+    (=ks-x1001
+     "^K0-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
+     =ucs@ks
+     "\tKU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
+    (=jef-china3     
+     "^JC3-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
+     nil
+     nil)
+    )
+  "*List of information about mapping table formats.
+Elements are of the form
+\(CCS CODE-REGEXP PAREN-EXP BASE UCS-CCS UCS-REGEXP).
+
+CCS    is a symbol, which is a name of a target coded-charset.
+
+CODE-REGEXP is a regular expression to match against
+       the representation of the target coded-charset.
+
+PAREN-EXP is a number specifies which parenthesized expression
+       in the CODE-REGEXP.
+
+BASE   base of code in the string specified by CODE-REGEXP and
+       PAREN-EXP.
+
+UCS-CCS is a symbol, which is a name of a UCS-CCS.
+
+UCS-REGEXP is a regular expression to match against
+       the representation of the UCS-CCS.")
+
 ;;;###autoload
 (defun mapping-table-read-file (filename)
   "Read mapping table." 
     (buffer-disable-undo)
     (insert-file-contents filename)
     (goto-char (point-min))
-    (let (line ccs code ucs ucs-pat ucs-ccs ucs-code chr)
+    (let (rest setting ccs code ucs ucs-pat ucs-ccs ucs-code chr
+              drep-chr drep-ccs drep-ucs-ccs)
       (while (not (eobp))
-       (cond ((looking-at "^J90-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)")
-              (setq ccs 'japanese-jisx0208-1990
-                    code (string-to-int (match-string 1) 16)
-                    ucs-pat "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)"
-                    ucs-ccs 'ucs-jis)
-              (goto-char (match-end 0))
-              )
-             ((looking-at "^JSP-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)")
-              (setq ccs 'japanese-jisx0212
-                    code (string-to-int (match-string 1) 16)
-                    ucs-pat "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)"
-                    ucs-ccs '=ucs-jis-1990)
-              (goto-char (match-end 0))
-              )
-             ((looking-at "^JX1-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)")
-              (setq ccs '=jis-x0213-1-2000
-                    code (string-to-int (match-string 1) 16)
-                    ucs-pat "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)"
-                    ucs-ccs '=ucs-jis-2000)
-              (goto-char (match-end 0))
-              )
-             ((looking-at "^JX2-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)")
-              (setq ccs '=jis-x0213-2-2000
-                    code (string-to-int (match-string 1) 16)
-                    ucs-pat "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)"
-                    ucs-ccs '=ucs@jis-2000)
-              (goto-char (match-end 0))
-              )
-             ((looking-at "^C1-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)")
-              (setq ccs 'chinese-cns11643-1
-                    code (string-to-int (match-string 1) 16)
-                    ucs-pat "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)"
-                    ucs-ccs 'ucs-cns)
-              (goto-char (match-end 0))
-              )
-             ((looking-at "^C2-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)")
-              (setq ccs 'chinese-cns11643-2
-                    code (string-to-int (match-string 1) 16)
-                    ucs-pat "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)"
-                    ucs-ccs 'ucs-cns)
-              (goto-char (match-end 0))
-              )
-             ((looking-at "^C3-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)")
-              (setq ccs 'chinese-cns11643-3
-                    code (string-to-int (match-string 1) 16)
-                    ucs-pat "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)"
-                    ucs-ccs 'ucs-cns)
-              (goto-char (match-end 0))
-              )
-             ((looking-at "^C4-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)")
-              (setq ccs 'chinese-cns11643-4
-                    code (string-to-int (match-string 1) 16)
-                    ucs-pat "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)"
-                    ucs-ccs 'ucs-cns)
-              (goto-char (match-end 0))
-              )
-             ((looking-at "^B-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)")
-              (setq ccs 'chinese-big5
-                    code (string-to-int (match-string 1) 16)
-                    ucs-pat "\tBU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)"
-                    ucs-ccs 'ucs-big5)
-              (goto-char (match-end 0))
-              )
-             ((looking-at "^JC3-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)")
-              (setq ccs '=jef-china3
-                    code (string-to-int (match-string 1) 16)
-                    ucs-pat nil
-                    ucs-ccs nil)
-              (goto-char (match-end 0))
-              )
-             (t
-              (setq ccs nil
-                    code nil
-                    ucs-pat nil
-                    ucs-ccs nil)
-              ))
+       (setq rest mapping-table-ccs-setting-alist)
+       (catch 'matched
+         (while rest
+           (setq setting (car rest)
+                 ccs (pop setting))
+           (when (looking-at (pop setting))
+             (setq code (string-to-int (match-string (pop setting))
+                                       (pop setting))
+                   ucs-ccs (pop setting)
+                   ucs-pat (car setting)
+                   drep-ccs (intern (format "=%s" ccs))
+                   drep-ucs-ccs (intern (format "=%s" ucs-ccs)))
+             (unless (find-charset drep-ccs)
+               (setq drep-ccs nil))
+             (unless (find-charset drep-ucs-ccs)
+               (setq drep-ucs-ccs nil))
+             (goto-char (match-end 0))
+             (throw 'matched t))
+           (setq rest (cdr rest)))
+         (setq ccs nil
+               code nil
+               ucs-pat nil
+               ucs-ccs nil
+               drep-ccs nil
+               drep-ucs-ccs nil))
        (setq ucs-code
              (if (and ucs-pat
                       (looking-at ucs-pat))
              (if (looking-at
                   "[ \t]*U[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
                  (string-to-int (match-string 1) 16)))
+       (if (and ucs-ccs ucs (not ucs-code))
+           (setq ucs-code ucs
+                 ucs nil))
+       (if (and (eq ccs '=jef-china3)
+                (eq ucs #xFA66))
+           (setq ucs-ccs '=ucs@JP
+                 drep-ucs-ccs '==ucs@JP))
        (when (setq chr (decode-char ccs code))
          (unless (eq (encode-char chr ccs 'defined-only)
                      code)
            (put-char-attribute chr ccs code))
          (when (and ucs-code
-                    (not (eq (or (encode-char chr ucs-ccs 'defined-only)
-                                 (char-feature chr '=>ucs))
-                             ucs-code)))
+                    (not
+                     (eq (or
+                          (encode-char chr ucs-ccs 'defined-only)
+                          (cond
+                           ((memq ucs-ccs '(=ucs@jis
+                                            =ucs@jis/1990
+                                            =ucs@jis/2000))
+                            (encode-char chr '=ucs@jis/fw 'defined-only))
+                           ((eq ucs-ccs '=ucs@gb)
+                            (encode-char chr '=ucs@gb/fw 'defined-only))
+                            ;; ((eq ucs-ccs '=ucs@cns)
+                            ;;  (encode-char chr '=ucs@cns/fw 'defined-only))
+                            ;; ((eq ucs-ccs '=ucs@big5)
+                            ;;  nil)
+                            ;; ((eq ucs-ccs '=ucs@ks)
+                            ;;  (encode-char chr '=ucs@ks/fw 'defined-only))
+                            (t
+                            (or (char-feature chr '=ucs)
+                                (char-feature chr '=>ucs))
+                            )))
+                         ucs-code)))
            (put-char-attribute chr ucs-ccs ucs-code))
          (when (and ucs
                     (not (eq (or (encode-char chr '=ucs 'defined-only)
-                                 (and (not (memq ucs-ccs '(ucs-jis
-                                                           =ucs-jis-1990
-                                                            =ucs-jis-2000
-                                                           ;; ucs-big5
+                                 (and (not (memq ucs-ccs '(=ucs@jis
+                                                           =ucs@jis/1990
+                                                            =ucs@jis/2000
+                                                           =ucs@gb
+                                                           =ucs@cns
+                                                           =ucs@big5
+                                                           =ucs@ks
                                                            )))
-                                      (char-feature chr '=>ucs)))
+                                      (or (char-feature chr '=ucs)
+                                          (char-feature chr '=>ucs))
+                                      ))
                              ucs)))
            (if (or ucs-code (null ucs-ccs))
-               (unless (eq (char-feature chr '=>ucs) ucs)
+               (unless (eq (or (char-feature chr '=ucs)
+                               (char-feature chr '=>ucs))
+                           ucs)
                  (put-char-attribute chr '=>ucs ucs))
              (unless (eq (encode-char chr ucs-ccs 'defined-only)
                          ucs)
                (put-char-attribute chr ucs-ccs ucs)))))
+
+       (when (and drep-ccs
+                  (setq drep-chr (decode-char drep-ccs code))
+                  (not (eq drep-chr chr)))
+         (unless (eq (encode-char drep-chr drep-ccs 'defined-only)
+                     code)
+           (put-char-attribute drep-chr drep-ccs code))
+         (when (and ucs-code
+                    (not (eq (encode-char drep-chr drep-ucs-ccs
+                                          'defined-only)
+                             ucs-code)))
+           (put-char-attribute drep-chr drep-ucs-ccs ucs-code))
+         (when (and ucs
+                    (not (eq (and (not (memq drep-ucs-ccs '(==ucs@jis
+                                                            ==ucs@jis/1990
+                                                            ==ucs@jis/2000
+                                                            ==ucs@gb
+                                                            ==ucs@cns
+                                                            ==ucs@ks)))
+                                  (or (char-feature drep-chr '=ucs)
+                                      (char-feature drep-chr '=>ucs))
+                                  )
+                             ucs))
+                    (not (eq (char-feature drep-chr '=>ucs*) ucs)))
+           (if (or ucs-code (null drep-ucs-ccs))
+               (unless (eq (or (char-feature drep-chr '=ucs)
+                               (char-feature drep-chr '=>ucs))
+                           ucs)
+                 (put-char-attribute drep-chr '=>ucs ucs))
+             (unless (eq (encode-char drep-chr drep-ucs-ccs 'defined-only)
+                         ucs)
+               (put-char-attribute drep-chr drep-ucs-ccs ucs)))))
        (forward-line)))))
 
+;;;###autoload
+(defun ucs-compat-read-file (filename)
+  (interactive "fUCS-compat file : ")
+  (with-temp-buffer
+    (buffer-disable-undo)
+    (insert-file-contents filename)
+    (goto-char (point-min))
+    (let (ucs ucs*)
+      (while (re-search-forward
+             "^ *U[---+]\\([0-9A-F]+\\)\t *U[---+]\\([0-9A-F]+\\)" nil t)
+       (setq ucs (string-to-int (match-string 1) 16)
+             ucs* (string-to-int (match-string 2) 16))
+       (put-char-attribute (decode-char '=ucs ucs) '=>ucs* ucs*)
+       ))))
+
+;;;###autoload
 (defun jp-jouyou-read-file (filename)
   (interactive "fjp-jouyou file : ")
   (with-temp-buffer