X-Git-Url: http://git.chise.org/gitweb/?p=chise%2Fxemacs-chise.git.1;a=blobdiff_plain;f=lisp%2Futf-2000%2Fread-maps.el;h=fe02723da65441b3e735933cb1a1443a5a5b8c8d;hp=54f27e42f304a6d2836c266198bdd812c3b4db51;hb=HEAD;hpb=6bc12da9ed3ad7e06c1140bad64f297b2c4d737c diff --git a/lisp/utf-2000/read-maps.el b/lisp/utf-2000/read-maps.el index 54f27e4..fe02723 100644 --- a/lisp/utf-2000/read-maps.el +++ b/lisp/utf-2000/read-maps.el @@ -1,6 +1,7 @@ ;;; read-maps.el --- Read mapping-tables. -;; Copyright (C) 2002,2003,2004 MORIOKA Tomohiko +;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2008, 2012, 2014, 2015, 2017 +;; MORIOKA Tomohiko ;; Author: MORIOKA Tomohiko ;; Keywords: mapping table, character, CCS, multiscript, multilingual @@ -24,6 +25,88 @@ ;;; Code: +(defvar mapping-table-ccs-setting-alist + '((=jis-x0208@1990 + "^J90-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16 + =ucs@jis/1990 + "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)") + (=jis-x0212 + "^JSP-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16 + =ucs@jis/1990 + "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)") + (=jis-x0213-1@2000 + "^JX1-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16 + =ucs@jis/2000 + "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)") + (=jis-x0213-2 + "^JX2-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16 + =ucs@jis/2000 + "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)") + (=gb2312 + "^G0-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16 + =ucs@gb + "\tGU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)") + (=cns11643-1 + "^C1-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16 + =ucs@cns + "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)") + (=cns11643-2 + "^C2-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16 + =ucs@cns + "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)") + (=cns11643-3 + "^C3-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16 + =ucs@cns + "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)") + (=cns11643-4 + "^C4-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16 + =ucs@cns + "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)") + (=cns11643-5 + "^C5-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16 + =ucs@cns + "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)") + (=cns11643-6 + "^C6-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16 + =ucs@cns + "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)") + (=cns11643-7 + "^C7-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16 + =ucs@cns + "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)") + (=big5 + "^B-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16 + =ucs@big5 + "\tBU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)") + (=ks-x1001 + "^K0-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16 + =ucs@ks + "\tKU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)") + (=jef-china3 + "^JC3-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16 + nil + nil) + ) + "*List of information about mapping table formats. +Elements are of the form +\(CCS CODE-REGEXP PAREN-EXP BASE UCS-CCS UCS-REGEXP). + +CCS is a symbol, which is a name of a target coded-charset. + +CODE-REGEXP is a regular expression to match against + the representation of the target coded-charset. + +PAREN-EXP is a number specifies which parenthesized expression + in the CODE-REGEXP. + +BASE base of code in the string specified by CODE-REGEXP and + PAREN-EXP. + +UCS-CCS is a symbol, which is a name of a UCS-CCS. + +UCS-REGEXP is a regular expression to match against + the representation of the UCS-CCS.") + ;;;###autoload (defun mapping-table-read-file (filename) "Read mapping table." @@ -32,84 +115,34 @@ (buffer-disable-undo) (insert-file-contents filename) (goto-char (point-min)) - (let (line ccs code ucs ucs-pat ucs-ccs ucs-code chr) + (let (rest setting ccs code ucs ucs-pat ucs-ccs ucs-code chr + drep-chr drep-ccs drep-ucs-ccs) (while (not (eobp)) - (cond ((looking-at "^J90-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)") - (setq ccs 'japanese-jisx0208-1990 - code (string-to-int (match-string 1) 16) - ucs-pat "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)" - ucs-ccs 'ucs-jis) - (goto-char (match-end 0)) - ) - ((looking-at "^JSP-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)") - (setq ccs 'japanese-jisx0212 - code (string-to-int (match-string 1) 16) - ucs-pat "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)" - ucs-ccs '=ucs-jis-1990) - (goto-char (match-end 0)) - ) - ((looking-at "^JX1-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)") - (setq ccs '=jis-x0213-1-2000 - code (string-to-int (match-string 1) 16) - ucs-pat "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)" - ucs-ccs '=ucs-jis-2000) - (goto-char (match-end 0)) - ) - ((looking-at "^JX2-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)") - (setq ccs '=jis-x0213-2-2000 - code (string-to-int (match-string 1) 16) - ucs-pat "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)" - ucs-ccs '=ucs@jis-2000) - (goto-char (match-end 0)) - ) - ((looking-at "^C1-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)") - (setq ccs 'chinese-cns11643-1 - code (string-to-int (match-string 1) 16) - ucs-pat "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)" - ucs-ccs 'ucs-cns) - (goto-char (match-end 0)) - ) - ((looking-at "^C2-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)") - (setq ccs 'chinese-cns11643-2 - code (string-to-int (match-string 1) 16) - ucs-pat "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)" - ucs-ccs 'ucs-cns) - (goto-char (match-end 0)) - ) - ((looking-at "^C3-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)") - (setq ccs 'chinese-cns11643-3 - code (string-to-int (match-string 1) 16) - ucs-pat "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)" - ucs-ccs 'ucs-cns) - (goto-char (match-end 0)) - ) - ((looking-at "^C4-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)") - (setq ccs 'chinese-cns11643-4 - code (string-to-int (match-string 1) 16) - ucs-pat "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)" - ucs-ccs 'ucs-cns) - (goto-char (match-end 0)) - ) - ((looking-at "^B-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)") - (setq ccs 'chinese-big5 - code (string-to-int (match-string 1) 16) - ucs-pat "\tBU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)" - ucs-ccs 'ucs-big5) - (goto-char (match-end 0)) - ) - ((looking-at "^JC3-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)") - (setq ccs '=jef-china3 - code (string-to-int (match-string 1) 16) - ucs-pat nil - ucs-ccs nil) - (goto-char (match-end 0)) - ) - (t - (setq ccs nil - code nil - ucs-pat nil - ucs-ccs nil) - )) + (setq rest mapping-table-ccs-setting-alist) + (catch 'matched + (while rest + (setq setting (car rest) + ccs (pop setting)) + (when (looking-at (pop setting)) + (setq code (string-to-int (match-string (pop setting)) + (pop setting)) + ucs-ccs (pop setting) + ucs-pat (car setting) + drep-ccs (intern (format "=%s" ccs)) + drep-ucs-ccs (intern (format "=%s" ucs-ccs))) + (unless (find-charset drep-ccs) + (setq drep-ccs nil)) + (unless (find-charset drep-ucs-ccs) + (setq drep-ucs-ccs nil)) + (goto-char (match-end 0)) + (throw 'matched t)) + (setq rest (cdr rest))) + (setq ccs nil + code nil + ucs-pat nil + ucs-ccs nil + drep-ccs nil + drep-ucs-ccs nil)) (setq ucs-code (if (and ucs-pat (looking-at ucs-pat)) @@ -120,31 +153,112 @@ (if (looking-at "[ \t]*U[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)") (string-to-int (match-string 1) 16))) + (if (and ucs-ccs ucs (not ucs-code)) + (setq ucs-code ucs + ucs nil)) + (if (and (eq ccs '=jef-china3) + (eq ucs #xFA66)) + (setq ucs-ccs '=ucs@JP + drep-ucs-ccs '==ucs@JP)) (when (setq chr (decode-char ccs code)) (unless (eq (encode-char chr ccs 'defined-only) code) (put-char-attribute chr ccs code)) (when (and ucs-code - (not (eq (or (encode-char chr ucs-ccs 'defined-only) - (char-feature chr '=>ucs)) - ucs-code))) + (not + (eq (or + (encode-char chr ucs-ccs 'defined-only) + (cond + ((memq ucs-ccs '(=ucs@jis + =ucs@jis/1990 + =ucs@jis/2000)) + (encode-char chr '=ucs@jis/fw 'defined-only)) + ((eq ucs-ccs '=ucs@gb) + (encode-char chr '=ucs@gb/fw 'defined-only)) + ;; ((eq ucs-ccs '=ucs@cns) + ;; (encode-char chr '=ucs@cns/fw 'defined-only)) + ;; ((eq ucs-ccs '=ucs@big5) + ;; nil) + ;; ((eq ucs-ccs '=ucs@ks) + ;; (encode-char chr '=ucs@ks/fw 'defined-only)) + (t + (or (char-feature chr '=ucs) + (char-feature chr '=>ucs)) + ))) + ucs-code))) (put-char-attribute chr ucs-ccs ucs-code)) (when (and ucs (not (eq (or (encode-char chr '=ucs 'defined-only) - (and (not (memq ucs-ccs '(ucs-jis - =ucs-jis-1990 - =ucs-jis-2000 - ;; ucs-big5 + (and (not (memq ucs-ccs '(=ucs@jis + =ucs@jis/1990 + =ucs@jis/2000 + =ucs@gb + =ucs@cns + =ucs@big5 + =ucs@ks ))) - (char-feature chr '=>ucs))) + (or (char-feature chr '=ucs) + (char-feature chr '=>ucs)) + )) ucs))) (if (or ucs-code (null ucs-ccs)) - (put-char-attribute chr '=>ucs ucs) + (unless (eq (or (char-feature chr '=ucs) + (char-feature chr '=>ucs)) + ucs) + (put-char-attribute chr '=>ucs ucs)) (unless (eq (encode-char chr ucs-ccs 'defined-only) ucs) (put-char-attribute chr ucs-ccs ucs))))) + + (when (and drep-ccs + (setq drep-chr (decode-char drep-ccs code)) + (not (eq drep-chr chr))) + (unless (eq (encode-char drep-chr drep-ccs 'defined-only) + code) + (put-char-attribute drep-chr drep-ccs code)) + (when (and ucs-code + (not (eq (encode-char drep-chr drep-ucs-ccs + 'defined-only) + ucs-code))) + (put-char-attribute drep-chr drep-ucs-ccs ucs-code)) + (when (and ucs + (not (eq (and (not (memq drep-ucs-ccs '(==ucs@jis + ==ucs@jis/1990 + ==ucs@jis/2000 + ==ucs@gb + ==ucs@cns + ==ucs@ks))) + (or (char-feature drep-chr '=ucs) + (char-feature drep-chr '=>ucs)) + ) + ucs)) + (not (eq (char-feature drep-chr '=>ucs*) ucs))) + (if (or ucs-code (null drep-ucs-ccs)) + (unless (eq (or (char-feature drep-chr '=ucs) + (char-feature drep-chr '=>ucs)) + ucs) + (put-char-attribute drep-chr '=>ucs ucs)) + (unless (eq (encode-char drep-chr drep-ucs-ccs 'defined-only) + ucs) + (put-char-attribute drep-chr drep-ucs-ccs ucs))))) (forward-line))))) +;;;###autoload +(defun ucs-compat-read-file (filename) + (interactive "fUCS-compat file : ") + (with-temp-buffer + (buffer-disable-undo) + (insert-file-contents filename) + (goto-char (point-min)) + (let (ucs ucs*) + (while (re-search-forward + "^ *U[---+]\\([0-9A-F]+\\)\t *U[---+]\\([0-9A-F]+\\)" nil t) + (setq ucs (string-to-int (match-string 1) 16) + ucs* (string-to-int (match-string 2) 16)) + (put-char-attribute (decode-char '=ucs ucs) '=>ucs* ucs*) + )))) + +;;;###autoload (defun jp-jouyou-read-file (filename) (interactive "fjp-jouyou file : ") (with-temp-buffer @@ -161,8 +275,12 @@ (mapcar (lambda (c) (aref c 0)) (split-string tchars " "))) - (unless (equal (char-feature char '<-simplified@JP/Jouyou) - tchars) + (unless (or (equal (char-feature char '<-simplified@JP/Jouyou) + tchars) + (and (equal (char-feature char '<-simplified) + tchars) + (memq 'JP/Jouyou + (char-feature char '<-simplified*sources)))) (put-char-attribute char '<-simplified@JP/Jouyou tchars)))