1 ;;; read-maps.el --- Read mapping-tables.
3 ;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2008, 2012, 2014, 2015, 2017
6 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
7 ;; Keywords: mapping table, character, CCS, multiscript, multilingual
9 ;; This file is part of XEmacs CHISE.
11 ;; XEmacs CHISE is free software; you can redistribute it and/or
12 ;; modify it under the terms of the GNU General Public License as
13 ;; published by the Free Software Foundation; either version 2, or (at
14 ;; your option) any later version.
16 ;; XEmacs CHISE is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 ;; General Public License for more details.
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with XEmacs CHISE; see the file COPYING. If not, write to
23 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
28 (defvar mapping-table-ccs-setting-alist
30 "^J90-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
32 "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
34 "^JSP-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
36 "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
38 "^JX1-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
40 "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
42 "^JX2-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
44 "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
46 "^G0-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
48 "\tGU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
50 "^C1-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
52 "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
54 "^C2-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
56 "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
58 "^C3-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
60 "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
62 "^C4-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
64 "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
66 "^C5-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
68 "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
70 "^C6-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
72 "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
74 "^C7-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
76 "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
78 "^B-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
80 "\tBU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
82 "^K0-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
84 "\tKU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
86 "^JC3-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
90 "*List of information about mapping table formats.
91 Elements are of the form
92 \(CCS CODE-REGEXP PAREN-EXP BASE UCS-CCS UCS-REGEXP).
94 CCS is a symbol, which is a name of a target coded-charset.
96 CODE-REGEXP is a regular expression to match against
97 the representation of the target coded-charset.
99 PAREN-EXP is a number specifies which parenthesized expression
102 BASE base of code in the string specified by CODE-REGEXP and
105 UCS-CCS is a symbol, which is a name of a UCS-CCS.
107 UCS-REGEXP is a regular expression to match against
108 the representation of the UCS-CCS.")
111 (defun mapping-table-read-file (filename)
112 "Read mapping table."
113 (interactive "fMapping table : ")
115 (buffer-disable-undo)
116 (insert-file-contents filename)
117 (goto-char (point-min))
118 (let (rest setting ccs code ucs ucs-pat ucs-ccs ucs-code chr
119 drep-chr drep-ccs drep-ucs-ccs)
121 (setq rest mapping-table-ccs-setting-alist)
124 (setq setting (car rest)
126 (when (looking-at (pop setting))
127 (setq code (string-to-int (match-string (pop setting))
129 ucs-ccs (pop setting)
130 ucs-pat (car setting)
131 drep-ccs (intern (format "=%s" ccs))
132 drep-ucs-ccs (intern (format "=%s" ucs-ccs)))
133 (unless (find-charset drep-ccs)
135 (unless (find-charset drep-ucs-ccs)
136 (setq drep-ucs-ccs nil))
137 (goto-char (match-end 0))
139 (setq rest (cdr rest)))
148 (looking-at ucs-pat))
150 (string-to-int (match-string 1) 16)
151 (goto-char (match-end 0)))))
154 "[ \t]*U[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
155 (string-to-int (match-string 1) 16)))
156 (if (and ucs-ccs ucs (not ucs-code))
159 (if (and (eq ccs '=jef-china3)
161 (setq ucs-ccs '=ucs@JP
162 drep-ucs-ccs '==ucs@JP))
163 (when (setq chr (decode-char ccs code))
164 (unless (eq (encode-char chr ccs 'defined-only)
166 (put-char-attribute chr ccs code))
170 (encode-char chr ucs-ccs 'defined-only)
172 ((memq ucs-ccs '(=ucs@jis
175 (encode-char chr '=ucs@jis/fw 'defined-only))
176 ((eq ucs-ccs '=ucs@gb)
177 (encode-char chr '=ucs@gb/fw 'defined-only))
178 ;; ((eq ucs-ccs '=ucs@cns)
179 ;; (encode-char chr '=ucs@cns/fw 'defined-only))
180 ;; ((eq ucs-ccs '=ucs@big5)
182 ;; ((eq ucs-ccs '=ucs@ks)
183 ;; (encode-char chr '=ucs@ks/fw 'defined-only))
185 (or (char-feature chr '=ucs)
186 (char-feature chr '=>ucs))
189 (put-char-attribute chr ucs-ccs ucs-code))
191 (not (eq (or (encode-char chr '=ucs 'defined-only)
192 (and (not (memq ucs-ccs '(=ucs@jis
200 (or (char-feature chr '=ucs)
201 (char-feature chr '=>ucs))
204 (if (or ucs-code (null ucs-ccs))
205 (unless (eq (or (char-feature chr '=ucs)
206 (char-feature chr '=>ucs))
208 (put-char-attribute chr '=>ucs ucs))
209 (unless (eq (encode-char chr ucs-ccs 'defined-only)
211 (put-char-attribute chr ucs-ccs ucs)))))
214 (setq drep-chr (decode-char drep-ccs code))
215 (not (eq drep-chr chr)))
216 (unless (eq (encode-char drep-chr drep-ccs 'defined-only)
218 (put-char-attribute drep-chr drep-ccs code))
220 (not (eq (encode-char drep-chr drep-ucs-ccs
223 (put-char-attribute drep-chr drep-ucs-ccs ucs-code))
225 (not (eq (and (not (memq drep-ucs-ccs '(==ucs@jis
231 (or (char-feature drep-chr '=ucs)
232 (char-feature drep-chr '=>ucs))
235 (not (eq (char-feature drep-chr '=>ucs*) ucs)))
236 (if (or ucs-code (null drep-ucs-ccs))
237 (unless (eq (or (char-feature drep-chr '=ucs)
238 (char-feature drep-chr '=>ucs))
240 (put-char-attribute drep-chr '=>ucs ucs))
241 (unless (eq (encode-char drep-chr drep-ucs-ccs 'defined-only)
243 (put-char-attribute drep-chr drep-ucs-ccs ucs)))))
247 (defun ucs-compat-read-file (filename)
248 (interactive "fUCS-compat file : ")
250 (buffer-disable-undo)
251 (insert-file-contents filename)
252 (goto-char (point-min))
254 (while (re-search-forward
255 "^ *U[---+]\\([0-9A-F]+\\)\t *U[---+]\\([0-9A-F]+\\)" nil t)
256 (setq ucs (string-to-int (match-string 1) 16)
257 ucs* (string-to-int (match-string 2) 16))
258 (put-char-attribute (decode-char '=ucs ucs) '=>ucs* ucs*)
262 (defun jp-jouyou-read-file (filename)
263 (interactive "fjp-jouyou file : ")
265 (buffer-disable-undo)
266 (insert-file-contents filename)
267 (goto-char (point-min))
269 (while (re-search-forward "^[^\t\n ]+\t\\(.\\)\t*" nil t)
270 (setq char (aref (match-string 1) 0)
271 tchars (buffer-substring (match-end 0)
273 (when (> (length tchars) 0)
277 (split-string tchars " ")))
278 (unless (or (equal (char-feature char '<-simplified@JP/Jouyou)
280 (and (equal (char-feature char '<-simplified)
283 (char-feature char '<-simplified*sources))))
284 (put-char-attribute char
285 '<-simplified@JP/Jouyou
287 ;; (put-char-attribute
288 ;; char 'script (adjoin
294 ;; (get-char-attribute char 'script)))))
299 ;;; read-maps.el ends here