(BC-8BED): Unify U-0002BDA7.
[chise/xemacs-chise.git.1] / lisp / utf-2000 / read-maps.el
1 ;;; read-maps.el --- Read mapping-tables.
2
3 ;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2008, 2012, 2014
4 ;;   MORIOKA Tomohiko
5
6 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
7 ;; Keywords: mapping table, character, CCS, multiscript, multilingual
8
9 ;; This file is part of XEmacs CHISE.
10
11 ;; XEmacs CHISE is free software; you can redistribute it and/or
12 ;; modify it under the terms of the GNU General Public License as
13 ;; published by the Free Software Foundation; either version 2, or (at
14 ;; your option) any later version.
15
16 ;; XEmacs CHISE is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 ;; General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with XEmacs CHISE; see the file COPYING.  If not, write to
23 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Code:
27
28 (defvar mapping-table-ccs-setting-alist
29   '((=jis-x0208@1990
30      "^J90-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
31      =ucs@jis/1990
32      "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
33     (=jis-x0212
34      "^JSP-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
35      =ucs@jis/1990
36      "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
37     (=jis-x0213-1@2000
38      "^JX1-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
39      =ucs@jis/2000
40      "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
41     (=jis-x0213-2
42      "^JX2-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
43      =ucs@jis/2000
44      "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
45     (=gb2312
46      "^G0-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
47      =ucs@gb
48      "\tGU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
49     (=cns11643-1
50      "^C1-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
51      =ucs@cns
52      "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
53     (=cns11643-2     
54      "^C2-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
55      =ucs@cns
56      "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
57     (=cns11643-3
58      "^C3-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
59      =ucs@cns
60      "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
61     (=cns11643-4
62      "^C4-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
63      =ucs@cns
64      "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
65     (=cns11643-5
66      "^C5-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
67      =ucs@cns
68      "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
69     (=cns11643-6
70      "^C6-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
71      =ucs@cns
72      "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
73     (=cns11643-7
74      "^C7-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
75      =ucs@cns
76      "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
77     (=big5     
78      "^B-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
79      =ucs@big5
80      "\tBU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
81     (=ks-x1001
82      "^K0-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
83      =ucs@ks
84      "\tKU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
85     (=jef-china3     
86      "^JC3-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
87      nil
88      nil)
89     )
90   "*List of information about mapping table formats.
91 Elements are of the form
92 \(CCS CODE-REGEXP PAREN-EXP BASE UCS-CCS UCS-REGEXP).
93
94 CCS     is a symbol, which is a name of a target coded-charset.
95
96 CODE-REGEXP is a regular expression to match against
97         the representation of the target coded-charset.
98
99 PAREN-EXP is a number specifies which parenthesized expression
100         in the CODE-REGEXP.
101
102 BASE    base of code in the string specified by CODE-REGEXP and
103         PAREN-EXP.
104
105 UCS-CCS is a symbol, which is a name of a UCS-CCS.
106
107 UCS-REGEXP is a regular expression to match against
108         the representation of the UCS-CCS.")
109
110 ;;;###autoload
111 (defun mapping-table-read-file (filename)
112   "Read mapping table." 
113   (interactive "fMapping table : ")
114   (with-temp-buffer
115     (buffer-disable-undo)
116     (insert-file-contents filename)
117     (goto-char (point-min))
118     (let (rest setting ccs code ucs ucs-pat ucs-ccs ucs-code chr
119                drep-chr drep-ccs drep-ucs-ccs)
120       (while (not (eobp))
121         (setq rest mapping-table-ccs-setting-alist)
122         (catch 'matched
123           (while rest
124             (setq setting (car rest)
125                   ccs (pop setting))
126             (when (looking-at (pop setting))
127               (setq code (string-to-int (match-string (pop setting))
128                                         (pop setting))
129                     ucs-ccs (pop setting)
130                     ucs-pat (car setting)
131                     drep-ccs (intern (format "=%s" ccs))
132                     drep-ucs-ccs (intern (format "=%s" ucs-ccs)))
133               (unless (find-charset drep-ccs)
134                 (setq drep-ccs nil))
135               (unless (find-charset drep-ucs-ccs)
136                 (setq drep-ucs-ccs nil))
137               (goto-char (match-end 0))
138               (throw 'matched t))
139             (setq rest (cdr rest)))
140           (setq ccs nil
141                 code nil
142                 ucs-pat nil
143                 ucs-ccs nil
144                 drep-ccs nil
145                 drep-ucs-ccs nil))
146         (setq ucs-code
147               (if (and ucs-pat
148                        (looking-at ucs-pat))
149                   (prog1
150                       (string-to-int (match-string 1) 16)
151                     (goto-char (match-end 0)))))
152         (setq ucs
153               (if (looking-at
154                    "[ \t]*U[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
155                   (string-to-int (match-string 1) 16)))
156         (if (and ucs-ccs ucs (not ucs-code))
157             (setq ucs-code ucs
158                   ucs nil))
159         (when (setq chr (decode-char ccs code))
160           (unless (eq (encode-char chr ccs 'defined-only)
161                       code)
162             (put-char-attribute chr ccs code))
163           (when (and ucs-code
164                      (not
165                       (eq (or
166                            (encode-char chr ucs-ccs 'defined-only)
167                            (cond
168                             ((memq ucs-ccs '(=ucs@jis
169                                              =ucs@jis/1990
170                                              =ucs@jis/2000))
171                              (encode-char chr '=ucs@jis/fw 'defined-only))
172                             ((eq ucs-ccs '=ucs@gb)
173                              (encode-char chr '=ucs@gb/fw 'defined-only))
174                             ;; ((eq ucs-ccs '=ucs@cns)
175                             ;;  (encode-char chr '=ucs@cns/fw 'defined-only))
176                             ;; ((eq ucs-ccs '=ucs@big5)
177                             ;;  nil)
178                             ;; ((eq ucs-ccs '=ucs@ks)
179                             ;;  (encode-char chr '=ucs@ks/fw 'defined-only))
180                             (t
181                              (char-feature chr '=>ucs))))
182                           ucs-code)))
183             (put-char-attribute chr ucs-ccs ucs-code))
184           (when (and ucs
185                      (not (eq (or (encode-char chr '=ucs 'defined-only)
186                                   (and (not (memq ucs-ccs '(=ucs@jis
187                                                             =ucs@jis/1990
188                                                             =ucs@jis/2000
189                                                             =ucs@gb
190                                                             =ucs@cns
191                                                             =ucs@big5
192                                                             =ucs@ks
193                                                             )))
194                                        (char-feature chr '=>ucs)))
195                               ucs)))
196             (if (or ucs-code (null ucs-ccs))
197                 (unless (eq (char-feature chr '=>ucs) ucs)
198                   (put-char-attribute chr '=>ucs ucs))
199               (unless (eq (encode-char chr ucs-ccs 'defined-only)
200                           ucs)
201                 (put-char-attribute chr ucs-ccs ucs)))))
202
203         (when (and drep-ccs
204                    (setq drep-chr (decode-char drep-ccs code))
205                    (not (eq drep-chr chr)))
206           (unless (eq (encode-char drep-chr drep-ccs 'defined-only)
207                       code)
208             (put-char-attribute drep-chr drep-ccs code))
209           (when (and ucs-code
210                      (not (eq (encode-char drep-chr drep-ucs-ccs
211                                            'defined-only)
212                               ucs-code)))
213             (put-char-attribute drep-chr drep-ucs-ccs ucs-code))
214           (when (and ucs
215                      (not (eq (and (not (memq drep-ucs-ccs '(==ucs@jis
216                                                              ==ucs@jis/1990
217                                                              ==ucs@jis/2000
218                                                              ==ucs@gb
219                                                              ==ucs@cns
220                                                              ==ucs@ks)))
221                                    (char-feature drep-chr '=>ucs))
222                               ucs))
223                      (not (eq (char-feature drep-chr '=>ucs*) ucs)))
224             (if (or ucs-code (null drep-ucs-ccs))
225                 (unless (eq (char-feature drep-chr '=>ucs) ucs)
226                   (put-char-attribute drep-chr '=>ucs ucs))
227               (unless (eq (encode-char drep-chr drep-ucs-ccs 'defined-only)
228                           ucs)
229                 (put-char-attribute drep-chr drep-ucs-ccs ucs)))))
230         (forward-line)))))
231
232 ;;;###autoload
233 (defun ucs-compat-read-file (filename)
234   (interactive "fUCS-compat file : ")
235   (with-temp-buffer
236     (buffer-disable-undo)
237     (insert-file-contents filename)
238     (goto-char (point-min))
239     (let (ucs ucs*)
240       (while (re-search-forward
241               "^ *U[---+]\\([0-9A-F]+\\)\t *U[---+]\\([0-9A-F]+\\)" nil t)
242         (setq ucs (string-to-int (match-string 1) 16)
243               ucs* (string-to-int (match-string 2) 16))
244         (put-char-attribute (decode-char '=ucs ucs) '=>ucs* ucs*)
245         ))))
246
247 ;;;###autoload
248 (defun jp-jouyou-read-file (filename)
249   (interactive "fjp-jouyou file : ")
250   (with-temp-buffer
251     (buffer-disable-undo)
252     (insert-file-contents filename)
253     (goto-char (point-min))
254     (let (char tchars)
255       (while (re-search-forward "^[^\t\n ]+\t\\(.\\)\t*" nil t)
256         (setq char (aref (match-string 1) 0)
257               tchars (buffer-substring (match-end 0)
258                                        (point-at-eol)))
259         (when (> (length tchars) 0)
260           (setq tchars
261                 (mapcar (lambda (c)
262                           (aref c 0))
263                         (split-string tchars " ")))
264           (unless (or (equal (char-feature char '<-simplified@JP/Jouyou)
265                              tchars)
266                       (and (equal (char-feature char '<-simplified)
267                                   tchars)
268                            (memq 'JP/Jouyou
269                                  (char-feature char '<-simplified*sources))))
270             (put-char-attribute char
271                                 '<-simplified@JP/Jouyou
272                                 tchars)))
273         ;; (put-char-attribute
274         ;;  char 'script (adjoin
275         ;;                'JP
276         ;;                (adjoin
277         ;;                 'Jouyou
278         ;;                 (adjoin
279         ;;                  'Ideograph
280         ;;                  (get-char-attribute char 'script)))))
281         ))))
282
283 (provide 'read-maps)
284
285 ;;; read-maps.el ends here