Reformatted.
[chise/xemacs-chise.git.1] / lisp / utf-2000 / read-maps.el
1 ;;; read-maps.el --- Read mapping-tables.
2
3 ;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2008, 2012, 2014, 2015
4 ;;   MORIOKA Tomohiko
5
6 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
7 ;; Keywords: mapping table, character, CCS, multiscript, multilingual
8
9 ;; This file is part of XEmacs CHISE.
10
11 ;; XEmacs CHISE is free software; you can redistribute it and/or
12 ;; modify it under the terms of the GNU General Public License as
13 ;; published by the Free Software Foundation; either version 2, or (at
14 ;; your option) any later version.
15
16 ;; XEmacs CHISE is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 ;; General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with XEmacs CHISE; see the file COPYING.  If not, write to
23 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Code:
27
28 (defvar mapping-table-ccs-setting-alist
29   '((=jis-x0208@1990
30      "^J90-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
31      =ucs@jis/1990
32      "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
33     (=jis-x0212
34      "^JSP-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
35      =ucs@jis/1990
36      "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
37     (=jis-x0213-1@2000
38      "^JX1-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
39      =ucs@jis/2000
40      "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
41     (=jis-x0213-2
42      "^JX2-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
43      =ucs@jis/2000
44      "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
45     (=gb2312
46      "^G0-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
47      =ucs@gb
48      "\tGU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
49     (=cns11643-1
50      "^C1-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
51      =ucs@cns
52      "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
53     (=cns11643-2     
54      "^C2-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
55      =ucs@cns
56      "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
57     (=cns11643-3
58      "^C3-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
59      =ucs@cns
60      "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
61     (=cns11643-4
62      "^C4-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
63      =ucs@cns
64      "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
65     (=cns11643-5
66      "^C5-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
67      =ucs@cns
68      "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
69     (=cns11643-6
70      "^C6-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
71      =ucs@cns
72      "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
73     (=cns11643-7
74      "^C7-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
75      =ucs@cns
76      "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
77     (=big5     
78      "^B-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
79      =ucs@big5
80      "\tBU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
81     (=ks-x1001
82      "^K0-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
83      =ucs@ks
84      "\tKU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
85     (=jef-china3     
86      "^JC3-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
87      nil
88      nil)
89     )
90   "*List of information about mapping table formats.
91 Elements are of the form
92 \(CCS CODE-REGEXP PAREN-EXP BASE UCS-CCS UCS-REGEXP).
93
94 CCS     is a symbol, which is a name of a target coded-charset.
95
96 CODE-REGEXP is a regular expression to match against
97         the representation of the target coded-charset.
98
99 PAREN-EXP is a number specifies which parenthesized expression
100         in the CODE-REGEXP.
101
102 BASE    base of code in the string specified by CODE-REGEXP and
103         PAREN-EXP.
104
105 UCS-CCS is a symbol, which is a name of a UCS-CCS.
106
107 UCS-REGEXP is a regular expression to match against
108         the representation of the UCS-CCS.")
109
110 ;;;###autoload
111 (defun mapping-table-read-file (filename)
112   "Read mapping table." 
113   (interactive "fMapping table : ")
114   (with-temp-buffer
115     (buffer-disable-undo)
116     (insert-file-contents filename)
117     (goto-char (point-min))
118     (let (rest setting ccs code ucs ucs-pat ucs-ccs ucs-code chr
119                drep-chr drep-ccs drep-ucs-ccs)
120       (while (not (eobp))
121         (setq rest mapping-table-ccs-setting-alist)
122         (catch 'matched
123           (while rest
124             (setq setting (car rest)
125                   ccs (pop setting))
126             (when (looking-at (pop setting))
127               (setq code (string-to-int (match-string (pop setting))
128                                         (pop setting))
129                     ucs-ccs (pop setting)
130                     ucs-pat (car setting)
131                     drep-ccs (intern (format "=%s" ccs))
132                     drep-ucs-ccs (intern (format "=%s" ucs-ccs)))
133               (unless (find-charset drep-ccs)
134                 (setq drep-ccs nil))
135               (unless (find-charset drep-ucs-ccs)
136                 (setq drep-ucs-ccs nil))
137               (goto-char (match-end 0))
138               (throw 'matched t))
139             (setq rest (cdr rest)))
140           (setq ccs nil
141                 code nil
142                 ucs-pat nil
143                 ucs-ccs nil
144                 drep-ccs nil
145                 drep-ucs-ccs nil))
146         (setq ucs-code
147               (if (and ucs-pat
148                        (looking-at ucs-pat))
149                   (prog1
150                       (string-to-int (match-string 1) 16)
151                     (goto-char (match-end 0)))))
152         (setq ucs
153               (if (looking-at
154                    "[ \t]*U[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
155                   (string-to-int (match-string 1) 16)))
156         (if (and ucs-ccs ucs (not ucs-code))
157             (setq ucs-code ucs
158                   ucs nil))
159         (when (setq chr (decode-char ccs code))
160           (unless (eq (encode-char chr ccs 'defined-only)
161                       code)
162             (put-char-attribute chr ccs code))
163           (when (and ucs-code
164                      (not
165                       (eq (or
166                            (encode-char chr ucs-ccs 'defined-only)
167                            (cond
168                             ((memq ucs-ccs '(=ucs@jis
169                                              =ucs@jis/1990
170                                              =ucs@jis/2000))
171                              (encode-char chr '=ucs@jis/fw 'defined-only))
172                             ((eq ucs-ccs '=ucs@gb)
173                              (encode-char chr '=ucs@gb/fw 'defined-only))
174                             ;; ((eq ucs-ccs '=ucs@cns)
175                             ;;  (encode-char chr '=ucs@cns/fw 'defined-only))
176                             ;; ((eq ucs-ccs '=ucs@big5)
177                             ;;  nil)
178                             ;; ((eq ucs-ccs '=ucs@ks)
179                             ;;  (encode-char chr '=ucs@ks/fw 'defined-only))
180                             (t
181                              (or (char-feature chr '=ucs)
182                                  (char-feature chr '=>ucs))
183                              )))
184                           ucs-code)))
185             (put-char-attribute chr ucs-ccs ucs-code))
186           (when (and ucs
187                      (not (eq (or (encode-char chr '=ucs 'defined-only)
188                                   (and (not (memq ucs-ccs '(=ucs@jis
189                                                             =ucs@jis/1990
190                                                             =ucs@jis/2000
191                                                             =ucs@gb
192                                                             =ucs@cns
193                                                             =ucs@big5
194                                                             =ucs@ks
195                                                             )))
196                                        (or (char-feature chr '=ucs)
197                                            (char-feature chr '=>ucs))
198                                        ))
199                               ucs)))
200             (if (or ucs-code (null ucs-ccs))
201                 (unless (eq (or (char-feature chr '=ucs)
202                                 (char-feature chr '=>ucs))
203                             ucs)
204                   (put-char-attribute chr '=>ucs ucs))
205               (unless (eq (encode-char chr ucs-ccs 'defined-only)
206                           ucs)
207                 (put-char-attribute chr ucs-ccs ucs)))))
208
209         (when (and drep-ccs
210                    (setq drep-chr (decode-char drep-ccs code))
211                    (not (eq drep-chr chr)))
212           (unless (eq (encode-char drep-chr drep-ccs 'defined-only)
213                       code)
214             (put-char-attribute drep-chr drep-ccs code))
215           (when (and ucs-code
216                      (not (eq (encode-char drep-chr drep-ucs-ccs
217                                            'defined-only)
218                               ucs-code)))
219             (put-char-attribute drep-chr drep-ucs-ccs ucs-code))
220           (when (and ucs
221                      (not (eq (and (not (memq drep-ucs-ccs '(==ucs@jis
222                                                              ==ucs@jis/1990
223                                                              ==ucs@jis/2000
224                                                              ==ucs@gb
225                                                              ==ucs@cns
226                                                              ==ucs@ks)))
227                                    (or (char-feature drep-chr '=ucs)
228                                        (char-feature drep-chr '=>ucs))
229                                    )
230                               ucs))
231                      (not (eq (char-feature drep-chr '=>ucs*) ucs)))
232             (if (or ucs-code (null drep-ucs-ccs))
233                 (unless (eq (or (char-feature drep-chr '=ucs)
234                                 (char-feature drep-chr '=>ucs))
235                             ucs)
236                   (put-char-attribute drep-chr '=>ucs ucs))
237               (unless (eq (encode-char drep-chr drep-ucs-ccs 'defined-only)
238                           ucs)
239                 (put-char-attribute drep-chr drep-ucs-ccs ucs)))))
240         (forward-line)))))
241
242 ;;;###autoload
243 (defun ucs-compat-read-file (filename)
244   (interactive "fUCS-compat file : ")
245   (with-temp-buffer
246     (buffer-disable-undo)
247     (insert-file-contents filename)
248     (goto-char (point-min))
249     (let (ucs ucs*)
250       (while (re-search-forward
251               "^ *U[---+]\\([0-9A-F]+\\)\t *U[---+]\\([0-9A-F]+\\)" nil t)
252         (setq ucs (string-to-int (match-string 1) 16)
253               ucs* (string-to-int (match-string 2) 16))
254         (put-char-attribute (decode-char '=ucs ucs) '=>ucs* ucs*)
255         ))))
256
257 ;;;###autoload
258 (defun jp-jouyou-read-file (filename)
259   (interactive "fjp-jouyou file : ")
260   (with-temp-buffer
261     (buffer-disable-undo)
262     (insert-file-contents filename)
263     (goto-char (point-min))
264     (let (char tchars)
265       (while (re-search-forward "^[^\t\n ]+\t\\(.\\)\t*" nil t)
266         (setq char (aref (match-string 1) 0)
267               tchars (buffer-substring (match-end 0)
268                                        (point-at-eol)))
269         (when (> (length tchars) 0)
270           (setq tchars
271                 (mapcar (lambda (c)
272                           (aref c 0))
273                         (split-string tchars " ")))
274           (unless (or (equal (char-feature char '<-simplified@JP/Jouyou)
275                              tchars)
276                       (and (equal (char-feature char '<-simplified)
277                                   tchars)
278                            (memq 'JP/Jouyou
279                                  (char-feature char '<-simplified*sources))))
280             (put-char-attribute char
281                                 '<-simplified@JP/Jouyou
282                                 tchars)))
283         ;; (put-char-attribute
284         ;;  char 'script (adjoin
285         ;;                'JP
286         ;;                (adjoin
287         ;;                 'Jouyou
288         ;;                 (adjoin
289         ;;                  'Ideograph
290         ;;                  (get-char-attribute char 'script)))))
291         ))))
292
293 (provide 'read-maps)
294
295 ;;; read-maps.el ends here