(U+581A): Apply new conventions for glyph granularity.
[chise/xemacs-chise.git.1] / lisp / utf-2000 / read-maps.el
1 ;;; read-maps.el --- Read mapping-tables.
2
3 ;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2008, 2012, 2014, 2015, 2017
4 ;;   MORIOKA Tomohiko
5
6 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
7 ;; Keywords: mapping table, character, CCS, multiscript, multilingual
8
9 ;; This file is part of XEmacs CHISE.
10
11 ;; XEmacs CHISE is free software; you can redistribute it and/or
12 ;; modify it under the terms of the GNU General Public License as
13 ;; published by the Free Software Foundation; either version 2, or (at
14 ;; your option) any later version.
15
16 ;; XEmacs CHISE is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 ;; General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with XEmacs CHISE; see the file COPYING.  If not, write to
23 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Code:
27
28 (defvar mapping-table-ccs-setting-alist
29   '((=jis-x0208@1990
30      "^J90-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
31      =ucs@jis/1990
32      "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
33     (=jis-x0212
34      "^JSP-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
35      =ucs@jis/1990
36      "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
37     (=jis-x0213-1@2000
38      "^JX1-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
39      =ucs@jis/2000
40      "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
41     (=jis-x0213-2
42      "^JX2-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
43      =ucs@jis/2000
44      "\tJU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
45     (=gb2312
46      "^G0-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
47      =ucs@gb
48      "\tGU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
49     (=cns11643-1
50      "^C1-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
51      =ucs@cns
52      "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
53     (=cns11643-2     
54      "^C2-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
55      =ucs@cns
56      "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
57     (=cns11643-3
58      "^C3-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
59      =ucs@cns
60      "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
61     (=cns11643-4
62      "^C4-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
63      =ucs@cns
64      "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
65     (=cns11643-5
66      "^C5-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
67      =ucs@cns
68      "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
69     (=cns11643-6
70      "^C6-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
71      =ucs@cns
72      "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
73     (=cns11643-7
74      "^C7-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
75      =ucs@cns
76      "\tCU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
77     (=big5     
78      "^B-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
79      =ucs@big5
80      "\tBU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
81     (=ks-x1001
82      "^K0-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
83      =ucs@ks
84      "\tKU[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
85     (=jef-china3     
86      "^JC3-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" 1 16
87      nil
88      nil)
89     )
90   "*List of information about mapping table formats.
91 Elements are of the form
92 \(CCS CODE-REGEXP PAREN-EXP BASE UCS-CCS UCS-REGEXP).
93
94 CCS     is a symbol, which is a name of a target coded-charset.
95
96 CODE-REGEXP is a regular expression to match against
97         the representation of the target coded-charset.
98
99 PAREN-EXP is a number specifies which parenthesized expression
100         in the CODE-REGEXP.
101
102 BASE    base of code in the string specified by CODE-REGEXP and
103         PAREN-EXP.
104
105 UCS-CCS is a symbol, which is a name of a UCS-CCS.
106
107 UCS-REGEXP is a regular expression to match against
108         the representation of the UCS-CCS.")
109
110 ;;;###autoload
111 (defun mapping-table-read-file (filename)
112   "Read mapping table." 
113   (interactive "fMapping table : ")
114   (with-temp-buffer
115     (buffer-disable-undo)
116     (insert-file-contents filename)
117     (goto-char (point-min))
118     (let (rest setting ccs code ucs ucs-pat ucs-ccs ucs-code chr
119                drep-chr drep-ccs drep-ucs-ccs)
120       (while (not (eobp))
121         (setq rest mapping-table-ccs-setting-alist)
122         (catch 'matched
123           (while rest
124             (setq setting (car rest)
125                   ccs (pop setting))
126             (when (looking-at (pop setting))
127               (setq code (string-to-int (match-string (pop setting))
128                                         (pop setting))
129                     ucs-ccs (pop setting)
130                     ucs-pat (car setting)
131                     drep-ccs (intern (format "=%s" ccs))
132                     drep-ucs-ccs (intern (format "=%s" ucs-ccs)))
133               (unless (find-charset drep-ccs)
134                 (setq drep-ccs nil))
135               (unless (find-charset drep-ucs-ccs)
136                 (setq drep-ucs-ccs nil))
137               (goto-char (match-end 0))
138               (throw 'matched t))
139             (setq rest (cdr rest)))
140           (setq ccs nil
141                 code nil
142                 ucs-pat nil
143                 ucs-ccs nil
144                 drep-ccs nil
145                 drep-ucs-ccs nil))
146         (setq ucs-code
147               (if (and ucs-pat
148                        (looking-at ucs-pat))
149                   (prog1
150                       (string-to-int (match-string 1) 16)
151                     (goto-char (match-end 0)))))
152         (setq ucs
153               (if (looking-at
154                    "[ \t]*U[+-]\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]+\\)")
155                   (string-to-int (match-string 1) 16)))
156         (if (and ucs-ccs ucs (not ucs-code))
157             (setq ucs-code ucs
158                   ucs nil))
159         (if (and (eq ccs '=jef-china3)
160                  (eq ucs #xFA66))
161             (setq ucs-ccs '=ucs@JP
162                   drep-ucs-ccs '==ucs@JP))
163         (when (setq chr (decode-char ccs code))
164           (unless (eq (encode-char chr ccs 'defined-only)
165                       code)
166             (put-char-attribute chr ccs code))
167           (when (and ucs-code
168                      (not
169                       (eq (or
170                            (encode-char chr ucs-ccs 'defined-only)
171                            (cond
172                             ((memq ucs-ccs '(=ucs@jis
173                                              =ucs@jis/1990
174                                              =ucs@jis/2000))
175                              (encode-char chr '=ucs@jis/fw 'defined-only))
176                             ((eq ucs-ccs '=ucs@gb)
177                              (encode-char chr '=ucs@gb/fw 'defined-only))
178                             ;; ((eq ucs-ccs '=ucs@cns)
179                             ;;  (encode-char chr '=ucs@cns/fw 'defined-only))
180                             ;; ((eq ucs-ccs '=ucs@big5)
181                             ;;  nil)
182                             ;; ((eq ucs-ccs '=ucs@ks)
183                             ;;  (encode-char chr '=ucs@ks/fw 'defined-only))
184                             (t
185                              (or (char-feature chr '=ucs)
186                                  (char-feature chr '=>ucs))
187                              )))
188                           ucs-code)))
189             (put-char-attribute chr ucs-ccs ucs-code))
190           (when (and ucs
191                      (not (eq (or (encode-char chr '=ucs 'defined-only)
192                                   (and (not (memq ucs-ccs '(=ucs@jis
193                                                             =ucs@jis/1990
194                                                             =ucs@jis/2000
195                                                             =ucs@gb
196                                                             =ucs@cns
197                                                             =ucs@big5
198                                                             =ucs@ks
199                                                             )))
200                                        (or (char-feature chr '=ucs)
201                                            (char-feature chr '=>ucs))
202                                        ))
203                               ucs)))
204             (if (or ucs-code (null ucs-ccs))
205                 (unless (eq (or (char-feature chr '=ucs)
206                                 (char-feature chr '=>ucs))
207                             ucs)
208                   (put-char-attribute chr '=>ucs ucs))
209               (unless (eq (encode-char chr ucs-ccs 'defined-only)
210                           ucs)
211                 (put-char-attribute chr ucs-ccs ucs)))))
212
213         (when (and drep-ccs
214                    (setq drep-chr (decode-char drep-ccs code))
215                    (not (eq drep-chr chr)))
216           (unless (eq (encode-char drep-chr drep-ccs 'defined-only)
217                       code)
218             (put-char-attribute drep-chr drep-ccs code))
219           (when (and ucs-code
220                      (not (eq (encode-char drep-chr drep-ucs-ccs
221                                            'defined-only)
222                               ucs-code)))
223             (put-char-attribute drep-chr drep-ucs-ccs ucs-code))
224           (when (and ucs
225                      (not (eq (and (not (memq drep-ucs-ccs '(==ucs@jis
226                                                              ==ucs@jis/1990
227                                                              ==ucs@jis/2000
228                                                              ==ucs@gb
229                                                              ==ucs@cns
230                                                              ==ucs@ks)))
231                                    (or (char-feature drep-chr '=ucs)
232                                        (char-feature drep-chr '=>ucs))
233                                    )
234                               ucs))
235                      (not (eq (char-feature drep-chr '=>ucs*) ucs)))
236             (if (or ucs-code (null drep-ucs-ccs))
237                 (unless (eq (or (char-feature drep-chr '=ucs)
238                                 (char-feature drep-chr '=>ucs))
239                             ucs)
240                   (put-char-attribute drep-chr '=>ucs ucs))
241               (unless (eq (encode-char drep-chr drep-ucs-ccs 'defined-only)
242                           ucs)
243                 (put-char-attribute drep-chr drep-ucs-ccs ucs)))))
244         (forward-line)))))
245
246 ;;;###autoload
247 (defun ucs-compat-read-file (filename)
248   (interactive "fUCS-compat file : ")
249   (with-temp-buffer
250     (buffer-disable-undo)
251     (insert-file-contents filename)
252     (goto-char (point-min))
253     (let (ucs ucs*)
254       (while (re-search-forward
255               "^ *U[---+]\\([0-9A-F]+\\)\t *U[---+]\\([0-9A-F]+\\)" nil t)
256         (setq ucs (string-to-int (match-string 1) 16)
257               ucs* (string-to-int (match-string 2) 16))
258         (put-char-attribute (decode-char '=ucs ucs) '=>ucs* ucs*)
259         ))))
260
261 ;;;###autoload
262 (defun jp-jouyou-read-file (filename)
263   (interactive "fjp-jouyou file : ")
264   (with-temp-buffer
265     (buffer-disable-undo)
266     (insert-file-contents filename)
267     (goto-char (point-min))
268     (let (char tchars)
269       (while (re-search-forward "^[^\t\n ]+\t\\(.\\)\t*" nil t)
270         (setq char (aref (match-string 1) 0)
271               tchars (buffer-substring (match-end 0)
272                                        (point-at-eol)))
273         (when (> (length tchars) 0)
274           (setq tchars
275                 (mapcar (lambda (c)
276                           (aref c 0))
277                         (split-string tchars " ")))
278           (unless (or (equal (char-feature char '<-simplified@JP/Jouyou)
279                              tchars)
280                       (and (equal (char-feature char '<-simplified)
281                                   tchars)
282                            (memq 'JP/Jouyou
283                                  (char-feature char '<-simplified*sources))))
284             (put-char-attribute char
285                                 '<-simplified@JP/Jouyou
286                                 tchars)))
287         ;; (put-char-attribute
288         ;;  char 'script (adjoin
289         ;;                'JP
290         ;;                (adjoin
291         ;;                 'Jouyou
292         ;;                 (adjoin
293         ;;                  'Ideograph
294         ;;                  (get-char-attribute char 'script)))))
295         ))))
296
297 (provide 'read-maps)
298
299 ;;; read-maps.el ends here