Sync up with r21-4-22-chise-0_24-aj1-bc.
[chise/xemacs-chise.git.1] / lisp / mule / mule-coding.el
1 ;;; mule-coding.el --- Coding-system functions for Mule.
2
3 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
5 ;; Copyright (C) 1995 Amdahl Corporation.
6 ;; Copyright (C) 1995 Sun Microsystems.
7 ;; Copyright (C) 1997, 1999, 2002, 2003, 2004, 2005, 2006, 2008, 2009,
8 ;;   2010 MORIOKA Tomohiko
9
10 ;; This file is part of XEmacs.
11
12 ;; XEmacs is free software; you can redistribute it and/or modify it
13 ;; under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation; either version 2, or (at your option)
15 ;; any later version.
16
17 ;; XEmacs is distributed in the hope that it will be useful, but
18 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 ;; General Public License for more details.
21
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with XEmacs; see the file COPYING.  If not, write to the
24 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
25 ;; Boston, MA 02111-1307, USA.
26
27 ;;; Commentary:
28
29 ;;; split off of mule.el and mostly moved to coding.el
30
31 ;;; Code:
32
(defun coding-system-force-on-output (coding-system register)
  "Return the force-on-output property of CODING-SYSTEM for REGISTER.
REGISTER must be an integer; 0 through 3 select the properties
`force-g0-on-output' through `force-g3-on-output' respectively.
Any other integer signals `args-out-of-range' with data
\(REGISTER 0 3)."
  (check-type register integer)
  ;; The property names are indexed by register number.
  (let ((property-by-register [force-g0-on-output
                               force-g1-on-output
                               force-g2-on-output
                               force-g3-on-output]))
    (coding-system-property
     coding-system
     (if (and (>= register 0) (<= register 3))
         (aref property-by-register register)
       (signal 'args-out-of-range (list register 0 3))))))
44
(defun coding-system-short (coding-system)
  "Look up and return the value of CODING-SYSTEM's `short' property."
  ;; Thin accessor around `coding-system-property'.
  (coding-system-property coding-system 'short))
48
(defun coding-system-no-ascii-eol (coding-system)
  "Look up and return the value of CODING-SYSTEM's `no-ascii-eol' property."
  ;; Thin accessor around `coding-system-property'.
  (coding-system-property coding-system 'no-ascii-eol))
52
(defun coding-system-no-ascii-cntl (coding-system)
  "Look up and return the value of CODING-SYSTEM's `no-ascii-cntl' property."
  ;; Thin accessor around `coding-system-property'.
  (coding-system-property coding-system 'no-ascii-cntl))
56
(defun coding-system-seven (coding-system)
  "Look up and return the value of CODING-SYSTEM's `seven' property."
  ;; Thin accessor around `coding-system-property'.
  (coding-system-property coding-system 'seven))
60
(defun coding-system-lock-shift (coding-system)
  "Look up and return the value of CODING-SYSTEM's `lock-shift' property."
  ;; Thin accessor around `coding-system-property'.
  (coding-system-property coding-system 'lock-shift))
64
65 ;;(defun coding-system-use-japanese-jisx0201-roman (coding-system)
66 ;;  "Return the 'use-japanese-jisx0201-roman property of CODING-SYSTEM."
67 ;;  (coding-system-property coding-system 'use-japanese-jisx0201-roman))
68
69 ;;(defun coding-system-use-japanese-jisx0208-1978 (coding-system)
70 ;;  "Return the 'use-japanese-jisx0208-1978 property of CODING-SYSTEM."
;;  (coding-system-property coding-system 'use-japanese-jisx0208-1978))
72
(defun coding-system-no-iso6429 (coding-system)
  "Look up and return the value of CODING-SYSTEM's `no-iso6429' property."
  ;; Thin accessor around `coding-system-property'.
  (coding-system-property coding-system 'no-iso6429))
76
(defun coding-system-ccl-encode (coding-system)
  "Look up and return the value of CODING-SYSTEM's CCL `encode' property."
  ;; Thin accessor around `coding-system-property'.
  (coding-system-property coding-system 'encode))
80
(defun coding-system-ccl-decode (coding-system)
  "Look up and return the value of CODING-SYSTEM's CCL `decode' property."
  ;; Thin accessor around `coding-system-property'.
  (coding-system-property coding-system 'decode))
84
85 \f
86 ;;;; Definitions of predefined coding systems
87
;; `ctext': an iso2022-type system with ASCII designated to G0 and
;; Latin-1 (ISO 8859-1) designated to G1.
(make-coding-system
 'ctext 'iso2022
 "Coding-system used in X as Compound Text Encoding."
 '(charset-g0  ascii
   charset-g1  latin-iso8859-1
   eol-type    nil
   mnemonic    "CText"))
95
;;; iso-8859-1 is not an alias of ctext here: the copy below is
;;; commented out and iso-8859-1 is defined on its own as a
;;; `no-conversion' coding system.

;; (copy-coding-system 'ctext 'iso-8859-1)
(make-coding-system
 'iso-8859-1 'no-conversion
 ;; The description used to be a verbatim copy of the `ctext' one
 ;; ("... Compound Text Encoding."), which does not describe a
 ;; `no-conversion' system.
 "Coding-system for ISO 8859-1 (Latin-1), implemented as no-conversion."
 '(eol-type nil mnemonic "Noconv"))
103
;; 8-bit iso2022-type system: ASCII in G0, Latin-1 in G1; G2 is
;; declared but left open (`t').
(make-coding-system
 'iso-2022-8bit-ss2 'iso2022
 "ISO-2022 coding system using SS2 for 96-charset in 8-bit code."
 '(charset-g0  ascii
   charset-g1  latin-iso8859-1
   charset-g2  t ;; unspecified but can be used later.
   short       t
   mnemonic    "ISO8/SS"))
113
;; 7-bit counterpart of `iso-2022-8bit-ss2': no G1 designation, and
;; the `seven' property is set.
(make-coding-system
 'iso-2022-7bit-ss2 'iso2022
 "ISO-2022 coding system using SS2 for 96-charset in 7-bit code."
 '(charset-g0  ascii
   charset-g2  t ;; unspecified but can be used later.
   seven       t
   short       t
   mnemonic    "ISO7/SS"
   eol-type    nil))
123
;; (copy-coding-system 'iso-2022-7bit-ss2 'iso-2022-jp-2)
(make-coding-system
 'iso-2022-jp-2 'iso2022
 "ISO-2022 coding system using SS2 for 96-charset in 7-bit code."
 (append
  ;; Only when the `utf-2000' feature is present: prepend a
  ;; `ccs-priority-list' property to the common property list.
  (if (featurep 'utf-2000)
      (list 'ccs-priority-list
            '(ascii
              =jis-x0208@1983 =jis-x0208@1978
              latin-jisx0201)))
  ;; Common properties (same values as `iso-2022-7bit-ss2').
  '(charset-g0 ascii
    charset-g2 t ;; unspecified but can be used later.
    seven t
    short t
    mnemonic "ISO7/SS"
    eol-type nil)))
142
;; 7-bit iso2022-type system that only designates G0.
(make-coding-system
 'iso-2022-7bit 'iso2022
 "ISO 2022 based 7-bit encoding using only G0"
 '(charset-g0  ascii
   seven       t
   short       t
   mnemonic    "ISO7"))

;; Keep the old name `iso-2022-7' available for older XEmacsen.
(copy-coding-system 'iso-2022-7bit 'iso-2022-7)
153
;; 8-bit iso2022-type system: ASCII in G0, Latin-1 in G1.
(make-coding-system
 'iso-2022-8 'iso2022
 "ISO-2022 eight-bit coding system.  No single-shift or locking-shift."
 '(charset-g0  ascii
   charset-g1  latin-iso8859-1
   short       t
   mnemonic    "ISO8"))
162
;; Like `iso-2022-8' in its G0/G1 designations, but with a fixed `lf'
;; end-of-line type and the `escape-quoted' property turned on.
(make-coding-system
 'escape-quoted 'iso2022
 "ISO-2022 eight-bit coding system with escape quoting; used for .ELC files."
 '(charset-g0     ascii
   charset-g1     latin-iso8859-1
   eol-type       lf
   escape-quoted  t
   mnemonic       "ESC/Quot"))
172
;; 7-bit iso2022-type system with the `lock-shift' property set; G1
;; is declared but left open (`t').
(make-coding-system
 'iso-2022-lock 'iso2022
 "ISO-2022 coding system using Locking-Shift for 96-charset."
 '(charset-g0  ascii
   charset-g1  t ;; unspecified but can be used later.
   seven       t
   lock-shift  t
   mnemonic    "ISO7/Lock"))
182
183 (when (featurep 'utf-2000)
184   (setq coded-charset-entity-reference-alist
185         '(((=adobe-japan1-base  . isolated)    "I-AJ1-" 5 d)
186           ((=gt                 . isolated)     "I-GT-" 5 d)
187           ((=ruimoku-v6         . isolated)   "I-RUI6-" 4 X)
188           ((chinese-big5-cdp    . isolated)    "I-CDP-" 4 X)
189           ((=gt-k               . isolated)    "I-GT-K" 5 d)
190           ((ideograph-cbeta     . isolated)      "I-CB" 5 d)
191           ((ideograph-hanziku-1 . isolated)  "I-HZK01-" 4 X)
192           ((ideograph-hanziku-2 . isolated)  "I-HZK02-" 4 X)
193           ((ideograph-hanziku-3 . isolated)  "I-HZK03-" 4 X)
194           ((ideograph-hanziku-4 . isolated)  "I-HZK04-" 4 X)
195           ((ideograph-hanziku-5 . isolated)  "I-HZK05-" 4 X)
196           ((ideograph-hanziku-6 . isolated)  "I-HZK06-" 4 X)
197           ((ideograph-hanziku-7 . isolated)  "I-HZK07-" 4 X)
198           ((ideograph-hanziku-8 . isolated)  "I-HZK08-" 4 X)
199           ((ideograph-hanziku-9 . isolated)  "I-HZK09-" 4 X)
200           ((ideograph-hanziku-10 . isolated) "I-HZK10-" 4 X)
201           ((ideograph-hanziku-11 . isolated) "I-HZK11-" 4 X)
202           ((ideograph-hanziku-12 . isolated) "I-HZK12-" 4 X)
203           ((=jis-x0208@1990     . isolated)    "I-J90-" 4 X)
204           ((=jis-x0208@1983     . isolated)    "I-J83-" 4 X)
205           ((=jis-x0213-1@2000   . isolated)    "I-JX1-" 4 X)
206           ((=jis-x0213-2        . isolated)    "I-JX2-" 4 X)
207           ((=jis-x0213-1@2004   . isolated)    "I-JX3-" 4 X)
208           ((=jis-x0212          . isolated)    "I-JSP-" 4 X)
209           ((=jis-x0208@1978     . isolated)    "I-J78-" 4 X)
210           ((chinese-gb2312      . isolated)     "I-G0-" 4 X)
211           ((chinese-cns11643-1  . isolated)     "I-C1-" 4 X)
212           ((chinese-cns11643-2  . isolated)     "I-C2-" 4 X)
213           ((chinese-cns11643-3  . isolated)     "I-C3-" 4 X)
214           ((chinese-cns11643-4  . isolated)     "I-C4-" 4 X)
215           ((chinese-cns11643-5  . isolated)     "I-C5-" 4 X)
216           ((chinese-cns11643-6  . isolated)     "I-C6-" 4 X)
217           ((chinese-cns11643-7  . isolated)     "I-C7-" 4 X)
218           ((korean-ksc5601      . isolated)     "I-K0-" 4 X)
219           ((=iso-ir165          . isolated)    "I-EGB-" 4 X)
220           ((ideograph-daikanwa  . isolated)      "I-M-" 5 d)
221           ((chinese-big5        . isolated)      "I-B-" 4 X)
222           ((latin-iso8859-1     . isolated) "I-LATIN1-" 2 X)
223           ((latin-iso8859-2     . isolated) "I-LATIN2-" 2 X)
224           ((latin-iso8859-3     . isolated) "I-LATIN3-" 2 X)
225           ((latin-iso8859-4     . isolated) "I-LATIN4-" 2 X)
226           ((cyrillic-iso8859-5  . isolated) "I-CYRILLIC-" 2 X)
227           ((greek-iso8859-7     . isolated)  "I-GREEK-" 2 X)
228           ((hebrew-iso8859-8    . isolated) "I-HEBREW-" 2 X)
229           ((latin-iso8859-9     . isolated) "I-LATIN5-" 2 X)
230           ((latin-jisx0201      . isolated) "I-LATINJ-" 2 X)
231           ((katakana-jisx0201   . isolated) "I-KATAKANA-" 2 X)
232           ((latin-tcvn5712      . isolated) "I-VSCII2-" 2 X)
233           ;; ((latin-viscii     . isolated) "I-VISCII-" 2 X)
234           ((latin-viscii-upper  . isolated) "I-MULE-VIET-U-" 2 X)
235           ((latin-viscii-lower  . isolated) "I-MULE-VIET-L-" 2 X)
236           ((thai-tis620         . isolated)   "I-THAI-" 2 X)
237           ((lao                 . isolated) "I-MULE-LAO-" 2 X)
238           ((arabic-1-column     . isolated) "I-MULE-ARB1-" 2 X)
239           ((arabic-2-column     . isolated) "I-MULE-ARB2-" 2 X)
240           ((arabic-digit        . isolated) "I-MULE-ARBD-" 2 X)
241           ((ipa                 . isolated) "I-MULE-IPA-" 2 X)
242           ((china3-jef          . isolated)    "I-JC3-" 4 X)
243           ( =adobe-japan1-6                      "AJ1-" 5 d)
244           ( =adobe-japan1-base                 "I-AJ1-" 5 d)
245           ( =gt                                   "GT-" 5 d)
246           (=>>gt                                "G-GT-" 5 d)
247           (=>>gt                                 "aGT-" 5 d)
248           (=>gt                                 "A-GT-" 5 d)
249           ( =zinbun-oracle                       "ZOB-" 4 d)
250           (=>zinbun-oracle                     "A-ZOB-" 4 d)
251           ( =ruimoku-v6                         "RUI6-" 4 X)
252           ( chinese-big5-cdp                     "CDP-" 4 X)
253           ( ideograph-daikanwa                     "M-" 5 d)
254           ( =gt-k                                "GT-K" 5 d)
255           ( ideograph-cbeta                        "CB" 5 d)
256           ( ideograph-hanziku-1                "HZK01-" 4 X)
257           ( ideograph-hanziku-2                "HZK02-" 4 X)
258           ( ideograph-hanziku-3                "HZK03-" 4 X)
259           ( ideograph-hanziku-4                "HZK04-" 4 X)
260           ( ideograph-hanziku-5                "HZK05-" 4 X)
261           ( ideograph-hanziku-6                "HZK06-" 4 X)
262           ( ideograph-hanziku-7                "HZK07-" 4 X)
263           ( ideograph-hanziku-8                "HZK08-" 4 X)
264           ( ideograph-hanziku-9                "HZK09-" 4 X)
265           ( ideograph-hanziku-10               "HZK10-" 4 X)
266           ( ideograph-hanziku-11               "HZK11-" 4 X)
267           ( ideograph-hanziku-12               "HZK12-" 4 X)
268           ((ideograph-hanziku-1 . isolated)   "I-HZK1-" 4 X)
269           ( ideograph-hanziku-1                 "HZK1-" 4 X)
270           ((ideograph-hanziku-2 . isolated)   "I-HZK2-" 4 X)
271           ( ideograph-hanziku-2                 "HZK2-" 4 X)
272           ( =jis-x0208@1990                      "J90-" 4 X)
273           ( =jis-x0208@1983                      "J83-" 4 X)
274           ( =jis-x0213-1@2000                    "JX1-" 4 X)
275           ( =jis-x0213-2                         "JX2-" 4 X)
276           ( =jis-x0213-1@2004                    "JX3-" 4 X)
277           ( =jis-x0212                           "JSP-" 4 X)
278           ( =jis-x0208@1978                      "J78-" 4 X)
279           (=>>jis-x0208@1990                   "G-J90-" 4 X)
280           (=>>jis-x0213-1@2000                 "G-JX1-" 4 X)
281           (=>>jis-x0213-2                      "G-JX2-" 4 X)
282           (=>>jis-x0213-1@2004                 "G-JX3-" 4 X)
283           (=>>jis-x0213-1@2000                  "aJX1-" 4 X)
284           (=>>jis-x0213-2                       "aJX2-" 4 X)
285           (=>>jis-x0213-1@2004                  "aJX3-" 4 X)
286           (=>>jis-x0208@1978                   "G-J78-" 4 X)
287           ( =>jis-x0208@1997                     "J97-" 4 X)
288           ( =>jis-x0208@1997                    "A-J0-" 4 X)
289           ( =>jis-x0213-1@2000                 "A-JX1-" 4 X)
290           ( =>jis-x0213-2                      "A-JX2-" 4 X)
291           ( =>jis-x0213-1@2004                 "A-JX3-" 4 X)
292           ( chinese-cns11643-1                    "C1-" 4 X)
293           ( chinese-cns11643-2                    "C2-" 4 X)
294           ( chinese-cns11643-3                    "C3-" 4 X)
295           ( chinese-cns11643-4                    "C4-" 4 X)
296           ( chinese-cns11643-5                    "C5-" 4 X)
297           ( chinese-cns11643-6                    "C6-" 4 X)
298           ( chinese-cns11643-7                    "C7-" 4 X)
299           ( korean-ksc5601                        "K0-" 4 X)
300           ( chinese-gb2312                        "G0-" 4 X)
301           ( =iso-ir165                           "EGB-" 4 X)
302           ( latin-iso8859-1                   "LATIN1-" 2 X)
303           ( latin-iso8859-2                   "LATIN2-" 2 X)
304           ( latin-iso8859-3                   "LATIN3-" 2 X)
305           ( latin-iso8859-4                   "LATIN4-" 2 X)
306           ( cyrillic-iso8859-5              "CYRILLIC-" 2 X)
307           ( greek-iso8859-7                    "GREEK-" 2 X)
308           ( hebrew-iso8859-8                  "HEBREW-" 2 X)
309           ( latin-iso8859-9                   "LATIN5-" 2 X)
310           ( latin-jisx0201                    "LATINJ-" 2 X)
311           ( katakana-jisx0201               "KATAKANA-" 2 X)
312           ( latin-tcvn5712                    "VSCII2-" 2 X)
313           ( latin-viscii                      "VISCII-" 2 X)
314           ( latin-viscii-upper           "MULE-VIET-U-" 2 X)
315           ( latin-viscii-lower           "MULE-VIET-L-" 2 X)
316           ( thai-tis620                         "THAI-" 2 X)
317           ( lao                             "MULE-LAO-" 2 X)
318           ( ethiopic                      "MULE-ETHIO-" 4 X)
319           ( arabic-1-column                "MULE-ARB1-" 2 X)
320           ( arabic-2-column                "MULE-ARB2-" 2 X)
321           ( arabic-digit                   "MULE-ARBD-" 2 X)
322           ( ipa                             "MULE-IPA-" 2 X)
323           ( china3-jef                           "JC3-" 4 X)
324           ( chinese-big5                           "B-" 4 X)
325           ( chinese-big5                          "C0-" 4 X)
326           ( =ucs@iso                               "U-" 8 X)
327           ( =ucs@unicode                          "UU+" 4 X)
328           ( =ucs@unicode                          "UU-" 8 X)
329           ( =ucs@iso                               "U+" 4 X)
330           ( =ucs@gb                               "GU+" 4 X)
331           ( =ucs@gb                               "GU-" 8 X)
332           ( =ucs@jis                              "JU+" 4 X)
333           ( =ucs@jis                              "JU-" 8 X)
334           ( =ucs@cns                              "CU+" 4 X)
335           ( =ucs@cns                              "CU-" 8 X)
336           ( =ucs@ks                               "KU+" 4 X)
337           ( =ucs@ks                               "KU-" 8 X)
338           (=>>ucs@unicode                       "G-UU+" 4 X)
339           ( =jis-x0208@1978/i1                 "J78i1-" 4 X)
340           ( =shinjigen@rev                      "SJG2-" 4 d)
341           ( =shinjigen@1ed                      "SJG1-" 4 d)
342           ))
343
344   (make-coding-system
345    'utf-8-mcs-er 'utf-8
346    "Coding-system of UTF-8 with entity-reference."
347    '(mnemonic "MTF8r" use-entity-reference t))
348
349   (make-coding-system
350    'utf-8-gb 'utf-8
351    "Coding-system of UTF-8 using GB mapping."
352    '(mnemonic "UTF8G"
353               charset-g0 ucs-gb
354               charset-g1 =>ucs-gb
355               charset-g2 =>ucs))
356
357   (make-coding-system
358    'utf-8-gb-er 'utf-8
359    "Coding-system of UTF-8 using GB mapping with entity-reference."
360    '(mnemonic "UTF8Gr"
361               charset-g0 ucs-gb
362               charset-g1 =>ucs-gb
363               charset-g2 =>ucs
364               use-entity-reference t))
365
366   (make-coding-system
367    'utf-8-cns 'utf-8
368    "Coding-system of UTF-8 using CNS mapping."
369    '(mnemonic "UTF8C"
370               charset-g0 ucs-cns
371               charset-g1 =>ucs-cns
372               charset-g2 =>ucs))
373
374   (make-coding-system
375    'utf-8-cns-er 'utf-8
376    "Coding-system of UTF-8 using CNS mapping with entity-reference."
377    '(mnemonic "UTF8Cr"
378               charset-g0 ucs-cns
379               charset-g1 =>ucs-cns
380               charset-g2 =>ucs
381               use-entity-reference t))
382
383   (make-coding-system
384    'utf-8-big5 'utf-8
385    "Coding-system of UTF-8 using Big5 mapping."
386    '(mnemonic "UTF8B"
387               charset-g0 ucs-big5
388               charset-g1 =>ucs-big5
389               charset-g2 =>ucs))
390
391   (make-coding-system
392    'utf-8-big5-er 'utf-8
393    "Coding-system of UTF-8 using Big5 mapping with entity-reference."
394    '(mnemonic "UTF8Br"
395               charset-g0 ucs-big5
396               charset-g1 =>ucs-big5
397               charset-g2 =>ucs
398               use-entity-reference t))
399
400   (make-coding-system
401    'utf-8-jis 'utf-8
402    "Coding-system of UTF-8 using JIS mapping."
403    '(mnemonic "UTF8J"
404               charset-g0 =ucs@jis-2000
405               charset-g1 =>ucs-jis
406               charset-g2 =>ucs))
407
408   (make-coding-system
409    'utf-8-jis-er 'utf-8
410    "Coding-system of UTF-8 using JIS mapping with entity-reference."
411    '(mnemonic "UTF8Jr"
412               charset-g0 =ucs@jis-2000
413               charset-g1 =>ucs-jis
414               charset-g2 =>ucs
415               use-entity-reference t))
416
417   (make-coding-system
418    'utf-8-jp 'utf-8
419    "Coding-system of UTF-8 for common glyphs used in Japan."
420    '(mnemonic "UTF8J"
421               charset-g0 =ucs@jp
422               charset-g1 =>ucs-jis
423               charset-g2 =>ucs))
424
425   (make-coding-system
426    'utf-8-jp-er 'utf-8
427    "Coding-system of UTF-8 using =ucs@jp mapping with entity-reference."
428    '(mnemonic "UTF8Jr"
429               charset-g0 =ucs@jp
430               charset-g1 =>ucs-jis
431               charset-g2 =>ucs
432               use-entity-reference t))
433
434   (make-coding-system
435    'utf-8-ks 'utf-8
436    "Coding-system of UTF-8 using KS mapping."
437    '(mnemonic "UTF8K"
438               charset-g0 ucs-ks
439               charset-g1 =>ucs-ks
440               charset-g2 =>ucs))
441
442   (make-coding-system
443    'utf-8-ks-er 'utf-8
444    "Coding-system of UTF-8 using KS mapping with entity-reference."
445    '(mnemonic "UTF8Kr"
446               charset-g0 ucs-ks
447               charset-g1 =>ucs-ks
448               charset-g2 =>ucs
449               use-entity-reference t))
450
451   (define-coding-system-alias 'utf-8 'utf-8-mcs)
452   (define-coding-system-alias 'utf-8-er 'utf-8-mcs-er)
453   )
454
;; iso2022-type system designating all four registers: ASCII,
;; JIS X 0213 plane 1, JIS X 0201 katakana and JIS X 0213 plane 2.
(make-coding-system
 'euc-jisx0213 'iso2022
 "Coding-system of Japanese EUC based on JIS X 0213."
 '(charset-g0  ascii
   charset-g1  japanese-jisx0213-1
   charset-g2  katakana-jisx0201
   charset-g3  japanese-jisx0213-2
   short       t
   mnemonic    "Ja/EUC0213"))
465
466 ;; initialize the coding categories to something semi-reasonable
467 ;; so that the remaining Lisp files can contain extended characters.
468 ;; (They will be in ISO-7 format)
469 ;; #### This list needs to be synched with the ones in mule-cmds.el.
470
;; Install the initial coding-category priority order.  The base order
;; is the same in both configurations; when the `utf-2000' feature is
;; present, `utf-8' is inserted after `no-conversion' and `ucs-4' is
;; appended at the end.
(let ((utf-2000-p (featurep 'utf-2000)))
  (set-coding-priority-list
   (append '(iso-7
             no-conversion)
           (if utf-2000-p '(utf-8))
           '(iso-8-1
             iso-8-2
             iso-8-designate
             iso-lock-shift
             shift-jis
             big5)
           (if utf-2000-p '(ucs-4)))))
493
;; Assign a coding system to each of these coding categories, in the
;; order listed: (CATEGORY . CODING-SYSTEM).
(let ((category-defaults '((iso-7           . iso-2022-7)
                           (iso-8-designate . ctext)
                           (iso-8-1         . ctext)
                           (iso-lock-shift  . iso-2022-lock)
                           (no-conversion   . no-conversion))))
  (while category-defaults
    (set-coding-category-system (car (car category-defaults))
                                (cdr (car category-defaults)))
    (setq category-defaults (cdr category-defaults))))

;; Default value of `buffer-file-coding-system'.
(setq-default buffer-file-coding-system 'iso-2022-8)
501
502 ;;; mule-coding.el ends here