aa6c91faaf2895826b67008bfd608a34b1c58827
[chise/xemacs-chise.git.1] / lisp / mule / mule-coding.el
1 ;;; mule-coding.el --- Coding-system functions for Mule.
2
3 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
5 ;; Copyright (C) 1995 Amdahl Corporation.
6 ;; Copyright (C) 1995 Sun Microsystems.
7 ;; Copyright (C) 1997,1999,2002,2003,2004 MORIOKA Tomohiko
8
9 ;; This file is part of XEmacs.
10
11 ;; XEmacs is free software; you can redistribute it and/or modify it
12 ;; under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
14 ;; any later version.
15
16 ;; XEmacs is distributed in the hope that it will be useful, but
17 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 ;; General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with XEmacs; see the file COPYING.  If not, write to the
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Commentary:
27
28 ;;; split off of mule.el and mostly moved to coding.el
29
30 ;;; Code:
31
(defun coding-system-force-on-output (coding-system register)
  "Return the force-on-output property of CODING-SYSTEM for REGISTER.
REGISTER must be an integer from 0 to 3; it selects one of the
properties `force-g0-on-output' through `force-g3-on-output'.
Any other integer signals `args-out-of-range'."
  (check-type register integer)
  ;; The property names are indexed by register number.
  (let ((properties [force-g0-on-output
                     force-g1-on-output
                     force-g2-on-output
                     force-g3-on-output]))
    (coding-system-property
     coding-system
     (if (and (>= register 0) (< register (length properties)))
         (aref properties register)
       (signal 'args-out-of-range (list register 0 3))))))
43
(defun coding-system-short (coding-system)
  "Return the value of the `short' property of CODING-SYSTEM.
This is a thin wrapper around `coding-system-property'."
  (coding-system-property coding-system
                          'short))
47
(defun coding-system-no-ascii-eol (coding-system)
  "Return the value of the `no-ascii-eol' property of CODING-SYSTEM.
This is a thin wrapper around `coding-system-property'."
  (coding-system-property coding-system
                          'no-ascii-eol))
51
(defun coding-system-no-ascii-cntl (coding-system)
  "Return the value of the `no-ascii-cntl' property of CODING-SYSTEM.
This is a thin wrapper around `coding-system-property'."
  (coding-system-property coding-system
                          'no-ascii-cntl))
55
(defun coding-system-seven (coding-system)
  "Return the value of the `seven' property of CODING-SYSTEM.
This is a thin wrapper around `coding-system-property'."
  (coding-system-property coding-system
                          'seven))
59
(defun coding-system-lock-shift (coding-system)
  "Return the value of the `lock-shift' property of CODING-SYSTEM.
This is a thin wrapper around `coding-system-property'."
  (coding-system-property coding-system
                          'lock-shift))
63
64 ;;(defun coding-system-use-japanese-jisx0201-roman (coding-system)
65 ;;  "Return the 'use-japanese-jisx0201-roman property of CODING-SYSTEM."
66 ;;  (coding-system-property coding-system 'use-japanese-jisx0201-roman))
67
68 ;;(defun coding-system-use-japanese-jisx0208-1978 (coding-system)
69 ;;  "Return the 'use-japanese-jisx0208-1978 property of CODING-SYSTEM."
70 ;;  (coding-system-property coding-system 'use-japanese-jisx0208-1978))
71
(defun coding-system-no-iso6429 (coding-system)
  "Return the value of the `no-iso6429' property of CODING-SYSTEM.
This is a thin wrapper around `coding-system-property'."
  (coding-system-property coding-system
                          'no-iso6429))
75
(defun coding-system-ccl-encode (coding-system)
  "Return the CCL `encode' property of CODING-SYSTEM.
The value is looked up under the property name `encode' with
`coding-system-property'."
  (coding-system-property coding-system
                          'encode))
79
(defun coding-system-ccl-decode (coding-system)
  "Return the CCL `decode' property of CODING-SYSTEM.
The value is looked up under the property name `decode' with
`coding-system-property'."
  (coding-system-property coding-system
                          'decode))
83
84 \f
85 ;;;; Definitions of predefined coding systems
86
;; ctext: iso2022 type with ASCII in G0 and Latin-1 in G1.
(make-coding-system 'ctext 'iso2022
                    "Coding-system used in X as Compound Text Encoding."
                    (list 'charset-g0 'ascii
                          'charset-g1 'latin-iso8859-1
                          'eol-type nil
                          'mnemonic "CText"))
94
;;; iso-8859-1 used to be a copy of ctext (see the disabled form below);
;;; it is now defined on its own as a no-conversion coding system.

;; (copy-coding-system 'ctext 'iso-8859-1)
(make-coding-system
 'iso-8859-1 'no-conversion
 ;; The documentation string previously repeated the ctext description,
 ;; which does not match a coding system of type `no-conversion'.
 "Coding-system of ISO 8859-1 (Latin-1), defined as a no-conversion coding system."
 '(eol-type nil mnemonic "Noconv"))
102
;; iso-2022-8bit-ss2: ASCII in G0, Latin-1 in G1, G2 left open.
(make-coding-system 'iso-2022-8bit-ss2 'iso2022
                    "ISO-2022 coding system using SS2 for 96-charset in 8-bit code."
                    (list 'charset-g0 'ascii
                          'charset-g1 'latin-iso8859-1
                          'charset-g2 t ; unspecified but can be used later.
                          'short t
                          'mnemonic "ISO8/SS"))
112
;; iso-2022-7bit-ss2: ASCII in G0, G2 left open, `seven' and `short' set.
(make-coding-system 'iso-2022-7bit-ss2 'iso2022
                    "ISO-2022 coding system using SS2 for 96-charset in 7-bit code."
                    (list 'charset-g0 'ascii
                          'charset-g2 t ; unspecified but can be used later.
                          'seven t
                          'short t
                          'mnemonic "ISO7/SS"
                          'eol-type nil))
122
;; iso-2022-jp-2 was formerly created by the disabled copy below; it is
;; now defined explicitly with the same documentation string and the
;; same property list as iso-2022-7bit-ss2.
;; (copy-coding-system 'iso-2022-7bit-ss2 'iso-2022-jp-2)
(make-coding-system 'iso-2022-jp-2 'iso2022
                    "ISO-2022 coding system using SS2 for 96-charset in 7-bit code."
                    (list 'charset-g0 'ascii
                          'charset-g2 t ; unspecified but can be used later.
                          'seven t
                          'short t
                          'mnemonic "ISO7/SS"
                          'eol-type nil))
133
;; iso-2022-7bit: only G0 (ASCII) is designated.
(make-coding-system 'iso-2022-7bit 'iso2022
                    "ISO 2022 based 7-bit encoding using only G0"
                    (list 'charset-g0 'ascii
                          'seven t
                          'short t
                          'mnemonic "ISO7"))

;; Keep the old name `iso-2022-7' available for old XEmacsen by copying
;; the definition above.
(copy-coding-system 'iso-2022-7bit
                    'iso-2022-7)
144
;; iso-2022-8: ASCII in G0 and Latin-1 in G1.
(make-coding-system 'iso-2022-8 'iso2022
                    "ISO-2022 eight-bit coding system.  No single-shift or locking-shift."
                    (list 'charset-g0 'ascii
                          'charset-g1 'latin-iso8859-1
                          'short t
                          'mnemonic "ISO8"))
153
;; escape-quoted: like the 8-bit systems above, with LF line endings and
;; the `escape-quoted' property set.
(make-coding-system 'escape-quoted 'iso2022
                    "ISO-2022 eight-bit coding system with escape quoting; used for .ELC files."
                    (list 'charset-g0 'ascii
                          'charset-g1 'latin-iso8859-1
                          'eol-type 'lf
                          'escape-quoted t
                          'mnemonic "ESC/Quot"))
163
;; iso-2022-lock: ASCII in G0, G1 left open, `seven' and `lock-shift' set.
(make-coding-system 'iso-2022-lock 'iso2022
                    "ISO-2022 coding system using Locking-Shift for 96-charset."
                    (list 'charset-g0 'ascii
                          'charset-g1 t ; unspecified but can be used later.
                          'seven t
                          'lock-shift t
                          'mnemonic "ISO7/Lock"))
173
174 (when (featurep 'utf-2000)
175   (setq coded-charset-entity-reference-alist
176         '(((ideograph-gt . isolated)            "I-GT-" 5 d)
177           ( ideograph-gt                          "GT-" 5 d)
178           ((=ruimoku-v6 . isolated)           "I-RUI6-" 4 X)
179           ( =ruimoku-v6                         "RUI6-" 4 X)
180           ((chinese-big5-cdp . isolated)       "I-CDP-" 4 X)
181           ( chinese-big5-cdp                     "CDP-" 4 X)
182           ((ideograph-daikanwa . isolated)       "I-M-" 5 d)
183           ( ideograph-daikanwa                     "M-" 5 d)
184           ((ideograph-gt-k . isolated)         "I-GT-K" 5 d)
185           ( ideograph-gt-k                       "GT-K" 5 d)
186           ((ideograph-cbeta . isolated)          "I-CB" 5 d)
187           ( ideograph-cbeta                        "CB" 5 d)
188           ((ideograph-hanziku-1 . isolated)  "I-HZK01-" 4 X)
189           ( ideograph-hanziku-1                "HZK01-" 4 X)
190           ((ideograph-hanziku-2 . isolated)  "I-HZK02-" 4 X)
191           ( ideograph-hanziku-2                "HZK02-" 4 X)
192           ((ideograph-hanziku-3 . isolated)  "I-HZK03-" 4 X)
193           ( ideograph-hanziku-3                "HZK03-" 4 X)
194           ((ideograph-hanziku-4 . isolated)  "I-HZK04-" 4 X)
195           ( ideograph-hanziku-4                "HZK04-" 4 X)
196           ((ideograph-hanziku-5 . isolated)  "I-HZK05-" 4 X)
197           ( ideograph-hanziku-5                "HZK05-" 4 X)
198           ((ideograph-hanziku-6 . isolated)  "I-HZK06-" 4 X)
199           ( ideograph-hanziku-6                "HZK06-" 4 X)
200           ((ideograph-hanziku-7 . isolated)  "I-HZK07-" 4 X)
201           ( ideograph-hanziku-7                "HZK07-" 4 X)
202           ((ideograph-hanziku-8 . isolated)  "I-HZK08-" 4 X)
203           ( ideograph-hanziku-8                "HZK08-" 4 X)
204           ((ideograph-hanziku-9 . isolated)  "I-HZK09-" 4 X)
205           ( ideograph-hanziku-9                "HZK09-" 4 X)
206           ((ideograph-hanziku-10 . isolated) "I-HZK10-" 4 X)
207           ( ideograph-hanziku-10               "HZK10-" 4 X)
208           ((ideograph-hanziku-11 . isolated) "I-HZK11-" 4 X)
209           ( ideograph-hanziku-11               "HZK11-" 4 X)
210           ((ideograph-hanziku-12 . isolated) "I-HZK12-" 4 X)
211           ( ideograph-hanziku-12               "HZK12-" 4 X)
212           ((ideograph-hanziku-1 . isolated)   "I-HZK1-" 4 X)
213           ( ideograph-hanziku-1                 "HZK1-" 4 X)
214           ((ideograph-hanziku-2 . isolated)   "I-HZK2-" 4 X)
215           ( ideograph-hanziku-2                 "HZK2-" 4 X)
216           ((japanese-jisx0208-1990 . isolated) "I-J90-" 4 X)
217           ( japanese-jisx0208-1990               "J90-" 4 X)
218           ((japanese-jisx0208 . isolated)      "I-J83-" 4 X)
219           ( japanese-jisx0208                    "J83-" 4 X)
220           ((japanese-jisx0213-1 . isolated)    "I-JX1-" 4 X)
221           ( japanese-jisx0213-1                  "JX1-" 4 X)
222           ((japanese-jisx0213-2 . isolated)    "I-JX2-" 4 X)
223           ( japanese-jisx0213-2                  "JX2-" 4 X)
224           ((japanese-jisx0212 . isolated)      "I-JSP-" 4 X)
225           ( japanese-jisx0212                    "JSP-" 4 X)
226           ((japanese-jisx0208-1978 . isolated) "I-J78-" 4 X)
227           ( japanese-jisx0208-1978               "J78-" 4 X)
228           ((chinese-cns11643-1 . isolated)      "I-C1-" 4 X)
229           ( chinese-cns11643-1                    "C1-" 4 X)
230           ((chinese-cns11643-2 . isolated)      "I-C2-" 4 X)
231           ( chinese-cns11643-2                    "C2-" 4 X)
232           ((chinese-cns11643-3 . isolated)      "I-C3-" 4 X)
233           ( chinese-cns11643-3                    "C3-" 4 X)
234           ((chinese-cns11643-4 . isolated)      "I-C4-" 4 X)
235           ( chinese-cns11643-4                    "C4-" 4 X)
236           ((chinese-cns11643-5 . isolated)      "I-C5-" 4 X)
237           ( chinese-cns11643-5                    "C5-" 4 X)
238           ((chinese-cns11643-6 . isolated)      "I-C6-" 4 X)
239           ( chinese-cns11643-6                    "C6-" 4 X)
240           ((chinese-cns11643-7 . isolated)      "I-C7-" 4 X)
241           ( chinese-cns11643-7                    "C7-" 4 X)
242           ((korean-ksc5601 . isolated)          "I-K0-" 4 X)
243           ( korean-ksc5601                        "K0-" 4 X)
244           ((chinese-gb2312 . isolated)          "I-G0-" 4 X)
245           ( chinese-gb2312                        "G0-" 4 X)
246           ((=iso-ir165 . isolated)             "I-EGB-" 4 X)
247           ( =iso-ir165                           "EGB-" 4 X)
248           ((latin-iso8859-1 . isolated)     "I-LATIN1-" 2 X)
249           ( latin-iso8859-1                   "LATIN1-" 2 X)
250           ((latin-iso8859-2 . isolated)     "I-LATIN2-" 2 X)
251           ( latin-iso8859-2                   "LATIN2-" 2 X)
252           ((latin-iso8859-3 . isolated)     "I-LATIN3-" 2 X)
253           ( latin-iso8859-3                   "LATIN3-" 2 X)
254           ((latin-iso8859-4 . isolated)     "I-LATIN4-" 2 X)
255           ( latin-iso8859-4                   "LATIN4-" 2 X)
256           ((cyrillic-iso8859-5 . isolated) "I-CYRILLIC-" 2 X)
257           ( cyrillic-iso8859-5              "CYRILLIC-" 2 X)
258           ((greek-iso8859-7 . isolated)      "I-GREEK-" 2 X)
259           ( greek-iso8859-7                    "GREEK-" 2 X)
260           ((hebrew-iso8859-8 . isolated)    "I-HEBREW-" 2 X)
261           ( hebrew-iso8859-8                  "HEBREW-" 2 X)
262           ((latin-iso8859-9 . isolated)     "I-LATIN5-" 2 X)
263           ( latin-iso8859-9                   "LATIN5-" 2 X)
264           ((latin-jisx0201 . isolated)      "I-LATINJ-" 2 X)
265           ( latin-jisx0201                    "LATINJ-" 2 X)
266           ((katakana-jisx0201 . isolated) "I-KATAKANA-" 2 X)
267           ( katakana-jisx0201               "KATAKANA-" 2 X)
268           ;; ((latin-viscii . isolated)        "I-VISCII-" 2 X)
269           ;; ( latin-viscii                      "VISCII-" 2 X)
270           ((latin-tcvn5712 . isolated)      "I-VSCII2-" 2 X)
271           ( latin-tcvn5712                    "VSCII2-" 2 X)
272           ((thai-tis620 . isolated)           "I-THAI-" 2 X)
273           ( thai-tis620                         "THAI-" 2 X)
274           ((lao . isolated)               "I-MULE-LAO-" 2 X)
275           ( lao                             "MULE-LAO-" 2 X)
276           ( ethiopic                      "MULE-ETHIO-" 4 X)
277           ((arabic-1-column . isolated)  "I-MULE-ARB1-" 2 X)
278           ( arabic-1-column                "MULE-ARB1-" 2 X)
279           ((arabic-2-column . isolated)  "I-MULE-ARB2-" 2 X)
280           ( arabic-2-column                "MULE-ARB2-" 2 X)
281           ((arabic-digit . isolated)     "I-MULE-ARBD-" 2 X)
282           ( arabic-digit                   "MULE-ARBD-" 2 X)
283           ((ipa . isolated)               "I-MULE-IPA-" 2 X)
284           ( ipa                             "MULE-IPA-" 2 X)
285           ((china3-jef . isolated)             "I-JC3-" 4 X)
286           ( china3-jef                           "JC3-" 4 X)
287           ((chinese-big5 . isolated)             "I-B-" 4 X)
288           ( chinese-big5                           "B-" 4 X)
289           ( chinese-big5                          "C0-" 4 X)
290           ( =ucs@unicode                          "UU+" 4 X)
291           ( =ucs@unicode                          "UU-" 8 X)
292           ( =ucs@iso                               "U+" 4 X)
293           ( =ucs@iso                               "U-" 8 X)
294           ( =ucs@gb                               "GU+" 4 X)
295           ( =ucs@gb                               "GU-" 8 X)
296           ( =ucs@jis                              "JU+" 4 X)
297           ( =ucs@jis                              "JU-" 8 X)
298           ( =ucs@cns                              "CU+" 4 X)
299           ( =ucs@cns                              "CU-" 8 X)
300           ( =ucs@ks                               "KU+" 4 X)
301           ( =ucs@ks                               "KU-" 8 X)
302           ( =jis-x0208@1997                      "J97-" 4 X)
303           ))
304
305   (make-coding-system
306    'utf-8-mcs-er 'utf-8
307    "Coding-system of UTF-8 with entity-reference."
308    '(mnemonic "MTF8r" use-entity-reference t))
309
310   (make-coding-system
311    'utf-8-gb 'utf-8
312    "Coding-system of UTF-8 using GB mapping."
313    '(mnemonic "UTF8G"
314               charset-g0 ucs-gb
315               charset-g1 =>ucs-gb
316               charset-g2 =>ucs))
317
318   (make-coding-system
319    'utf-8-gb-er 'utf-8
320    "Coding-system of UTF-8 using GB mapping with entity-reference."
321    '(mnemonic "UTF8Gr"
322               charset-g0 ucs-gb
323               charset-g1 =>ucs-gb
324               charset-g2 =>ucs
325               use-entity-reference t))
326
327   (make-coding-system
328    'utf-8-cns 'utf-8
329    "Coding-system of UTF-8 using CNS mapping."
330    '(mnemonic "UTF8C"
331               charset-g0 ucs-cns
332               charset-g1 =>ucs-cns
333               charset-g2 =>ucs))
334
335   (make-coding-system
336    'utf-8-cns-er 'utf-8
337    "Coding-system of UTF-8 using CNS mapping with entity-reference."
338    '(mnemonic "UTF8Cr"
339               charset-g0 ucs-cns
340               charset-g1 =>ucs-cns
341               charset-g2 =>ucs
342               use-entity-reference t))
343
344   (make-coding-system
345    'utf-8-big5 'utf-8
346    "Coding-system of UTF-8 using Big5 mapping."
347    '(mnemonic "UTF8B"
348               charset-g0 ucs-big5
349               charset-g1 =>ucs-big5
350               charset-g2 =>ucs))
351
352   (make-coding-system
353    'utf-8-big5-er 'utf-8
354    "Coding-system of UTF-8 using Big5 mapping with entity-reference."
355    '(mnemonic "UTF8Br"
356               charset-g0 ucs-big5
357               charset-g1 =>ucs-big5
358               charset-g2 =>ucs
359               use-entity-reference t))
360
361   (make-coding-system
362    'utf-8-jis 'utf-8
363    "Coding-system of UTF-8 using JIS mapping."
364    '(mnemonic "UTF8J"
365               charset-g0 =ucs@jis-2000
366               charset-g1 =>ucs-jis
367               charset-g2 =>ucs))
368
369   (make-coding-system
370    'utf-8-jis-er 'utf-8
371    "Coding-system of UTF-8 using JIS mapping with entity-reference."
372    '(mnemonic "UTF8Jr"
373               charset-g0 =ucs@jis-2000
374               charset-g1 =>ucs-jis
375               charset-g2 =>ucs
376               use-entity-reference t))
377
378   (make-coding-system
379    'utf-8-jp 'utf-8
380    "Coding-system of UTF-8 for common glyphs used in Japan."
381    '(mnemonic "UTF8J"
382               charset-g0 =ucs@jp
383               charset-g1 =>ucs-jis
384               charset-g2 =>ucs))
385
386   (make-coding-system
387    'utf-8-jp-er 'utf-8
388    "Coding-system of UTF-8 using =ucs@jp mapping with entity-reference."
389    '(mnemonic "UTF8Jr"
390               charset-g0 =ucs@jp
391               charset-g1 =>ucs-jis
392               charset-g2 =>ucs
393               use-entity-reference t))
394
395   (make-coding-system
396    'utf-8-ks 'utf-8
397    "Coding-system of UTF-8 using KS mapping."
398    '(mnemonic "UTF8K"
399               charset-g0 ucs-ks
400               charset-g1 =>ucs-ks
401               charset-g2 =>ucs))
402
403   (make-coding-system
404    'utf-8-ks-er 'utf-8
405    "Coding-system of UTF-8 using KS mapping with entity-reference."
406    '(mnemonic "UTF8Kr"
407               charset-g0 ucs-ks
408               charset-g1 =>ucs-ks
409               charset-g2 =>ucs
410               use-entity-reference t))
411
412   (define-coding-system-alias 'utf-8 'utf-8-mcs)
413   (define-coding-system-alias 'utf-8-er 'utf-8-mcs-er)
414   )
415
;; euc-jisx0213: ASCII in G0, JIS X 0213 plane 1 in G1, JIS X 0201
;; katakana in G2 and JIS X 0213 plane 2 in G3.
(make-coding-system 'euc-jisx0213 'iso2022
                    "Coding-system of Japanese EUC based on JIS X 0213."
                    (list 'charset-g0 'ascii
                          'charset-g1 'japanese-jisx0213-1
                          'charset-g2 'katakana-jisx0201
                          'charset-g3 'japanese-jisx0213-2
                          'short t
                          'mnemonic "Ja/EUC0213"))
426
427 ;; initialize the coding categories to something semi-reasonable
428 ;; so that the remaining Lisp files can contain extended characters.
429 ;; (They will be in ISO-7 format)
430 ;; #### This list needs to be synched with the ones in mule-cmds.el.
431
;; Both variants share the same base order.  With the utf-2000 feature
;; the list additionally contains utf-8 (right after no-conversion) and
;; ucs-4 (at the end); without it those two categories are left out.
(set-coding-priority-list
 (if (featurep 'utf-2000)
     '(iso-7 no-conversion utf-8
       iso-8-1 iso-8-2 iso-8-designate iso-lock-shift
       shift-jis big5 ucs-4)
   '(iso-7 no-conversion
     iso-8-1 iso-8-2 iso-8-designate iso-lock-shift
     shift-jis big5)))
454
;; Assign a coding system to each category, in the order listed.
;; `iso-2022-7' is the compatibility copy of `iso-2022-7bit' made above.
(let ((category-defaults '((iso-7           . iso-2022-7)
                           (iso-8-designate . ctext)
                           (iso-8-1         . ctext)
                           (iso-lock-shift  . iso-2022-lock)
                           (no-conversion   . no-conversion))))
  (while category-defaults
    (set-coding-category-system (car (car category-defaults))
                                (cdr (car category-defaults)))
    (setq category-defaults (cdr category-defaults))))
460
;; Set the default value of `buffer-file-coding-system' to iso-2022-8.
(set-default 'buffer-file-coding-system 'iso-2022-8)
462
463 ;;; mule-coding.el ends here