1 ;;; mule-coding.el --- Coding-system functions for Mule.
3 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
5 ;; Copyright (C) 1995 Amdahl Corporation.
6 ;; Copyright (C) 1995 Sun Microsystems.
7 ;; Copyright (C) 1997, 1999, 2002, 2003, 2004, 2005, 2006, 2008, 2009,
8 ;; 2010, 2011 MORIOKA Tomohiko
10 ;; This file is part of XEmacs.
12 ;; XEmacs is free software; you can redistribute it and/or modify it
13 ;; under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation; either version 2, or (at your option)
17 ;; XEmacs is distributed in the hope that it will be useful, but
18 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 ;; General Public License for more details.
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with XEmacs; see the file COPYING. If not, write to the
24 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
25 ;; Boston, MA 02111-1307, USA.
29 ;;; split off of mule.el and mostly moved to coding.el
(defun coding-system-force-on-output (coding-system register)
  "Return the 'force-on-output property of CODING-SYSTEM for the specified REGISTER.
REGISTER must be an integer from 0 to 3; it selects which of the
properties `force-g0-on-output', `force-g1-on-output',
`force-g2-on-output' or `force-g3-on-output' is looked up with
`coding-system-property'.
Any other integer signals `args-out-of-range' with data (REGISTER 0 3)."
  (check-type register integer)
  (coding-system-property
   coding-system
   ;; Map the register number onto the matching property symbol.
   (case register
     (0 'force-g0-on-output)
     (1 'force-g1-on-output)
     (2 'force-g2-on-output)
     (3 'force-g3-on-output)
     ;; Integers outside 0..3 name no register: report the valid bounds.
     (t (signal 'args-out-of-range (list register 0 3))))))
(defun coding-system-short (coding-system)
  "Look up the 'short property of CODING-SYSTEM and return its value.
The lookup is delegated to `coding-system-property'."
  (let ((property 'short))
    (coding-system-property coding-system property)))
(defun coding-system-no-ascii-eol (coding-system)
  "Look up the 'no-ascii-eol property of CODING-SYSTEM and return its value.
The lookup is delegated to `coding-system-property'."
  (let ((property 'no-ascii-eol))
    (coding-system-property coding-system property)))
(defun coding-system-no-ascii-cntl (coding-system)
  "Look up the 'no-ascii-cntl property of CODING-SYSTEM and return its value.
The lookup is delegated to `coding-system-property'."
  (let ((property 'no-ascii-cntl))
    (coding-system-property coding-system property)))
(defun coding-system-seven (coding-system)
  "Look up the 'seven property of CODING-SYSTEM and return its value.
The lookup is delegated to `coding-system-property'."
  (let ((property 'seven))
    (coding-system-property coding-system property)))
(defun coding-system-lock-shift (coding-system)
  "Look up the 'lock-shift property of CODING-SYSTEM and return its value.
The lookup is delegated to `coding-system-property'."
  (let ((property 'lock-shift))
    (coding-system-property coding-system property)))
65 ;;(defun coding-system-use-japanese-jisx0201-roman (coding-system)
66 ;; "Return the 'use-japanese-jisx0201-roman property of CODING-SYSTEM."
67 ;; (coding-system-property coding-system 'use-japanese-jisx0201-roman))
69 ;;(defun coding-system-use-japanese-jisx0208-1978 (coding-system)
70 ;; "Return the 'use-japanese-jisx0208-1978 property of CODING-SYSTEM."
;; (coding-system-property coding-system 'use-japanese-jisx0208-1978))
(defun coding-system-no-iso6429 (coding-system)
  "Look up the 'no-iso6429 property of CODING-SYSTEM and return its value.
The lookup is delegated to `coding-system-property'."
  (let ((property 'no-iso6429))
    (coding-system-property coding-system property)))
(defun coding-system-ccl-encode (coding-system)
  "Look up the CCL 'encode property of CODING-SYSTEM and return its value.
The lookup is delegated to `coding-system-property'."
  (let ((property 'encode))
    (coding-system-property coding-system property)))
(defun coding-system-ccl-decode (coding-system)
  "Look up the CCL 'decode property of CODING-SYSTEM and return its value.
The lookup is delegated to `coding-system-property'."
  (let ((property 'decode))
    (coding-system-property coding-system property)))
86 ;;;; Definitions of predefined coding systems
90 "Coding-system used in X as Compound Text Encoding."
92 charset-g1 latin-iso8859-1
96 ;;; iso-8859-1 and ctext are aliases.
98 ;; (copy-coding-system 'ctext 'iso-8859-1)
100 'iso-8859-1 'no-conversion
101 "Coding-system used in X as Compound Text Encoding."
102 '(eol-type nil mnemonic "Noconv"))
105 'iso-2022-8bit-ss2 'iso2022
106 "ISO-2022 coding system using SS2 for 96-charset in 8-bit code."
108 charset-g1 latin-iso8859-1
109 charset-g2 t ;; unspecified but can be used later.
115 'iso-2022-7bit-ss2 'iso2022
116 "ISO-2022 coding system using SS2 for 96-charset in 7-bit code."
118 charset-g2 t ;; unspecified but can be used later.
124 ;; (copy-coding-system 'iso-2022-7bit-ss2 'iso-2022-jp-2)
126 'iso-2022-jp-2 'iso2022
127 "ISO-2022 coding system using SS2 for 96-charset in 7-bit code."
130 charset-g2 t ;; unspecified but can be used later.
135 (if (featurep 'utf-2000)
136 (list* 'ccs-priority-list
138 =jis-x0208@1983 =jis-x0208@1978
144 'iso-2022-7bit 'iso2022
145 "ISO 2022 based 7-bit encoding using only G0"
151 ;; compatibility for old XEmacsen
152 (copy-coding-system 'iso-2022-7bit 'iso-2022-7)
156 "ISO-2022 eight-bit coding system. No single-shift or locking-shift."
158 charset-g1 latin-iso8859-1
164 'escape-quoted 'iso2022
165 "ISO-2022 eight-bit coding system with escape quoting; used for .ELC files."
167 charset-g1 latin-iso8859-1
174 'iso-2022-lock 'iso2022
175 "ISO-2022 coding system using Locking-Shift for 96-charset."
177 charset-g1 t ;; unspecified but can be used later.
183 (when (featurep 'utf-2000)
184 (setq decomposition-feature-list
187 =decomposition@hanyo-denshi))
189 (setq coded-charset-entity-reference-alist
190 '(((=adobe-japan1-6 . isolated) "I-AJ1-" 5 d)
191 ( =adobe-japan1-6 "AJ1-" 5 d)
192 ((=adobe-japan1-base . isolated) "I-AJ1-" 5 d)
193 ( =adobe-japan1-base "I-AJ1-" 5 d)
194 ((=gt . isolated) "I-GT-" 5 d)
196 ( =zinbun-oracle "ZOB-" 4 d)
197 ((=ruimoku-v6 . isolated) "I-RUI6-" 4 X)
198 ( =ruimoku-v6 "RUI6-" 4 X)
199 ((=big5-cdp . isolated) "I-CDP-" 4 X)
200 ( =big5-cdp "CDP-" 4 X)
201 ((=daikanwa@rev2 . isolated) "I-M-" 5 d)
202 ( =daikanwa "M-" 5 d)
203 ( =daikanwa@rev2 "r2M-" 5 d)
204 ( =daikanwa@rev1 "r1M-" 5 d)
205 ((=gt-k . isolated) "I-GT-K" 5 d)
207 ((=cbeta . isolated) "I-CB" 5 d)
209 ((=hanziku-1 . isolated) "I-HZK01-" 4 X)
210 ( =hanziku-1 "HZK01-" 4 X)
211 ((=hanziku-2 . isolated) "I-HZK02-" 4 X)
212 ( =hanziku-2 "HZK02-" 4 X)
213 ((=hanziku-3 . isolated) "I-HZK03-" 4 X)
214 ( =hanziku-3 "HZK03-" 4 X)
215 ((=hanziku-4 . isolated) "I-HZK04-" 4 X)
216 ( =hanziku-4 "HZK04-" 4 X)
217 ((=hanziku-5 . isolated) "I-HZK05-" 4 X)
218 ( =hanziku-5 "HZK05-" 4 X)
219 ((=hanziku-6 . isolated) "I-HZK06-" 4 X)
220 ( =hanziku-6 "HZK06-" 4 X)
221 ((=hanziku-7 . isolated) "I-HZK07-" 4 X)
222 ( =hanziku-7 "HZK07-" 4 X)
223 ((=hanziku-8 . isolated) "I-HZK08-" 4 X)
224 ( =hanziku-8 "HZK08-" 4 X)
225 ((=hanziku-9 . isolated) "I-HZK09-" 4 X)
226 ( =hanziku-9 "HZK09-" 4 X)
227 ((=hanziku-10 . isolated) "I-HZK10-" 4 X)
228 ( =hanziku-10 "HZK10-" 4 X)
229 ((=hanziku-11 . isolated) "I-HZK11-" 4 X)
230 ( =hanziku-11 "HZK11-" 4 X)
231 ((=hanziku-12 . isolated) "I-HZK12-" 4 X)
232 ( =hanziku-12 "HZK12-" 4 X)
233 ((=jis-x0208@1990 . isolated) "I-J90-" 4 X)
234 ((=jis-x0208@1983 . isolated) "I-J83-" 4 X)
235 ((=jis-x0213-1@2000 . isolated) "I-JX1-" 4 X)
236 ((=jis-x0213-2 . isolated) "I-JX2-" 4 X)
237 ((=jis-x0213-1@2004 . isolated) "I-JX3-" 4 X)
238 ((=jis-x0212 . isolated) "I-JSP-" 4 X)
239 ((=jis-x0208@1978/1pr . isolated) "I-J78-" 4 X)
240 ( =jis-x0208@1990 "J90-" 4 X)
241 ((=jis-x0208 . isolated) "I-J90-" 4 X)
242 ( =jis-x0213-1@2000 "JX1-" 4 X)
243 ( =jis-x0213-2 "JX2-" 4 X)
244 ( =jis-x0213-1@2004 "JX3-" 4 X)
245 ( =jis-x0212 "JSP-" 4 X)
246 ( =jis-x0208@1983 "J83-" 4 X)
247 ((chinese-gb2312 . isolated) "I-G0-" 4 X)
248 ((=iso-ir165 . isolated) "I-EGB-" 4 X)
249 ((chinese-cns11643-1 . isolated) "I-C1-" 4 X)
250 ((chinese-cns11643-2 . isolated) "I-C2-" 4 X)
251 ((chinese-cns11643-3 . isolated) "I-C3-" 4 X)
252 ((chinese-cns11643-4 . isolated) "I-C4-" 4 X)
253 ((chinese-cns11643-5 . isolated) "I-C5-" 4 X)
254 ((chinese-cns11643-6 . isolated) "I-C6-" 4 X)
255 ((chinese-cns11643-7 . isolated) "I-C7-" 4 X)
256 ((korean-ksc5601 . isolated) "I-K0-" 4 X)
257 ((=big5-eten . isolated) "I-B-" 4 X)
258 ((latin-iso8859-1 . isolated) "I-LATIN1-" 2 X)
259 ((latin-iso8859-2 . isolated) "I-LATIN2-" 2 X)
260 ((latin-iso8859-3 . isolated) "I-LATIN3-" 2 X)
261 ((latin-iso8859-4 . isolated) "I-LATIN4-" 2 X)
262 ((cyrillic-iso8859-5 . isolated) "I-CYRILLIC-" 2 X)
263 ((greek-iso8859-7 . isolated) "I-GREEK-" 2 X)
264 ((hebrew-iso8859-8 . isolated) "I-HEBREW-" 2 X)
265 ((latin-iso8859-9 . isolated) "I-LATIN5-" 2 X)
266 ((latin-jisx0201 . isolated) "I-LATINJ-" 2 X)
267 ((katakana-jisx0201 . isolated) "I-KATAKANA-" 2 X)
268 ((latin-tcvn5712 . isolated) "I-VSCII2-" 2 X)
269 ;; ((latin-viscii . isolated) "I-VISCII-" 2 X)
270 ((latin-viscii-upper . isolated) "I-MULE-VIET-U-" 2 X)
271 ((latin-viscii-lower . isolated) "I-MULE-VIET-L-" 2 X)
272 ((thai-tis620 . isolated) "I-THAI-" 2 X)
273 ((lao . isolated) "I-MULE-LAO-" 2 X)
274 ((arabic-1-column . isolated) "I-MULE-ARB1-" 2 X)
275 ((arabic-2-column . isolated) "I-MULE-ARB2-" 2 X)
276 ((arabic-digit . isolated) "I-MULE-ARBD-" 2 X)
277 ((ipa . isolated) "I-MULE-IPA-" 2 X)
278 ((china3-jef . isolated) "I-JC3-" 4 X)
279 ((=hanyo-denshi/ja . isolated) "I-HD-JA-" 4 X)
280 ((=hanyo-denshi/jb . isolated) "I-HD-JB-" 4 X)
281 ((=hanyo-denshi/jc . isolated) "I-HD-JC-" 4 X)
282 ((=hanyo-denshi/jd . isolated) "I-HD-JD-" 4 X)
283 ((=hanyo-denshi/ft . isolated) "I-HD-FT-" 4 X)
284 ((=hanyo-denshi/ia . isolated) "I-HD-IA-" 4 X)
285 ((=hanyo-denshi/ib . isolated) "I-HD-IB-" 4 X)
286 ((=hanyo-denshi/hg . isolated) "I-HD-HG-" 4 X)
287 ((=hanyo-denshi/ip . isolated) "I-HD-IP-" 4 X)
288 ((=hanyo-denshi/jt . isolated) "I-HD-JT-" 4 X)
289 ((=hanyo-denshi/ks . isolated) "I-HD-KS-" 6 d)
290 ((=hanyo-denshi/ks/mf . isolated) "I-KSMF-" 5 d)
291 ((=hanziku-1 . isolated) "I-HZK1-" 4 X)
292 ( =hanziku-1 "HZK1-" 4 X)
293 ((=hanziku-2 . isolated) "I-HZK2-" 4 X)
294 ( =hanziku-2 "HZK2-" 4 X)
295 ( =hanyo-denshi/ja "HD-JA-" 4 X)
296 ( =hanyo-denshi/jb "HD-JB-" 4 X)
297 ( =hanyo-denshi/jc "HD-JC-" 4 X)
298 ( =hanyo-denshi/jd "HD-JD-" 4 X)
299 ( =hanyo-denshi/ft "HD-FT-" 4 X)
300 ( =hanyo-denshi/ia "HD-IA-" 4 X)
301 ( =hanyo-denshi/ib "HD-IB-" 4 X)
302 ( =hanyo-denshi/hg "HD-HG-" 4 X)
303 ( =hanyo-denshi/ip "HD-IP-" 4 X)
304 ( =hanyo-denshi/jt "HD-JT-" 4 X)
305 ( =hanyo-denshi/ks "HD-KS-" 6 d)
306 ( =hanyo-denshi/ks/mf "KSMF-" 5 d)
307 ( =jis-x0208@1978 "J78-" 4 X)
308 ( chinese-cns11643-1 "C1-" 4 X)
309 ( chinese-cns11643-2 "C2-" 4 X)
310 ( chinese-cns11643-3 "C3-" 4 X)
311 ( chinese-cns11643-4 "C4-" 4 X)
312 ( chinese-cns11643-5 "C5-" 4 X)
313 ( chinese-cns11643-6 "C6-" 4 X)
314 ( chinese-cns11643-7 "C7-" 4 X)
315 ( korean-ksc5601 "K0-" 4 X)
316 ( chinese-gb2312 "G0-" 4 X)
317 ( =iso-ir165 "EGB-" 4 X)
318 ( latin-iso8859-1 "LATIN1-" 2 X)
319 ( latin-iso8859-2 "LATIN2-" 2 X)
320 ( latin-iso8859-3 "LATIN3-" 2 X)
321 ( latin-iso8859-4 "LATIN4-" 2 X)
322 ( cyrillic-iso8859-5 "CYRILLIC-" 2 X)
323 ( greek-iso8859-7 "GREEK-" 2 X)
324 ( hebrew-iso8859-8 "HEBREW-" 2 X)
325 ( latin-iso8859-9 "LATIN5-" 2 X)
326 ( latin-jisx0201 "LATINJ-" 2 X)
327 ( katakana-jisx0201 "KATAKANA-" 2 X)
328 ( latin-tcvn5712 "VSCII2-" 2 X)
329 ( latin-viscii "VISCII-" 2 X)
330 ( latin-viscii-upper "MULE-VIET-U-" 2 X)
331 ( latin-viscii-lower "MULE-VIET-L-" 2 X)
332 ( thai-tis620 "THAI-" 2 X)
333 ( lao "MULE-LAO-" 2 X)
334 ( ethiopic "MULE-ETHIO-" 4 X)
335 ( arabic-1-column "MULE-ARB1-" 2 X)
336 ( arabic-2-column "MULE-ARB2-" 2 X)
337 ( arabic-digit "MULE-ARBD-" 2 X)
338 ( ipa "MULE-IPA-" 2 X)
339 (=>>>jis-x0208 "g2-J0-" 4 X)
340 (=>>>jis-x0208 "GI-J0-" 4 X)
341 (=>>>jis-x0213-1 "g2-JX1-" 4 X)
342 (=>>>jis-x0213-1 "GI-JX1-" 4 X)
343 (=>>>jis-x0213-2 "g2-JX2-" 4 X)
344 (=>>>jis-x0213-1@2004 "g2-JX3-" 4 X)
345 (=>>>jis-x0213-1@2004 "GI-JX3-" 4 X)
346 (=>>>hanyo-denshi/jt "g2-HD-JT-" 4 X)
347 (=>>>gt "g2-GT-" 5 d)
349 (=>>jis-x0208 "G-J0-" 4 X)
350 (=>>jis-x0213-1@2000 "G-JX1-" 4 X)
351 (=>>jis-x0213-2 "G-JX2-" 4 X)
352 (=>>jis-x0213-1@2004 "G-JX3-" 4 X)
353 (=>>adobe-japan1 "G-AJ1-" 5 d)
354 (=>>jis-x0208@1978 "G-J78-" 4 X)
355 (=>>big5-cdp "G-CDP-" 4 X)
356 (=>>gt-k "G-GT-K" 5 d)
357 (=>>ruimoku-v6 "G-RUI6-" 4 X)
358 (=>>hanyo-denshi/ft "G-HD-FT-" 4 X)
359 (=>>hanyo-denshi/ia "G-HD-IA-" 4 X)
360 (=>>hanyo-denshi/ib "G-HD-IB-" 4 X)
361 (=>>hanyo-denshi/jt "G-HD-JT-" 4 X)
362 (=>>hanyo-denshi/ks "G-HD-KS-" 4 X)
363 (=>>cbeta "G-CB" 5 d)
364 (=+>jis-x0208 "o-J0-" 4 X)
365 (=+>jis-x0213-1 "o-JX1-" 4 X)
366 (=+>jis-x0213-2 "o-JX2-" 4 X)
367 (=+>jis-x0208@1978 "o-J78-" 4 X)
368 ( =>jis-x0208@1997 "J97-" 4 X)
369 ( =>jis-x0208@1997 "A-J0-" 4 X)
370 ( =>jis-x0213-1@2000 "A-JX1-" 4 X)
371 ( =>jis-x0213-2 "A-JX2-" 4 X)
372 ( =>jis-x0213-1@2004 "A-JX3-" 4 X)
374 ( =>gt-k "A-GT-K" 5 d)
375 ( =>zinbun-oracle "A-ZOB-" 4 d)
376 (==>ucs@bucs "BUCS+" 4 X)
377 ( =>ucs@iso "A-IU+" 4 X)
378 ( =>ucs@unicode "A-UU+" 4 X)
379 ( =>ucs@jis "A-JU+" 4 X)
380 ( =>ucs@cns "A-CU+" 4 X)
381 ( =>ucs@ks "A-KU+" 4 X)
382 (=+>ucs@iso "o-IU+" 4 X)
383 (=+>ucs@jis "o-JU+" 4 X)
384 (=+>ucs@jis/1990 "o-J90U+" 4 X)
385 (=+>ucs@cns "o-CU+" 4 X)
386 (=>>ucs@iso "G-IU+" 4 X)
387 (=>>ucs@unicode "G-UU+" 4 X)
388 (=>>ucs@jis "G-JU+" 4 X)
389 (=>>ucs@ks "G-KU+" 4 X)
390 (=>>ucs@cns "G-CU+" 4 X)
391 (=>>ucs@jis/2004 "G-J04U+" 4 X)
392 (=>>ucs@jis/2000 "G-J00U+" 4 X)
393 (=>>ucs@jis/1990 "G-J90U+" 4 X)
394 (=>>ucs@JP "G-dJU+" 4 X)
395 (=>>>ucs@iso "g2-IU+" 4 X)
396 (=>>>ucs@iso "GI-IU+" 4 X)
397 (=>>>ucs@unicode "g2-UU+" 4 X)
398 (=>>>ucs@unicode "GI-UU+" 4 X)
399 (=>>>ucs@jis "g2-JU+" 4 X)
400 (=>>>ucs@jis "GI-JU+" 4 X)
401 (=>>>ucs@ks "g2-KU+" 4 X)
402 (=>>>ucs@ks "GI-KU+" 4 X)
403 (=>>>ucs@cns "g2-CU+" 4 X)
404 (=>>>ucs@cns "GI-CU+" 4 X)
405 (=>>>ucs@jis/2004 "g2-J04U+" 4 X)
406 (=>>>ucs@jis/2004 "GI-J04U+" 4 X)
407 (=>>>ucs@jis/1990 "g2-J90U+" 4 X)
409 ( =ucs@unicode "UU+" 4 X)
410 ( =ucs@unicode "UU-" 8 X)
414 ( =ucs@jis "JU+" 4 X)
415 ( =ucs@jis "JU-" 8 X)
416 ( =ucs@cns "CU+" 4 X)
417 ( =ucs@cns "CU-" 8 X)
420 ( =ucs@JP "dJU+" 4 X)
421 ( =ucs@JP/hanazono "hanaJU+" 4 X)
422 ( china3-jef "JC3-" 4 X)
425 ( =big5-eten "BE-" 4 X)
426 ( =>big5-cdp "A-CDP-" 4 X)
427 ( =>daikanwa "A-M-" 5 d)
428 ( =jis-x0208@1978/i1 "J78i1-" 4 X)
429 ( =shinjigen@rev "SJG2-" 4 d)
430 ( =shinjigen@1ed "SJG1-" 4 d)
435 "Coding-system of UTF-8 with entity-reference."
436 '(mnemonic "MTF8r" use-entity-reference t))
439 'utf-8-mcs-no-composition 'utf-8
440 "Coding-system of UTF-8 without composition."
441 '(mnemonic "MTF8-nc" disable-composition t))
445 "Coding-system of UTF-8 using GB mapping."
453 "Coding-system of UTF-8 using GB mapping with entity-reference."
458 use-entity-reference t))
462 "Coding-system of UTF-8 using CNS mapping."
470 "Coding-system of UTF-8 using CNS mapping with entity-reference."
475 use-entity-reference t))
479 "Coding-system of UTF-8 using Big5 mapping."
482 charset-g1 =>ucs-big5
486 'utf-8-big5-er 'utf-8
487 "Coding-system of UTF-8 using Big5 mapping with entity-reference."
490 charset-g1 =>ucs-big5
492 use-entity-reference t))
496 "Coding-system of UTF-8 using JIS mapping."
498 charset-g0 =ucs@jis-2000
504 "Coding-system of UTF-8 using JIS mapping with entity-reference."
506 charset-g0 =ucs@jis-2000
509 use-entity-reference t))
513 "Coding-system of UTF-8 for common glyphs used in Japan."
521 "Coding-system of UTF-8 using =ucs@jp mapping with entity-reference."
526 use-entity-reference t))
530 "Coding-system of UTF-8 using KS mapping."
538 "Coding-system of UTF-8 using KS mapping with entity-reference."
543 use-entity-reference t))
545 (define-coding-system-alias 'utf-8 'utf-8-mcs)
546 (define-coding-system-alias 'utf-8-er 'utf-8-mcs-er)
550 'euc-jisx0213 'iso2022
551 "Coding-system of Japanese EUC based on JIS X 0213."
553 charset-g1 japanese-jisx0213-1
554 charset-g2 katakana-jisx0201
555 charset-g3 japanese-jisx0213-2
557 mnemonic "Ja/EUC0213"
560 ;; initialize the coding categories to something semi-reasonable
561 ;; so that the remaining Lisp files can contain extended characters.
562 ;; (They will be in ISO-7 format)
563 ;; #### This list needs to be synched with the ones in mule-cmds.el.
565 (if (featurep 'utf-2000)
566 (set-coding-priority-list '(iso-7
576 (set-coding-priority-list '(iso-7
;; Assign a coding system to each coding category, walking the
;; (CATEGORY . CODING-SYSTEM) table below from first to last entry.
(let ((category-defaults '((iso-7           . iso-2022-7)
                           (iso-8-designate . ctext)
                           (iso-8-1         . ctext)
                           (iso-lock-shift  . iso-2022-lock)
                           (no-conversion   . no-conversion))))
  (while category-defaults
    (set-coding-category-system (car (car category-defaults))
                                (cdr (car category-defaults)))
    (setq category-defaults (cdr category-defaults))))

;; Default value of `buffer-file-coding-system'.
(set-default 'buffer-file-coding-system 'iso-2022-8)
596 ;;; mule-coding.el ends here