1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any later version.
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
/* File-scope Lisp_Object variables, one per pre-defined charset.  They
   are declared here without initializers; nothing in this part of the
   file assigns them. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
/* Vcharset_ucs, Vcharset_ucs_bmp, Vcharset_ucs_smp and Vcharset_ucs_sip
   are the charsets encode_builtin_char_1() selects for code points that
   fall in no more specific range. */
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_cns;
72 Lisp_Object Vcharset_ucs_jis;
73 Lisp_Object Vcharset_ucs_ks;
74 Lisp_Object Vcharset_ucs_big5;
75 Lisp_Object Vcharset_latin_viscii;
76 Lisp_Object Vcharset_latin_tcvn5712;
77 Lisp_Object Vcharset_latin_viscii_lower;
78 Lisp_Object Vcharset_latin_viscii_upper;
79 Lisp_Object Vcharset_chinese_big5;
80 Lisp_Object Vcharset_chinese_big5_cdp;
81 Lisp_Object Vcharset_ideograph_hanziku_1;
82 Lisp_Object Vcharset_ideograph_hanziku_2;
83 Lisp_Object Vcharset_ideograph_hanziku_3;
84 Lisp_Object Vcharset_ideograph_hanziku_4;
85 Lisp_Object Vcharset_ideograph_hanziku_5;
86 Lisp_Object Vcharset_ideograph_hanziku_6;
87 Lisp_Object Vcharset_ideograph_hanziku_7;
88 Lisp_Object Vcharset_ideograph_hanziku_8;
89 Lisp_Object Vcharset_ideograph_hanziku_9;
90 Lisp_Object Vcharset_ideograph_hanziku_10;
91 Lisp_Object Vcharset_ideograph_hanziku_11;
92 Lisp_Object Vcharset_ideograph_hanziku_12;
93 Lisp_Object Vcharset_china3_jef;
94 Lisp_Object Vcharset_ideograph_cbeta;
95 Lisp_Object Vcharset_ideograph_gt;
96 Lisp_Object Vcharset_ideograph_gt_pj_1;
97 Lisp_Object Vcharset_ideograph_gt_pj_2;
98 Lisp_Object Vcharset_ideograph_gt_pj_3;
99 Lisp_Object Vcharset_ideograph_gt_pj_4;
100 Lisp_Object Vcharset_ideograph_gt_pj_5;
101 Lisp_Object Vcharset_ideograph_gt_pj_6;
102 Lisp_Object Vcharset_ideograph_gt_pj_7;
103 Lisp_Object Vcharset_ideograph_gt_pj_8;
104 Lisp_Object Vcharset_ideograph_gt_pj_9;
105 Lisp_Object Vcharset_ideograph_gt_pj_10;
106 Lisp_Object Vcharset_ideograph_gt_pj_11;
107 Lisp_Object Vcharset_ideograph_daikanwa_2;
108 Lisp_Object Vcharset_ideograph_daikanwa;
109 Lisp_Object Vcharset_ethiopic_ucs;
/* decode_builtin_char() remaps code points of Vcharset_chinese_big5 onto
   one of these two charsets. */
111 Lisp_Object Vcharset_chinese_big5_1;
112 Lisp_Object Vcharset_chinese_big5_2;
/* Globals that exist only when composite characters are enabled. */
114 #ifdef ENABLE_COMPOSITE_CHARS
115 Lisp_Object Vcharset_composite;
117 /* Hash tables for composite chars. One maps string representing
118 composed chars to their equivalent chars; one goes the other way. */
120 Lisp_Object Vcomposite_char_char2string_hash_table;
121 Lisp_Object Vcomposite_char_string2char_hash_table;
/* Next free row/column position for a new composite character;
   NOTE(review): the allocation code that advances these is not visible
   here. */
123 static int composite_char_row_next;
124 static int composite_char_col_next;
126 #endif /* ENABLE_COMPOSITE_CHARS */
/* Pointer to the charset lookup tables.  make_charset() registers new
   charsets in its charset_by_leading_byte and charset_by_attributes
   members, and get_unallocated_leading_byte() advances its
   next_allocated_* counters. */
128 struct charset_lookup *chlook;
/* Object-description table for struct charset_lookup; the visible entry
   covers the charset_by_leading_byte array of Lisp_Objects.
   NOTE(review): the rest of this initializer and its terminator are not
   visible here. */
130 static const struct lrecord_description charset_lookup_description_1[] = {
131 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
/* Pairs the size of struct charset_lookup with the field table above. */
140 static const struct struct_description charset_lookup_description = {
141 sizeof (struct charset_lookup),
142 charset_lookup_description_1
146 /* Table of number of bytes in the string representation of a character
147 indexed by the first byte of that representation.
149 rep_bytes_by_first_byte(c) is more efficient than the equivalent
150 canonical computation:
152 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
/* NOTE(review): as it stands the initializer below holds 0xB0 values
   (eight rows of 1, two rows of 2, one row of 3/4) for a 0xA0-element
   array.  Presumably the two rows after the "0x80 - 0x8f" comment are
   alternatives selected by a preprocessor conditional that is not
   visible here -- confirm against the full file. */
154 const Bytecount rep_bytes_by_first_byte[0xA0] =
155 { /* 0x00 - 0x7f are for straight ASCII */
156 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
157 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 /* 0x80 - 0x8f are for Dimension-1 official charsets */
/* This row differs from the next only in its last entry (3 instead of 2). */
166 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
168 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
170 /* 0x90 - 0x9d are for Dimension-2 official charsets */
171 /* 0x9e is for Dimension-1 private charsets */
172 /* 0x9f is for Dimension-2 private charsets */
173 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Return the per-dimension table size for charset CS.
   put_char_ccs_code_point() uses this value as the length of each
   vector in a charset's decoding table.

   For a 94-character charset whose byte offset is not 33 the result is
   128 - byte_offset; otherwise it is CHARSET_CHARS (cs). */
179 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
181 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
183 /* ad-hoc method for `ascii' */
184 if ((CHARSET_CHARS (cs) == 94) &&
185 (CHARSET_BYTE_OFFSET (cs) != 33))
186 return 128 - CHARSET_BYTE_OFFSET (cs);
188 return CHARSET_CHARS (cs);
/* Same as CHARSET_BYTE_SIZE, but takes the charset as a Lisp_Object. */
191 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
/* Check the elements of decoding-table vector V for a charset of
   dimension DIM whose per-dimension size is CCS_LEN.

   The visible checks are: V must not be longer than CCS_LEN, and each
   element is tested for being nil or a character; the function also
   calls itself on an element with DIM - 1.
   NOTE(review): the return statements and the condition guarding the
   recursive call are not visible here, so the exact result codes are
   unknown. */
193 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
195 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
199 if (XVECTOR_LENGTH (v) > ccs_len)
202 for (i = 0; i < XVECTOR_LENGTH (v); i++)
204 Lisp_Object c = XVECTOR_DATA(v)[i];
206 if (!NILP (c) && !CHARP (c))
/* Recurse into the element with one dimension fewer. */
210 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Remove the entry for CODE_POINT from decoding-table vector V.

   The index into V is the byte of CODE_POINT selected by DIM (shift by
   8 * DIM bits, mask to 8 bits) minus BYTE_OFFSET; the slot reached is
   reset to Qnil.
   NOTE(review): how DIM is adjusted before indexing and how the nested
   vector NV is descended is not visible here.  No bounds check on I is
   visible either -- confirm the caller guarantees the byte lies within
   the vector. */
222 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
225 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
235 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
236 nv = XVECTOR_DATA(v)[i];
242 XVECTOR_DATA(v)[i] = Qnil;
/* Store CHARACTER at CODE_POINT in decoding-table vector V.

   The index is computed exactly as in decoding_table_remove_char().
   Missing intermediate levels are created as fresh vectors with the
   same length as V, filled with Qnil; CHARACTER is stored in the slot
   reached.
   NOTE(review): the loop/branch structure around these statements and
   any bounds check on I are not visible here. */
246 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
247 int code_point, Lisp_Object character);
249 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
250 int code_point, Lisp_Object character)
/* Every level of the table uses the length of the top-level vector. */
254 int ccs_len = XVECTOR_LENGTH (v);
259 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
260 nv = XVECTOR_DATA(v)[i];
/* Create the next-level vector and install it in the current slot. */
264 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
270 XVECTOR_DATA(v)[i] = character;
/* Record in coded charset CCS that CHARACTER has code point VALUE.

   VALUE is either an integer code point or, in the obsolete form, a
   list of bytes that is folded into an integer.  For charsets whose
   graphic is 1 the high bit of each byte is masked off.  Any previous
   mapping of CHARACTER in CCS is removed from the decoding table, the
   table is created if needed, and the new mapping is stored.  Invalid
   values signal "Invalid value for coded-charset".
   NOTE(review): many lines of this function, including its return
   statement, are not visible here. */
274 put_char_ccs_code_point (Lisp_Object character,
275 Lisp_Object ccs, Lisp_Object value)
/* The work below applies unless CCS is named `ucs' and VALUE already
   equals CHARACTER's own code.  NOTE(review): one line of this
   condition is not visible. */
277 if (!EQ (XCHARSET_NAME (ccs), Qucs)
279 || (XCHAR (character) != XINT (value)))
281 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
282 int dim = XCHARSET_DIMENSION (ccs);
283 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
284 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
288 { /* obsolete representation: value must be a list of bytes */
289 Lisp_Object ret = Fcar (value);
293 signal_simple_error ("Invalid value for coded-charset", value);
294 code_point = XINT (ret);
295 if (XCHARSET_GRAPHIC (ccs) == 1)
303 signal_simple_error ("Invalid value for coded-charset",
307 signal_simple_error ("Invalid value for coded-charset",
310 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Append the next byte J to the code point accumulated so far. */
312 code_point = (code_point << 8) | j;
/* From here on VALUE is the integer form of the code point. */
315 value = make_int (code_point);
317 else if (INTP (value))
319 code_point = XINT (value);
320 if (XCHARSET_GRAPHIC (ccs) == 1)
/* Clear the high bit of each of the four bytes. */
322 code_point &= 0x7F7F7F7F;
323 value = make_int (code_point);
327 signal_simple_error ("Invalid value for coded-charset", value);
/* Drop the code point CHARACTER currently has in CCS, if any. */
331 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
334 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
/* Create the top-level decoding table for CCS. */
339 XCHARSET_DECODING_TABLE (ccs)
340 = v = make_vector (ccs_len, Qnil);
343 decoding_table_put_char (v, dim, byte_offset, code_point, character);
/* Remove CHARACTER's mapping in coded charset CCS.

   If CCS has a vector decoding table, CHARACTER's current code point is
   looked up and removed from it; if CCS has a char-table encoding
   table, CHARACTER's entry there is reset to Qnil.
   NOTE(review): the return value and the guard on CPOS are not visible
   here. */
349 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
351 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
352 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
354 if (VECTORP (decoding_table))
356 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
360 decoding_table_remove_char (decoding_table,
361 XCHARSET_DIMENSION (ccs),
362 XCHARSET_BYTE_OFFSET (ccs),
366 if (CHAR_TABLEP (encoding_table))
368 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qnil);
/* File-scope variables used by the charset code.
   NOTE(review): several declaration lines in this group are not visible
   here; the bare identifier lines below are fragments of longer
   Lisp_Object declaration lists. */
376 int leading_code_private_11;
379 Lisp_Object Qcharsetp;
381 /* Qdoc_string, Qdimension, Qchars defined in general.c */
/* Presumably the symbols for the charset property names documented in
   the `make-charset' docstring ('registry, 'final, 'graphic,
   'direction, ...) -- confirm where they are initialized. */
382 Lisp_Object Qregistry, Qfinal, Qgraphic;
383 Lisp_Object Qdirection;
384 Lisp_Object Qreverse_direction_charset;
385 Lisp_Object Qleading_byte;
386 Lisp_Object Qshort_name, Qlong_name;
388 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
389 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60;
406 Qjapanese_jisx0208_1978,
410 Qjapanese_jisx0208_1990,
428 Qvietnamese_viscii_lower,
429 Qvietnamese_viscii_upper,
432 Qideograph_hanziku_1,
433 Qideograph_hanziku_2,
434 Qideograph_hanziku_3,
435 Qideograph_hanziku_4,
436 Qideograph_hanziku_5,
437 Qideograph_hanziku_6,
438 Qideograph_hanziku_7,
439 Qideograph_hanziku_8,
440 Qideograph_hanziku_9,
441 Qideograph_hanziku_10,
442 Qideograph_hanziku_11,
443 Qideograph_hanziku_12,
446 Qideograph_daikanwa_2,
466 Lisp_Object Ql2r, Qr2l;
/* Hash table from charset name to charset object: make_charset() adds
   entries with Fputhash and `find-charset' reads them with Fgethash. */
468 Lisp_Object Vcharset_hash_table;
470 /* Composite characters are characters constructed by overstriking two
471 or more regular characters.
473 1) The old Mule implementation involves storing composite characters
474 in a buffer as a tag followed by all of the actual characters
475 used to make up the composite character. I think this is a bad
476 idea; it greatly complicates code that wants to handle strings
477 one character at a time because it has to deal with the possibility
478 of great big ungainly characters. It's much more reasonable to
479 simply store an index into a table of composite characters.
481 2) The current implementation only allows for 16,384 separate
482 composite characters over the lifetime of the XEmacs process.
483 This could become a potential problem if the user
484 edited lots of different files that use composite characters.
485 Due to FSF bogosity, increasing the number of allowable
486 composite characters under Mule would decrease the number
487 of possible faces that can exist. Mule already has shrunk
488 this to 2048, and further shrinkage would become uncomfortable.
489 No such problems exist in XEmacs.
491 Composite characters could be represented as 0x80 C1 C2 C3,
492 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
493 for slightly under 2^20 (one million) composite characters
494 over the XEmacs process lifetime, and you only need to
495 increase the size of a Mule character from 19 to 21 bits.
496 Or you could use 0x80 C1 C2 C3 C4, allowing for about
497 85 million (slightly over 2^26) composite characters. */
500 /************************************************************************/
501 /* Basic Emchar functions */
502 /************************************************************************/
504 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
505 string in STR. Returns the number of bytes stored.
506 Do not call this directly. Use the macro set_charptr_emchar() instead.
*/
/* NOTE(review): two encoders are visible below -- a UTF-8 style one
   (multi-byte sequences of up to six bytes) and a leading-byte one
   built on BREAKUP_CHAR.  Presumably they are alternatives selected by
   a preprocessor conditional that is not visible here, as are the
   opening branch of the if-chain and the return statement. */
510 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
/* Two bytes: 0xc0-prefixed lead byte, then one continuation byte
   carrying the low 6 bits. */
525 else if ( c <= 0x7ff )
527 *p++ = (c >> 6) | 0xc0;
528 *p++ = (c & 0x3f) | 0x80;
/* Three bytes: 0xe0-prefixed lead byte. */
530 else if ( c <= 0xffff )
532 *p++ = (c >> 12) | 0xe0;
533 *p++ = ((c >> 6) & 0x3f) | 0x80;
534 *p++ = (c & 0x3f) | 0x80;
/* Four bytes: 0xf0-prefixed lead byte. */
536 else if ( c <= 0x1fffff )
538 *p++ = (c >> 18) | 0xf0;
539 *p++ = ((c >> 12) & 0x3f) | 0x80;
540 *p++ = ((c >> 6) & 0x3f) | 0x80;
541 *p++ = (c & 0x3f) | 0x80;
/* Five bytes: 0xf8-prefixed lead byte. */
543 else if ( c <= 0x3ffffff )
545 *p++ = (c >> 24) | 0xf8;
546 *p++ = ((c >> 18) & 0x3f) | 0x80;
547 *p++ = ((c >> 12) & 0x3f) | 0x80;
548 *p++ = ((c >> 6) & 0x3f) | 0x80;
549 *p++ = (c & 0x3f) | 0x80;
/* Six bytes: 0xfc-prefixed lead byte, for everything larger. */
553 *p++ = (c >> 30) | 0xfc;
554 *p++ = ((c >> 24) & 0x3f) | 0x80;
555 *p++ = ((c >> 18) & 0x3f) | 0x80;
556 *p++ = ((c >> 12) & 0x3f) | 0x80;
557 *p++ = ((c >> 6) & 0x3f) | 0x80;
558 *p++ = (c & 0x3f) | 0x80;
/* Leading-byte encoder: split C into its charset and position codes;
   private leading bytes are preceded by a prefix byte. */
561 BREAKUP_CHAR (c, charset, c1, c2);
562 lb = CHAR_LEADING_BYTE (c);
563 if (LEADING_BYTE_PRIVATE_P (lb))
564 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
566 if (EQ (charset, Vcharset_control_1))
575 /* Return the first character from a Mule-encoded string in STR,
576 assuming it's non-ASCII. Do not call this directly.
577 Use the macro charptr_emchar() instead. */
/* NOTE(review): two decoders are visible below -- one that classifies
   the first byte against 0xf8/0xf0/0xe0/0xc0 and then folds in 6 bits
   per following byte, and one based on leading bytes and MAKE_CHAR.
   Presumably they are alternatives under a preprocessor conditional
   that is not visible here. */
580 non_ascii_charptr_emchar (const Bufbyte *str)
593 else if ( b >= 0xf8 )
598 else if ( b >= 0xf0 )
603 else if ( b >= 0xe0 )
608 else if ( b >= 0xc0 )
/* Fold in the low 6 bits of each of the LEN remaining bytes. */
618 for( ; len > 0; len-- )
621 ch = ( ch << 6 ) | ( b & 0x3f );
625 Bufbyte i0 = *str, i1, i2 = 0;
/* Control-1 characters: the byte after the leading byte, minus 0x20. */
628 if (i0 == LEADING_BYTE_CONTROL_1)
629 return (Emchar) (*++str - 0x20);
631 if (LEADING_BYTE_PREFIX_P (i0))
636 charset = CHARSET_BY_LEADING_BYTE (i0);
637 if (XCHARSET_DIMENSION (charset) == 2)
640 return MAKE_CHAR (charset, i1, i2);
644 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
645 Do not call this directly. Use the macro valid_char_p() instead. */
/* The character is split into three fields.  The visible code
   range-checks field 2 (and later field 1) against the official and
   private limits, looks the charset up by leading byte, and in two
   places returns whether that charset has 96 characters.
   NOTE(review): the branch structure and the other return statements
   are not visible here. */
649 non_ascii_valid_char_p (Emchar ch)
653 /* Must have only lowest 19 bits set */
657 f1 = CHAR_FIELD1 (ch);
658 f2 = CHAR_FIELD2 (ch);
659 f3 = CHAR_FIELD3 (ch);
/* Field 2 must fall in either the official or the private range. */
665 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
666 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
667 f2 > MAX_CHAR_FIELD2_PRIVATE)
672 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
673 f2 <= MAX_CHAR_FIELD2_PRIVATE))
677 /* NOTE: This takes advantage of the fact that
678 FIELD2_TO_OFFICIAL_LEADING_BYTE and
679 FIELD2_TO_PRIVATE_LEADING_BYTE are the same. */
681 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
682 if (EQ (charset, Qnil))
684 return (XCHARSET_CHARS (charset) == 96);
/* Field 1 must likewise fall in the official or the private range. */
690 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
691 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
692 f1 > MAX_CHAR_FIELD1_PRIVATE)
694 if (f2 < 0x20 || f3 < 0x20)
697 #ifdef ENABLE_COMPOSITE_CHARS
/* Composite characters are looked up in the char-to-string hash table. */
698 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
700 if (UNBOUNDP (Fgethash (make_int (ch),
701 Vcomposite_char_char2string_hash_table,
706 #endif /* ENABLE_COMPOSITE_CHARS */
708 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
709 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
/* Official and private field-1 values map to leading bytes with
   different offsets. */
712 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
714 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
717 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
719 if (EQ (charset, Qnil))
721 return (XCHARSET_CHARS (charset) == 96);
727 /************************************************************************/
728 /* Basic string functions */
729 /************************************************************************/
731 /* Copy the character pointed to by SRC into DST. Do not call this
732 directly. Use the macro charptr_copy_char() instead.
733 Return the number of bytes copied. */
736 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
/* The length of the character is determined from its first byte. */
738 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
/* Count down from BYTES, advancing both pointers once per byte.
   NOTE(review): the loop body is not visible here. */
740 for (i = bytes; i; i--, dst++, src++)
746 /************************************************************************/
747 /* streams of Emchars */
748 /************************************************************************/
750 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
751 The functions below are not meant to be called directly; use
752 the macros in insdel.h. */
/* Finish reading one Emchar from STREAM when its first byte CH has
   already been read.  CH is stored first; the number of remaining bytes
   is derived from CH via REP_BYTES_BY_FIRST_BYTE, and each is read with
   Lstream_getc.  The assembled bytes are decoded with charptr_emchar.
   A negative result from Lstream_getc is only guarded by
   bufpos_checking_assert. */
755 Lstream_get_emchar_1 (Lstream *stream, int ch)
757 Bufbyte str[MAX_EMCHAR_LEN];
758 Bufbyte *strptr = str;
761 str[0] = (Bufbyte) ch;
763 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
765 int c = Lstream_getc (stream);
766 bufpos_checking_assert (c >= 0);
767 *++strptr = (Bufbyte) c;
769 return charptr_emchar (str);
/* Encode CH into a local buffer with set_charptr_emchar and write the
   resulting bytes to STREAM; returns whatever Lstream_write returns. */
773 Lstream_fput_emchar (Lstream *stream, Emchar ch)
775 Bufbyte str[MAX_EMCHAR_LEN];
776 Bytecount len = set_charptr_emchar (str, ch);
777 return Lstream_write (stream, str, len);
/* Encode CH into a local buffer with set_charptr_emchar and push the
   resulting bytes back onto STREAM with Lstream_unread. */
781 Lstream_funget_emchar (Lstream *stream, Emchar ch)
783 Bufbyte str[MAX_EMCHAR_LEN];
784 Bytecount len = set_charptr_emchar (str, ch);
785 Lstream_unread (stream, str, len);
789 /************************************************************************/
/* charset object */
791 /************************************************************************/
/* Mark method for charset objects: marks the Lisp_Object fields of the
   charset OBJ listed below.
   NOTE(review): the return statement, which may cover a further field,
   is not visible here. */
794 mark_charset (Lisp_Object obj)
796 Lisp_Charset *cs = XCHARSET (obj);
798 mark_object (cs->short_name);
799 mark_object (cs->long_name);
800 mark_object (cs->doc_string);
801 mark_object (cs->registry);
802 mark_object (cs->ccl_program);
804 mark_object (cs->decoding_table);
805 mark_object (cs->mother);
/* Print method for charset objects.  Writes
   "#<charset NAME SHORT-NAME LONG-NAME DOC-STRING ...>" to PRINTCHARFUN,
   where the tail holds the chars/dimension, direction, columns,
   graphic, final byte, registry and the object's uid.  There is also an
   error path reporting "printing unreadable object".
   NOTE(review): the condition guarding that error and the declaration
   of BUF are not visible here.  Both sprintf calls write into BUF
   without a length limit -- confirm BUF is large enough. */
811 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
813 Lisp_Charset *cs = XCHARSET (obj);
817 error ("printing unreadable object #<charset %s 0x%x>",
818 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
821 write_c_string ("#<charset ", printcharfun);
822 print_internal (CHARSET_NAME (cs), printcharfun, 0);
823 write_c_string (" ", printcharfun);
824 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
825 write_c_string (" ", printcharfun);
826 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
827 write_c_string (" ", printcharfun);
828 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
/* NOTE(review): the first and last arguments of this sprintf call are
   not visible here. */
829 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
831 CHARSET_DIMENSION (cs),
832 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
833 CHARSET_COLUMNS (cs),
834 CHARSET_GRAPHIC (cs),
836 write_c_string (buf, printcharfun);
837 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
838 sprintf (buf, " 0x%x>", cs->header.uid);
839 write_c_string (buf, printcharfun);
/* Object-description table listing the Lisp_Object fields of
   Lisp_Charset by offset.
   NOTE(review): some entries and the table terminator are not visible
   here. */
842 static const struct lrecord_description charset_description[] = {
843 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
844 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
845 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
846 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
847 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
848 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
849 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
851 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
852 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
/* Registers the "charset" record type with mark_charset and
   print_charset as its mark and print methods; the trailing arguments
   are not visible here. */
857 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
858 mark_charset, print_charset, 0, 0, 0,
862 /* Make a new charset. */
863 /* #### SJT Should generic properties be allowed? */
/* Allocates a Lisp_Charset, copies the arguments into its fields,
   registers it in the chlook tables (by ISO 2022 attributes and by
   leading byte) and enters it in Vcharset_hash_table under NAME.

   NOTE(review): DECODING_TABLE is accepted but the visible code sets
   the charset's decoding table to Qnil instead -- confirm this is
   intended.  The declaration of the REG parameter, the conditions
   around the alternative assignments below, and the return statement
   are not visible here. */
865 make_charset (Charset_ID id, Lisp_Object name,
866 unsigned short chars, unsigned char dimension,
867 unsigned char columns, unsigned char graphic,
868 Bufbyte final, unsigned char direction, Lisp_Object short_name,
869 Lisp_Object long_name, Lisp_Object doc,
871 Lisp_Object decoding_table,
872 Emchar min_code, Emchar max_code,
873 Emchar code_offset, unsigned char byte_offset,
874 Lisp_Object mother, unsigned char conversion)
877 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
881 XSETCHARSET (obj, cs);
883 CHARSET_ID (cs) = id;
884 CHARSET_NAME (cs) = name;
885 CHARSET_SHORT_NAME (cs) = short_name;
886 CHARSET_LONG_NAME (cs) = long_name;
887 CHARSET_CHARS (cs) = chars;
888 CHARSET_DIMENSION (cs) = dimension;
889 CHARSET_DIRECTION (cs) = direction;
890 CHARSET_COLUMNS (cs) = columns;
891 CHARSET_GRAPHIC (cs) = graphic;
892 CHARSET_FINAL (cs) = final;
893 CHARSET_DOC_STRING (cs) = doc;
894 CHARSET_REGISTRY (cs) = reg;
895 CHARSET_CCL_PROGRAM (cs) = Qnil;
896 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
898 CHARSET_DECODING_TABLE(cs) = Qnil;
899 CHARSET_MIN_CODE (cs) = min_code;
900 CHARSET_MAX_CODE (cs) = max_code;
901 CHARSET_CODE_OFFSET (cs) = code_offset;
902 CHARSET_BYTE_OFFSET (cs) = byte_offset;
903 CHARSET_MOTHER (cs) = mother;
904 CHARSET_CONVERSION (cs) = conversion;
/* Bytes per character in the internal representation: 1 for ASCII,
   otherwise the dimension plus 1 or plus 2.  NOTE(review): the
   condition choosing between the last two is not visible. */
908 if (id == LEADING_BYTE_ASCII)
909 CHARSET_REP_BYTES (cs) = 1;
911 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
913 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
918 /* some charsets do not have final characters. This includes
919 ASCII, Control-1, Composite, and the two faux private charsets. */
/* Index 0..3: bit 1 is set for dimension-2 charsets, bit 0 for
   96-character charsets. */
921 unsigned char iso2022_type
922 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
924 if (code_offset == 0)
/* NOTE(review): two forms of the attribute table are visible, one
   indexed by [type][final] and one by [type][final][direction];
   presumably alternatives under a preprocessor conditional. */
926 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
927 chlook->charset_by_attributes[iso2022_type][final] = obj;
931 (chlook->charset_by_attributes[iso2022_type][final][direction]));
932 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
/* The leading-byte slot must be free before it is claimed. */
936 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
937 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
939 /* Some charsets are "faux" and don't have names or really exist at
940 all except in the leading-byte table. */
942 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused private leading byte for a charset of the
   given DIMENSION, taken from the counters kept in chlook.  Each
   counter is compared against its upper limit before being used and
   then post-incremented.  The error "No more character sets free for
   this dimension" is signalled with DIMENSION as its datum.
   NOTE(review): the branch selecting among the three counters, the
   condition for the error, and the return statement are not visible
   here. */
947 get_unallocated_leading_byte (int dimension)
952 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
955 lb = chlook->next_allocated_leading_byte++;
959 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
962 lb = chlook->next_allocated_1_byte_leading_byte++;
966 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
969 lb = chlook->next_allocated_2_byte_leading_byte++;
975 ("No more character sets free for this dimension",
976 make_int (dimension));
982 /* Number of Big5 characters which have the same code in 1st byte. */
/* Evaluates to 157: second bytes 0x40..0x7E (63 values) plus
   0xA1..0xFE (94 values), the ranges accepted below. */
984 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* Compute the built-in character for CODE_POINT in CHARSET without
   consulting a decoding table.

   Big5 code points are first converted to a linear index and remapped
   onto Vcharset_chinese_big5_1 or Vcharset_chinese_big5_2 as a 94x94
   code point.  Charsets whose final byte is at least '0' map into the
   MIN_CHAR_94x94 / MIN_CHAR_96x96 ranges (and their 1-dimensional
   counterparts) by final byte and position.  Charsets with a non-zero
   max code map linearly through their code offset and are
   range-checked against min/max code.
   NOTE(review): several return statements, case labels and the
   fall-through results are not visible here. */
987 decode_builtin_char (Lisp_Object charset, int code_point)
991 if (EQ (charset, Vcharset_chinese_big5))
993 int c1 = code_point >> 8;
994 int c2 = code_point & 0xFF;
997 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
999 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
1000 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
/* Linear index: row times row size, plus the column with the gap
   between 0x7E and 0xA1 closed up (0x62 = 0xA1 - 63). */
1002 I = (c1 - 0xA1) * BIG5_SAME_ROW
1003 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
1007 charset = Vcharset_chinese_big5_1;
1011 charset = Vcharset_chinese_big5_2;
/* Rebase the index so rows from 0xC9 onward start at zero. */
1012 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
/* Re-express the index as a 94x94 code point with both bytes based at 33. */
1014 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
1017 if ((final = XCHARSET_FINAL (charset)) >= '0')
1019 if (XCHARSET_DIMENSION (charset) == 1)
1021 switch (XCHARSET_CHARS (charset))
1025 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1028 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1036 switch (XCHARSET_CHARS (charset))
1039 return MIN_CHAR_94x94
1040 + (final - '0') * 94 * 94
1041 + (((code_point >> 8) & 0x7F) - 33) * 94
1042 + ((code_point & 0x7F) - 33);
1044 return MIN_CHAR_96x96
1045 + (final - '0') * 96 * 96
1046 + (((code_point >> 8) & 0x7F) - 32) * 96
1047 + ((code_point & 0x7F) - 32);
1054 else if (XCHARSET_MAX_CODE (charset))
/* Linear position within the charset plus its code offset. */
1057 = (XCHARSET_DIMENSION (charset) == 1
1059 code_point - XCHARSET_BYTE_OFFSET (charset)
1061 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1062 * XCHARSET_CHARS (charset)
1063 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1064 + XCHARSET_CODE_OFFSET (charset);
1065 if ((cid < XCHARSET_MIN_CODE (charset))
1066 || (XCHARSET_MAX_CODE (charset) < cid))
/* Return the code point of character CH in CHARSET.

   The charset's encoding table is consulted first.  Otherwise the code
   is derived arithmetically: via the mother charset if there is one
   (recursively), then, if the code lies within the charset's
   [min code, max code] range, by splitting the distance from the code
   offset into bytes according to the charset's conversion
   (94x60, 94x94x60) or its chars/dimension.  A final branch handles
   charsets whose code offset is 0 or equals their min code, using the
   MIN_CHAR_94 / MIN_CHAR_96 / MIN_CHAR_94x94 / MIN_CHAR_96x96 ranges
   selected by the final byte.
   NOTE(review): many lines, including the failure return value and
   several range tests, are not visible here. */
1075 charset_code_point (Lisp_Object charset, Emchar ch)
1077 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
1080 if ( CHAR_TABLEP (encoding_table)
1081 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1086 Lisp_Object mother = XCHARSET_MOTHER (charset);
1087 int min = XCHARSET_MIN_CODE (charset);
1088 int max = XCHARSET_MAX_CODE (charset);
/* With a mother charset, start from CH's code point there. */
1091 if ( CHARSETP (mother) )
1092 code = charset_code_point (mother, ch);
1095 if ( (min <= code) && (code <= max) )
/* D is the zero-based position inside this charset. */
1097 int d = code - XCHARSET_CODE_OFFSET (charset);
1099 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1102 int cell = d % 94 + 33;
1108 return (row << 8) | cell;
1110 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1112 int plane = d / (94 * 60) + 33;
1113 int row = (d % (94 * 60)) / 94;
1114 int cell = d % 94 + 33;
1120 return (plane << 16) | (row << 8) | cell;
/* 94-character charsets: each byte is a base-94 digit of D plus 33. */
1122 else if (XCHARSET_CHARS (charset) == 94)
1124 if (XCHARSET_DIMENSION (charset) == 1)
1126 else if (XCHARSET_DIMENSION (charset) == 2)
1127 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1128 else if (XCHARSET_DIMENSION (charset) == 3)
1130 ( (d / (94 * 94) + 33) << 16)
1131 | ((d / 94 % 94 + 33) << 8)
1133 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1135 ( (d / (94 * 94 * 94) + 33) << 24)
1136 | ((d / (94 * 94) % 94 + 33) << 16)
1137 | ((d / 94 % 94 + 33) << 8)
/* 96-character charsets: each byte is a base-96 digit of D plus 32. */
1140 else if (XCHARSET_CHARS (charset) == 96)
1142 if (XCHARSET_DIMENSION (charset) == 1)
1144 else if (XCHARSET_DIMENSION (charset) == 2)
1145 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1146 else if (XCHARSET_DIMENSION (charset) == 3)
1148 ( (d / (96 * 96) + 32) << 16)
1149 | ((d / 96 % 96 + 32) << 8)
1151 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1153 ( (d / (96 * 96 * 96) + 32) << 24)
1154 | ((d / (96 * 96) % 96 + 32) << 16)
1155 | ((d / 96 % 96 + 32) << 8)
1159 return code - XCHARSET_CODE_OFFSET (charset);
1161 else if ( (XCHARSET_CODE_OFFSET (charset) == 0) ||
1162 (XCHARSET_CODE_OFFSET (charset)
1163 == XCHARSET_MIN_CODE (charset)) )
1167 if (XCHARSET_DIMENSION (charset) == 1)
1169 if (XCHARSET_CHARS (charset) == 94)
/* D is CH's distance from the start of this charset's slot, which is
   chosen by the final byte. */
1171 if (((d = ch - (MIN_CHAR_94
1172 + (XCHARSET_FINAL (charset) - '0') * 94))
1177 else if (XCHARSET_CHARS (charset) == 96)
1179 if (((d = ch - (MIN_CHAR_96
1180 + (XCHARSET_FINAL (charset) - '0') * 96))
1188 else if (XCHARSET_DIMENSION (charset) == 2)
1190 if (XCHARSET_CHARS (charset) == 94)
1192 if (((d = ch - (MIN_CHAR_94x94
1194 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1197 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1199 else if (XCHARSET_CHARS (charset) == 96)
1201 if (((d = ch - (MIN_CHAR_96x96
1203 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1206 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* Find the built-in charset and code point for character C from the
   numeric range C falls in.  The charset is stored through CHARSET and
   the code point is returned.

   Ranges are tested in ascending order.  In the MIN_CHAR_94 ..
   MAX_CHAR_96x96 ranges the charset is looked up with
   CHARSET_BY_ATTRIBUTES from the final byte implied by C's position;
   when no such charset exists, *CHARSET falls back to Vcharset_ucs.
   NOTE(review): the conditions of some branches and most of the
   fallback return statements are not visible here. */
1217 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1219 if (c <= MAX_CHAR_BASIC_LATIN)
1221 *charset = Vcharset_ascii;
1226 *charset = Vcharset_control_1;
1231 *charset = Vcharset_latin_iso8859_1;
1235 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1237 *charset = Vcharset_hebrew_iso8859_8;
1238 return c - MIN_CHAR_HEBREW + 0x20;
1241 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1243 *charset = Vcharset_thai_tis620;
1244 return c - MIN_CHAR_THAI + 0x20;
/* NOTE(review): this branch returns a Lisp list and does not set
   *CHARSET, unlike every other branch.  Presumably it, and possibly the
   Hebrew and Thai branches above, is disabled code whose comment
   delimiters are not visible here -- confirm against the full file. */
1247 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1248 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1250 return list2 (Vcharset_katakana_jisx0201,
1251 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1254 else if (c <= MAX_CHAR_BMP)
1256 *charset = Vcharset_ucs_bmp;
1259 else if (c <= MAX_CHAR_SMP)
1261 *charset = Vcharset_ucs_smp;
1262 return c - MIN_CHAR_SMP;
1264 else if (c <= MAX_CHAR_SIP)
1266 *charset = Vcharset_ucs_sip;
1267 return c - MIN_CHAR_SIP;
1269 else if (c < MIN_CHAR_DAIKANWA)
1271 *charset = Vcharset_ucs;
1274 else if (c <= MAX_CHAR_DAIKANWA)
1276 *charset = Vcharset_ideograph_daikanwa;
1277 return c - MIN_CHAR_DAIKANWA;
1279 else if (c < MIN_CHAR_94)
1281 *charset = Vcharset_ucs;
/* 94-character, dimension-1 slots: the final byte is the slot number
   plus '0'; the code point is the position in the slot plus 33. */
1284 else if (c <= MAX_CHAR_94)
1286 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1287 ((c - MIN_CHAR_94) / 94) + '0',
1288 CHARSET_LEFT_TO_RIGHT);
1289 if (!NILP (*charset))
1290 return ((c - MIN_CHAR_94) % 94) + 33;
1293 *charset = Vcharset_ucs;
/* 96-character, dimension-1 slots, based at 32. */
1297 else if (c <= MAX_CHAR_96)
1299 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1300 ((c - MIN_CHAR_96) / 96) + '0',
1301 CHARSET_LEFT_TO_RIGHT);
1302 if (!NILP (*charset))
1303 return ((c - MIN_CHAR_96) % 96) + 32;
1306 *charset = Vcharset_ucs;
/* 94x94 slots: high byte is the row, low byte the cell, both plus 33. */
1310 else if (c <= MAX_CHAR_94x94)
1313 = CHARSET_BY_ATTRIBUTES (94, 2,
1314 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1315 CHARSET_LEFT_TO_RIGHT);
1316 if (!NILP (*charset))
1317 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1318 | (((c - MIN_CHAR_94x94) % 94) + 33);
1321 *charset = Vcharset_ucs;
/* 96x96 slots: as above but based at 32.  The shift binds tighter than
   the bitwise OR, so this is (row << 8) | cell. */
1325 else if (c <= MAX_CHAR_96x96)
1328 = CHARSET_BY_ATTRIBUTES (96, 2,
1329 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1330 CHARSET_LEFT_TO_RIGHT);
1331 if (!NILP (*charset))
1332 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1333 | (((c - MIN_CHAR_96x96) % 96) + 32);
1336 *charset = Vcharset_ucs;
1342 *charset = Vcharset_ucs;
1347 Lisp_Object Vdefault_coded_charset_priority_list;
1351 /************************************************************************/
1352 /* Basic charset Lisp functions */
1353 /************************************************************************/
/* Lisp primitive `charsetp'.
   NOTE(review): the argument list and braces that follow the doc
   comment are not visible here. */
1355 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1356 Return non-nil if OBJECT is a charset.
*/
1360 return CHARSETP (object) ? Qt : Qnil;
/* Lisp primitive `find-charset': a charset object is returned as is; a
   symbol is looked up in Vcharset_hash_table, giving nil when absent.
   NOTE(review): the argument list and braces that follow the doc
   comment are not visible here. */
1363 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1364 Retrieve the charset of the given name.
1365 If CHARSET-OR-NAME is a charset object, it is simply returned.
1366 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1367 nil is returned. Otherwise the associated charset object is returned.
*/
1371 if (CHARSETP (charset_or_name))
1372 return charset_or_name;
1374 CHECK_SYMBOL (charset_or_name);
1375 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Lisp primitive `get-charset': like `find-charset', but signals
   "No such charset" when the lookup fails.
   NOTE(review): the argument list, the test guarding the error and the
   return statement are not visible here. */
1378 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1379 Retrieve the charset of the given name.
1380 Same as `find-charset' except an error is signalled if there is no such
1381 charset instead of returning nil.
*/
1385 Lisp_Object charset = Ffind_charset (name);
1388 signal_simple_error ("No such charset", name);
1392 /* We store the charsets in hash tables with the names as the key and the
1393 actual charset object as the value. Occasionally we need to use them
1394 in a list format. These routines provide us with that. */
/* Closure passed through elisp_maphash: carries a pointer to the list
   being built. */
1395 struct charset_list_closure
1397 Lisp_Object *charset_list;
/* Hash-table mapper: prepend KEY (the charset name) to the list that
   the closure points to.  VALUE (the charset object) is not used in
   the visible code.
   NOTE(review): the return statement is not visible here. */
1401 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1402 void *charset_list_closure)
1404 /* This function can GC */
1405 struct charset_list_closure *chcl =
1406 (struct charset_list_closure*) charset_list_closure;
1407 Lisp_Object *charset_list = chcl->charset_list;
1409 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
/* Lisp primitive `charset-list': walks Vcharset_hash_table with
   add_charset_to_list_mapper, collecting the keys into a list that is
   GC-protected while it is built.
   NOTE(review): the argument list, braces and the matching unprotect
   call are not visible here. */
1413 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1414 Return a list of the names of all defined charsets.
*/
1418 Lisp_Object charset_list = Qnil;
1419 struct gcpro gcpro1;
1420 struct charset_list_closure charset_list_closure;
1422 GCPRO1 (charset_list);
1423 charset_list_closure.charset_list = &charset_list;
1424 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1425 &charset_list_closure);
1428 return charset_list;
/* Lisp primitive `charset-name': resolves its argument with
   Fget_charset (which signals an error for unknown charsets) and
   returns that charset's name.
   NOTE(review): the argument list and braces that follow the doc
   comment are not visible here. */
1431 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1432 Return the name of charset CHARSET.
*/
1436 return XCHARSET_NAME (Fget_charset (charset));
1439 /* #### SJT Should generic properties be allowed? */
1440 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1441 Define a new character set.
1442 This function is for use with Mule support.
1443 NAME is a symbol, the name by which the character set is normally referred.
1444 DOC-STRING is a string describing the character set.
1445 PROPS is a property list, describing the specific nature of the
1446 character set. Recognized properties are:
1448 'short-name Short version of the charset name (ex: Latin-1)
1449 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1450 'registry A regular expression matching the font registry field for this character set.
1452 'dimension Number of octets used to index a character in this charset.
1453 Either 1 or 2. Defaults to 1.
1454 'columns Number of columns used to display a character in this charset.
1455 Only used in TTY mode. (Under X, the actual width of a
1456 character can be derived from the font used to display the
1457 characters.) If unspecified, defaults to the dimension
1458 (this is almost always the correct value).
1459 'chars Number of characters in each dimension (94 or 96).
1460 Defaults to 94. Note that if the dimension is 2, the
1461 character set thus described is 94x94 or 96x96.
1462 'final Final byte of ISO 2022 escape sequence. Must be
1463 supplied. Each combination of (DIMENSION, CHARS) defines a
1464 separate namespace for final bytes. Note that ISO
1465 2022 restricts the final byte to the range
1466 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1467 dimension == 2. Note also that final bytes in the range
1468 0x30 - 0x3F are reserved for user-defined (not official)
1470 'graphic 0 (use left half of font on output) or 1 (use right half
1471 of font on output). Defaults to 0. For example, for
1472 a font whose registry is ISO8859-1, the left half
1473 (octets 0x20 - 0x7F) is the `ascii' character set, while
1474 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1475 character set. With 'graphic set to 0, the octets
1476 will have their high bit cleared; with it set to 1,
1477 the octets will have their high bit set.
1478 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1480 'ccl-program A compiled CCL program used to convert a character in
1481 this charset into an index into the font. This is in
1482 addition to the 'graphic property. The CCL program
1483 is passed the octets of the character, with the high
1484 bit cleared and set depending upon whether the value
1485 of the 'graphic property is 0 or 1.
1487 (name, doc_string, props))
1489 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1490 int direction = CHARSET_LEFT_TO_RIGHT;
1491 Lisp_Object registry = Qnil;
1492 Lisp_Object charset;
1493 Lisp_Object ccl_program = Qnil;
1494 Lisp_Object short_name = Qnil, long_name = Qnil;
1495 Lisp_Object mother = Qnil;
1496 int min_code = 0, max_code = 0, code_offset = 0;
1497 int byte_offset = -1;
1500 CHECK_SYMBOL (name);
1501 if (!NILP (doc_string))
1502 CHECK_STRING (doc_string);
1504 charset = Ffind_charset (name);
1505 if (!NILP (charset))
1506 signal_simple_error ("Cannot redefine existing charset", name);
1509 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1511 if (EQ (keyword, Qshort_name))
1513 CHECK_STRING (value);
1517 if (EQ (keyword, Qlong_name))
1519 CHECK_STRING (value);
1523 else if (EQ (keyword, Qdimension))
1526 dimension = XINT (value);
1527 if (dimension < 1 ||
1534 signal_simple_error ("Invalid value for 'dimension", value);
1537 else if (EQ (keyword, Qchars))
1540 chars = XINT (value);
1541 if (chars != 94 && chars != 96
1543 && chars != 128 && chars != 256
1546 signal_simple_error ("Invalid value for 'chars", value);
1549 else if (EQ (keyword, Qcolumns))
1552 columns = XINT (value);
1553 if (columns != 1 && columns != 2)
1554 signal_simple_error ("Invalid value for 'columns", value);
1557 else if (EQ (keyword, Qgraphic))
1560 graphic = XINT (value);
1568 signal_simple_error ("Invalid value for 'graphic", value);
1571 else if (EQ (keyword, Qregistry))
1573 CHECK_STRING (value);
1577 else if (EQ (keyword, Qdirection))
1579 if (EQ (value, Ql2r))
1580 direction = CHARSET_LEFT_TO_RIGHT;
1581 else if (EQ (value, Qr2l))
1582 direction = CHARSET_RIGHT_TO_LEFT;
1584 signal_simple_error ("Invalid value for 'direction", value);
1587 else if (EQ (keyword, Qfinal))
1589 CHECK_CHAR_COERCE_INT (value);
1590 final = XCHAR (value);
1591 if (final < '0' || final > '~')
1592 signal_simple_error ("Invalid value for 'final", value);
1596 else if (EQ (keyword, Qmother))
1598 mother = Fget_charset (value);
1601 else if (EQ (keyword, Qmin_code))
1604 min_code = XUINT (value);
1607 else if (EQ (keyword, Qmax_code))
1610 max_code = XUINT (value);
1613 else if (EQ (keyword, Qcode_offset))
1616 code_offset = XUINT (value);
1619 else if (EQ (keyword, Qconversion))
1621 if (EQ (value, Q94x60))
1622 conversion = CONVERSION_94x60;
1623 else if (EQ (value, Q94x94x60))
1624 conversion = CONVERSION_94x94x60;
1626 signal_simple_error ("Unrecognized conversion", value);
1630 else if (EQ (keyword, Qccl_program))
1632 struct ccl_program test_ccl;
1634 if (setup_ccl_program (&test_ccl, value) < 0)
1635 signal_simple_error ("Invalid value for 'ccl-program", value);
1636 ccl_program = value;
1640 signal_simple_error ("Unrecognized property", keyword);
1646 error ("'final must be specified");
1648 if (dimension == 2 && final > 0x5F)
1650 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1653 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1654 CHARSET_LEFT_TO_RIGHT)) ||
1655 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1656 CHARSET_RIGHT_TO_LEFT)))
1658 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1660 id = get_unallocated_leading_byte (dimension);
1662 if (NILP (doc_string))
1663 doc_string = build_string ("");
1665 if (NILP (registry))
1666 registry = build_string ("");
1668 if (NILP (short_name))
1669 XSETSTRING (short_name, XSYMBOL (name)->name);
1671 if (NILP (long_name))
1672 long_name = doc_string;
1675 columns = dimension;
1677 if (byte_offset < 0)
1681 else if (chars == 96)
1687 charset = make_charset (id, name, chars, dimension, columns, graphic,
1688 final, direction, short_name, long_name,
1689 doc_string, registry,
1690 Qnil, min_code, max_code, code_offset, byte_offset,
1691 mother, conversion);
1692 if (!NILP (ccl_program))
1693 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Creates NEW-NAME as a twin of CHARSET with the direction flipped.
   Signals an error if CHARSET already has a reverse-direction charset
   or if NEW-NAME already names a charset.  The new charset copies
   chars, dimension, columns, graphic, final, the three descriptive
   strings and the registry from CHARSET, and receives its own freshly
   allocated leading byte.  Finally the two charsets are cross-linked
   through their reverse-direction-charset slots.

   NOTE(review): two alternative tails for the make_charset argument
   list appear below (one copying the decoding table, code range,
   offsets and conversion from CHARSET, one passing nil/zero values);
   presumably they are selected by conditional compilation -- confirm. */
1697 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1699 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1700 NEW-NAME is the name of the new charset. Return the new charset.
1702 (charset, new_name))
1704 Lisp_Object new_charset = Qnil;
1705 int id, chars, dimension, columns, graphic, final;
1707 Lisp_Object registry, doc_string, short_name, long_name;
1710 charset = Fget_charset (charset);
1711 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1712 signal_simple_error ("Charset already has reverse-direction charset",
1715 CHECK_SYMBOL (new_name);
1716 if (!NILP (Ffind_charset (new_name)))
1717 signal_simple_error ("Cannot redefine existing charset", new_name);
1719 cs = XCHARSET (charset);
1721 chars = CHARSET_CHARS (cs);
1722 dimension = CHARSET_DIMENSION (cs);
1723 columns = CHARSET_COLUMNS (cs);
1724 id = get_unallocated_leading_byte (dimension);
1726 graphic = CHARSET_GRAPHIC (cs);
1727 final = CHARSET_FINAL (cs);
1728 direction = CHARSET_RIGHT_TO_LEFT;
1729 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1730 direction = CHARSET_LEFT_TO_RIGHT;
1731 doc_string = CHARSET_DOC_STRING (cs);
1732 short_name = CHARSET_SHORT_NAME (cs);
1733 long_name = CHARSET_LONG_NAME (cs);
1734 registry = CHARSET_REGISTRY (cs);
1736 new_charset = make_charset (id, new_name, chars, dimension, columns,
1737 graphic, final, direction, short_name, long_name,
1738 doc_string, registry,
1740 CHARSET_DECODING_TABLE(cs),
1741 CHARSET_MIN_CODE(cs),
1742 CHARSET_MAX_CODE(cs),
1743 CHARSET_CODE_OFFSET(cs),
1744 CHARSET_BYTE_OFFSET(cs),
1746 CHARSET_CONVERSION (cs)
1748 Qnil, 0, 0, 0, 0, Qnil, 0
1752 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1753 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Checks that ALIAS is a symbol, resolves CHARSET through Fget_charset,
   and stores the charset object under ALIAS in Vcharset_hash_table.
   The return value is whatever Fputhash returns.  No check against an
   existing entry for ALIAS is visible here. */
1758 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1759 Define symbol ALIAS as an alias for CHARSET.
1763 CHECK_SYMBOL (alias);
1764 charset = Fget_charset (charset);
1765 return Fputhash (alias, charset, Vcharset_hash_table);
1768 /* #### Reverse direction charsets not yet implemented. */
/* Resolves CHARSET and returns the content of its
   reverse-direction-charset slot as stored (the slot value itself,
   not a name).
   NOTE(review): syms_of_mule_charset contains no DEFSUBR for this
   function, only a commented-out DEFSUBR (Freverse_direction_charset),
   so it may not be reachable from Lisp -- confirm. */
1770 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1772 Return the reverse-direction charset parallel to CHARSET, if any.
1773 This is the charset with the same properties (in particular, the same
1774 dimension, number of characters per dimension, and final byte) as
1775 CHARSET but whose characters are displayed in the opposite direction.
1779 charset = Fget_charset (charset);
1780 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Looks a charset up by its ISO 2022 attributes.  DIMENSION must be 1
   or 2, CHARS 94 or 96, FINAL in the range '0' - '~' (and at most 0x5F
   when DIMENSION is 2), DIRECTION one of l2r, r2l or nil; anything
   else signals an error.  The lookup goes through
   CHARSET_BY_ATTRIBUTES, trying left-to-right and then right-to-left
   when no direction was given.  The value returned on success is the
   charset's name, not the charset object.

   NOTE(review): make-charset additionally accepts 128 and 256 for
   'chars, which this function rejects -- confirm that is intended. */
1784 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1785 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1786 If DIRECTION is omitted, both directions will be checked (left-to-right
1787 will be returned if character sets exist for both directions).
1789 (dimension, chars, final, direction))
1791 int dm, ch, fi, di = -1;
1792 Lisp_Object obj = Qnil;
1794 CHECK_INT (dimension);
1795 dm = XINT (dimension);
1796 if (dm < 1 || dm > 2)
1797 signal_simple_error ("Invalid value for DIMENSION", dimension);
1801 if (ch != 94 && ch != 96)
1802 signal_simple_error ("Invalid value for CHARS", chars);
1804 CHECK_CHAR_COERCE_INT (final);
1806 if (fi < '0' || fi > '~')
1807 signal_simple_error ("Invalid value for FINAL", final);
1809 if (EQ (direction, Ql2r))
1810 di = CHARSET_LEFT_TO_RIGHT;
1811 else if (EQ (direction, Qr2l))
1812 di = CHARSET_RIGHT_TO_LEFT;
1813 else if (!NILP (direction))
1814 signal_simple_error ("Invalid value for DIRECTION", direction);
1816 if (dm == 2 && fi > 0x5F)
1818 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1822 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1824 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1827 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1830 return XCHARSET_NAME (obj);
/* Resolves CHARSET through Fget_charset and returns its short-name
   slot. */
1834 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1835 Return short name of CHARSET.
1839 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Resolves CHARSET through Fget_charset and returns its long-name
   slot. */
1842 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1843 Return long name of CHARSET.
1847 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Resolves CHARSET through Fget_charset and returns its doc-string
   slot, i.e. the "description" is the charset's doc string. */
1850 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1851 Return description of CHARSET.
1855 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Resolves CHARSET through Fget_charset and returns its dimension as a
   Lisp integer. */
1858 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1859 Return dimension of CHARSET.
1863 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Generic property reader.  CHARSET is resolved first, then PROP (which
   must be a symbol) is compared against each known keyword in turn and
   the matching slot is returned, boxed with make_int / make_char where
   the slot is a C integer.  Special cases: 'final yields nil when the
   stored final byte is 0; 'direction yields the symbol l2r or r2l;
   'reverse-direction-charset yields the other charset's name when the
   slot holds a charset and the raw slot value otherwise.  An unknown
   PROP signals an error.

   NOTE(review): 'code-offset and 'conversion are accepted by
   make-charset but have no visible branch here -- confirm whether that
   omission is intended. */
1866 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1867 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1868 Recognized properties are those listed in `make-charset', as well as
1869 'name and 'doc-string.
1875 charset = Fget_charset (charset);
1876 cs = XCHARSET (charset);
1878 CHECK_SYMBOL (prop);
1879 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1880 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1881 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1882 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1883 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1884 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1885 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1886 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1887 Qnil : make_char (CHARSET_FINAL (cs));
1888 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1889 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1890 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1891 if (EQ (prop, Qdirection))
1892 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1893 if (EQ (prop, Qreverse_direction_charset))
1895 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1896 /* #### Is this translation OK? If so, error checking sufficient? */
1897 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1900 if (EQ (prop, Qmother))
1901 return CHARSET_MOTHER (cs);
1902 if (EQ (prop, Qmin_code))
1903 return make_int (CHARSET_MIN_CODE (cs));
1904 if (EQ (prop, Qmax_code))
1905 return make_int (CHARSET_MAX_CODE (cs));
1907 signal_simple_error ("Unrecognized charset property name", prop);
1908 return Qnil; /* not reached */
/* Resolves CHARSET through Fget_charset and returns its leading byte as
   a Lisp integer; the "identification number" is the leading byte. */
1911 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1912 Return charset identification number of CHARSET.
1916 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1919 /* #### We need to figure out which properties we really want to
/* Resolves CHARSET, validates CCL-PROGRAM by setting it up into a
   throwaway struct ccl_program (a negative result from
   setup_ccl_program signals "Invalid ccl-program"), and only then
   stores the original Lisp value in the charset's ccl-program slot. */
1922 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1923 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1925 (charset, ccl_program))
1927 struct ccl_program test_ccl;
1929 charset = Fget_charset (charset);
1930 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1931 signal_simple_error ("Invalid ccl-program", ccl_program);
1932 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Walks every device and, where the device's charset_font_cache has an
   entry for CHARSET, empties that entry's hash table with Fclrhash.
   Devices without an entry for CHARSET are left untouched (the lookup
   uses Qunbound as the not-found marker). */
1937 invalidate_charset_font_caches (Lisp_Object charset)
1939 /* Invalidate font cache entries for charset on all devices. */
1940 Lisp_Object devcons, concons, hash_table;
1941 DEVICE_LOOP_NO_BREAK (devcons, concons)
1943 struct device *d = XDEVICE (XCAR (devcons));
1944 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1945 if (!UNBOUNDP (hash_table))
1946 Fclrhash (hash_table);
/* Resolves CHARSET, requires REGISTRY to be a string, stores it in the
   charset's registry slot, then drops the per-device font cache entries
   for this charset and reports a change of the default face's font
   property so that fonts are looked up again with the new registry. */
1950 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1951 Set the 'registry property of CHARSET to REGISTRY.
1953 (charset, registry))
1955 charset = Fget_charset (charset);
1956 CHECK_STRING (registry);
1957 XCHARSET_REGISTRY (charset) = registry;
1958 invalidate_charset_font_caches (charset);
1959 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Resolves CHARSET through Fget_charset and returns its decoding-table
   slot; the Lisp-level "mapping table" is the decoding table. */
1964 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1965 Return mapping-table of CHARSET.
1969 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Installs TABLE as the mapping of CHARSET.  TABLE is either nil or a
   vector; any other type signals wrong-type-argument.  A vector is
   first validated with decoding_table_check_elements against the
   charset's dimension and byte size, and the failure results are
   reported as "Too big table", "Invalid element is found" or
   "Something wrong".  The table entries are then walked according to
   the charset's dimension and each character found is given a
   char-attribute, keyed by the charset's name, whose value is the code
   point derived from the element's index plus the charset's byte
   offset; in the two-level case the outer index (plus offset) is
   shifted left by 8 to form the high byte.

   NOTE(review): presumably an element of the outer vector may itself be
   a vector (a row) or a single character, as both are handled below --
   confirm against the checker. */
1972 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1973 Set mapping-table of CHARSET to TABLE.
1977 struct Lisp_Charset *cs;
1981 charset = Fget_charset (charset);
1982 cs = XCHARSET (charset);
1986 CHARSET_DECODING_TABLE(cs) = Qnil;
1989 else if (VECTORP (table))
1991 int ccs_len = CHARSET_BYTE_SIZE (cs);
1992 int ret = decoding_table_check_elements (table,
1993 CHARSET_DIMENSION (cs),
1998 signal_simple_error ("Too big table", table);
2000 signal_simple_error ("Invalid element is found", table);
2002 signal_simple_error ("Something wrong", table);
2004 CHARSET_DECODING_TABLE(cs) = Qnil;
2007 signal_error (Qwrong_type_argument,
2008 list2 (build_translated_string ("vector-or-nil-p"),
2011 byte_offset = CHARSET_BYTE_OFFSET (cs);
2012 switch (CHARSET_DIMENSION (cs))
2015 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2017 Lisp_Object c = XVECTOR_DATA(table)[i];
2020 Fput_char_attribute (c, XCHARSET_NAME (charset),
2021 make_int (i + byte_offset));
2025 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2027 Lisp_Object v = XVECTOR_DATA(table)[i];
2033 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2035 Lisp_Object c = XVECTOR_DATA(v)[j];
2039 (c, XCHARSET_NAME (charset),
2040 make_int ( ( (i + byte_offset) << 8 )
2046 Fput_char_attribute (v, XCHARSET_NAME (charset),
2047 make_int (i + byte_offset));
2056 /************************************************************************/
2057 /* Lisp primitives for working with characters */
2058 /************************************************************************/
/* Converts a code point of CHARSET into a character.  CHARSET is
   resolved first; charsets whose graphic value is 1 get a special
   adjustment of the code before decoding.  With DEFINED-ONLY nil the
   lookup uses DECODE_CHAR, otherwise DECODE_DEFINED_CHAR.  A negative
   decoding result is reported to Lisp as nil, anything else is boxed
   with make_char. */
2061 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2062 Make a character from CHARSET and code-point CODE.
2063 If DEFINED_ONLY is non-nil, builtin character is not returned.
2064 If corresponding character is not found, nil is returned.
2066 (charset, code, defined_only))
2070 charset = Fget_charset (charset);
2073 if (XCHARSET_GRAPHIC (charset) == 1)
2075 if (NILP (defined_only))
2076 c = DECODE_CHAR (charset, c);
2078 c = DECODE_DEFINED_CHAR (charset, c);
2079 return c >= 0 ? make_char (c) : Qnil;
/* Like decode-char, but goes through decode_builtin_char first and only
   falls back to Fdecode_char (with DEFINED-ONLY nil) when that yields a
   negative result.  The latin-viscii charset is special-cased: the
   character is decoded normally, then its attributes for
   latin-viscii-lower and latin-viscii-upper are consulted and the
   lookup is redirected to whichever of those charsets applies.  As in
   Fdecode_char, charsets with graphic value 1 get a code adjustment
   before decoding. */
2082 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2083 Make a builtin character from CHARSET and code-point CODE.
2089 charset = Fget_charset (charset);
2091 if (EQ (charset, Vcharset_latin_viscii))
2093 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2099 (ret = Fget_char_attribute (chr,
2100 Vcharset_latin_viscii_lower,
2103 charset = Vcharset_latin_viscii_lower;
2107 (ret = Fget_char_attribute (chr,
2108 Vcharset_latin_viscii_upper,
2111 charset = Vcharset_latin_viscii_upper;
2118 if (XCHARSET_GRAPHIC (charset) == 1)
2121 c = decode_builtin_char (charset, c);
2122 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
/* Builds a character from a charset and one or two position octets.
   The permitted octet range depends on the charset: 0 - 127 for ascii,
   0 - 31 for control-1, 0 - 255 for 256-character sets, 33 - 126 for
   94-character sets and 32 - 127 otherwise (96-character sets).  An
   octet outside the range signals args-out-of-range.  For a
   one-dimensional charset the result is built from ARG1 alone and a
   second octet is rejected; otherwise ARG2 is masked to seven bits,
   range-checked the same way, and combined with ARG1. */
2126 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2127 Make a character from CHARSET and octets ARG1 and ARG2.
2128 ARG2 is required only for characters from two-dimensional charsets.
2129 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2130 character s with caron.
2132 (charset, arg1, arg2))
2136 int lowlim, highlim;
2138 charset = Fget_charset (charset);
2139 cs = XCHARSET (charset);
2141 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2142 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2144 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2146 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2147 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2150 /* It is useful (and safe, according to Olivier Galibert) to strip
2151 the 8th bit off ARG1 and ARG2 because it allows programmers to
2152 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2153 Latin 2 code of the character. */
2161 if (a1 < lowlim || a1 > highlim)
2162 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2164 if (CHARSET_DIMENSION (cs) == 1)
2168 ("Charset is of dimension one; second octet must be nil", arg2);
2169 return make_char (MAKE_CHAR (charset, a1, 0));
2178 a2 = XINT (arg2) & 0x7f;
2180 if (a2 < lowlim || a2 > highlim)
2181 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2183 return make_char (MAKE_CHAR (charset, a1, a2));
/* Accepts a character (or an integer coercible to one) and returns the
   name of the charset that CHAR_CHARSET reports for it -- the name, not
   the charset object. */
2186 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2187 Return the character set of CHARACTER.
2191 CHECK_CHAR_COERCE_INT (character);
2193 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
/* Splits CHARACTER with BREAKUP_CHAR into its charset and two octets
   and returns one of the octets as a Lisp integer: the first when N is
   nil or 0, the second when N is 1.  Any other N signals an error. */
2196 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2197 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2198 N defaults to 0 if omitted.
2202 Lisp_Object charset;
2205 CHECK_CHAR_COERCE_INT (character);
2207 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2209 if (NILP (n) || EQ (n, Qzero))
2210 return make_int (octet0);
2211 else if (EQ (n, make_int (1)))
2212 return make_int (octet1);
2214 signal_simple_error ("Octet number must be 0 or 1", n);
/* Accepts a character (or an integer coercible to one), resolves
   CHARSET, and asks charset_code_point for the character's code point
   in that charset.  A non-negative result is returned as a Lisp
   integer; a negative result means no code point was found. */
2218 DEFUN ("encode-char", Fencode_char, 2, 2, 0, /*
2219 Return code-point of CHARACTER in specified CHARSET.
2221 (character, charset))
2225 CHECK_CHAR_COERCE_INT (character);
2226 charset = Fget_charset (charset);
2227 code_point = charset_code_point (charset, XCHAR (character));
2228 if (code_point >= 0)
2229 return make_int (code_point);
/* Returns a list whose head is the name of CHARACTER's charset and
   whose tail holds the position codes.  Two implementations are
   visible below.  The first obtains a code point with ENCODE_CHAR and
   conses its low byte onto the front of the result once per dimension,
   then conses the charset name in front.  The second uses BREAKUP_CHAR
   and builds a two- or three-element list depending on whether the
   charset's dimension is 2.  CHARSET and the result list are
   GC-protected while the list is built.

   NOTE(review): the two implementations are presumably alternatives
   selected by conditional compilation -- confirm. */
2235 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2236 Return list of charset and one or two position-codes of CHARACTER.
2240 /* This function can GC */
2241 struct gcpro gcpro1, gcpro2;
2242 Lisp_Object charset = Qnil;
2243 Lisp_Object rc = Qnil;
2251 GCPRO2 (charset, rc);
2252 CHECK_CHAR_COERCE_INT (character);
2255 code_point = ENCODE_CHAR (XCHAR (character), charset);
2256 dimension = XCHARSET_DIMENSION (charset);
2257 while (dimension > 0)
2259 rc = Fcons (make_int (code_point & 255), rc);
2263 rc = Fcons (XCHARSET_NAME (charset), rc);
2265 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2267 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2269 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2273 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2282 #ifdef ENABLE_COMPOSITE_CHARS
2283 /************************************************************************/
2284 /* composite character functions */
2285 /************************************************************************/
/* Maps the LEN bytes at STR to a composite character.  The bytes are
   turned into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  When a new character has to
   be allocated it is built in Vcharset_composite from the next free
   row/column pair and recorded in both directions (char -> string and
   string -> char).  The column counter is then advanced; on reaching
   128 it wraps to 32 and the row counter is incremented.  Allocation
   fails with "No more composite chars available" once the row counter
   has reached 128. */
2288 lookup_composite_char (Bufbyte *str, int len)
2290 Lisp_Object lispstr = make_string (str, len);
2291 Lisp_Object ch = Fgethash (lispstr,
2292 Vcomposite_char_string2char_hash_table,
2298 if (composite_char_row_next >= 128)
2299 signal_simple_error ("No more composite chars available", lispstr);
2300 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2301 composite_char_col_next);
2302 Fputhash (make_char (emch), lispstr,
2303 Vcomposite_char_char2string_hash_table);
2304 Fputhash (lispstr, make_char (emch),
2305 Vcomposite_char_string2char_hash_table);
2306 composite_char_col_next++;
2307 if (composite_char_col_next >= 128)
2309 composite_char_col_next = 32;
2310 composite_char_row_next++;
/* Inverse of lookup_composite_char: fetches the string recorded for
   composite character CH in Vcomposite_char_char2string_hash_table.
   CH is expected to have been registered already; a missing entry trips
   the assertion rather than signalling a Lisp error. */
2319 composite_char_string (Emchar ch)
2321 Lisp_Object str = Fgethash (make_char (ch),
2322 Vcomposite_char_char2string_hash_table,
2324 assert (!UNBOUNDP (str));
/* Lisp wrapper around lookup_composite_char: requires STRING to be a
   string and passes its data and length on, boxing the resulting
   character with make_char. */
2328 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2329 Convert a string into a single composite character.
2330 The character is the result of overstriking all the characters in
2335 CHECK_STRING (string);
2336 return make_char (lookup_composite_char (XSTRING_DATA (string),
2337 XSTRING_LENGTH (string)));
/* Lisp wrapper around composite_char_string.  The argument must be a
   character whose leading byte is LEADING_BYTE_COMPOSITE; anything else
   signals "Must be composite char". */
2340 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2341 Return a string of the characters comprising a composite character.
2349 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2350 signal_simple_error ("Must be composite char", ch);
2351 return composite_char_string (emch);
2353 #endif /* ENABLE_COMPOSITE_CHARS */
2356 /************************************************************************/
2357 /* initialization */
2358 /************************************************************************/
/* Symbol-level initialization for this file: registers the charset
   lrecord implementation, exposes the Lisp primitives defined above
   with DEFSUBR, and interns the symbols used as property keywords
   (registry, final, graphic, ...), as direction values (l2r, r2l) and
   as names of the predefined charsets.  The composite-char primitives
   are registered only under ENABLE_COMPOSITE_CHARS. */
2361 syms_of_mule_charset (void)
2363 INIT_LRECORD_IMPLEMENTATION (charset);
2365 DEFSUBR (Fcharsetp);
2366 DEFSUBR (Ffind_charset);
2367 DEFSUBR (Fget_charset);
2368 DEFSUBR (Fcharset_list);
2369 DEFSUBR (Fcharset_name);
2370 DEFSUBR (Fmake_charset);
2371 DEFSUBR (Fmake_reverse_direction_charset);
2372 /* DEFSUBR (Freverse_direction_charset); */
2373 DEFSUBR (Fdefine_charset_alias);
2374 DEFSUBR (Fcharset_from_attributes);
2375 DEFSUBR (Fcharset_short_name);
2376 DEFSUBR (Fcharset_long_name);
2377 DEFSUBR (Fcharset_description);
2378 DEFSUBR (Fcharset_dimension);
2379 DEFSUBR (Fcharset_property);
2380 DEFSUBR (Fcharset_id);
2381 DEFSUBR (Fset_charset_ccl_program);
2382 DEFSUBR (Fset_charset_registry);
2384 DEFSUBR (Fcharset_mapping_table);
2385 DEFSUBR (Fset_charset_mapping_table);
2389 DEFSUBR (Fdecode_char);
2390 DEFSUBR (Fdecode_builtin_char);
2391 DEFSUBR (Fencode_char);
2393 DEFSUBR (Fmake_char);
2394 DEFSUBR (Fchar_charset);
2395 DEFSUBR (Fchar_octet);
2396 DEFSUBR (Fsplit_char);
2398 #ifdef ENABLE_COMPOSITE_CHARS
2399 DEFSUBR (Fmake_composite_char);
2400 DEFSUBR (Fcomposite_char_string);
2403 defsymbol (&Qcharsetp, "charsetp");
2404 defsymbol (&Qregistry, "registry");
2405 defsymbol (&Qfinal, "final");
2406 defsymbol (&Qgraphic, "graphic");
2407 defsymbol (&Qdirection, "direction");
2408 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2409 defsymbol (&Qshort_name, "short-name");
2410 defsymbol (&Qlong_name, "long-name");
2412 defsymbol (&Qmother, "mother");
2413 defsymbol (&Qmin_code, "min-code");
2414 defsymbol (&Qmax_code, "max-code");
2415 defsymbol (&Qcode_offset, "code-offset");
2416 defsymbol (&Qconversion, "conversion");
2417 defsymbol (&Q94x60, "94x60");
2418 defsymbol (&Q94x94x60, "94x94x60");
2421 defsymbol (&Ql2r, "l2r");
2422 defsymbol (&Qr2l, "r2l");
2424 /* Charsets, compatible with FSF 20.3
2425 Naming convention is Script-Charset[-Edition] */
2426 defsymbol (&Qascii, "ascii");
2427 defsymbol (&Qcontrol_1, "control-1");
2428 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2429 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2430 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2431 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2432 defsymbol (&Qthai_tis620, "thai-tis620");
2433 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2434 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2435 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2436 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2437 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2438 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2439 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2440 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2441 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2442 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2443 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2444 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2445 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2446 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2447 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2448 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2450 defsymbol (&Qucs, "ucs");
2451 defsymbol (&Qucs_bmp, "ucs-bmp");
2452 defsymbol (&Qucs_smp, "ucs-smp");
2453 defsymbol (&Qucs_sip, "ucs-sip");
2454 defsymbol (&Qucs_cns, "ucs-cns");
2455 defsymbol (&Qucs_jis, "ucs-jis");
2456 defsymbol (&Qucs_ks, "ucs-ks");
2457 defsymbol (&Qucs_big5, "ucs-big5");
2458 defsymbol (&Qlatin_viscii, "latin-viscii");
2459 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2460 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2461 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2462 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2463 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2464 defsymbol (&Qideograph_gt, "ideograph-gt");
2465 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2466 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2467 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2468 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2469 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2470 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2471 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2472 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2473 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2474 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2475 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2476 defsymbol (&Qideograph_daikanwa_2, "ideograph-daikanwa-2");
2477 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2478 defsymbol (&Qchinese_big5, "chinese-big5");
2479 defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp");
2480 defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
2481 defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
2482 defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
2483 defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
2484 defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
2485 defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
2486 defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
2487 defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
2488 defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
2489 defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
2490 defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
2491 defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
2492 defsymbol (&Qchina3_jef, "china3-jef");
2493 defsymbol (&Qideograph_cbeta, "ideograph-cbeta");
2494 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2496 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2497 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2499 defsymbol (&Qcomposite, "composite");
/* Variable-level initialization for this file: allocates the
   zero-filled charset lookup structure and registers it as a dump
   root, fills the lookup tables (by leading byte and by attributes)
   with Qnil, seeds the counters from which private leading bytes are
   allocated, and declares the Lisp variables leading-code-private-11
   and default-coded-charset-priority-list.

   NOTE(review): charset_by_attributes is initialized below both as a
   two-level and as a three-level table, and two sets of
   next-allocated counters are seeded; presumably these are
   alternatives selected by conditional compilation -- confirm.
   leading_code_private_11 is also assigned the same value both before
   and after its DEFVAR_INT. */
2503 vars_of_mule_charset (void)
2510 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2511 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2513 /* Table of charsets indexed by leading byte. */
2514 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2515 chlook->charset_by_leading_byte[i] = Qnil;
2518 /* Table of charsets indexed by type/final-byte. */
2519 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2520 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2521 chlook->charset_by_attributes[i][j] = Qnil;
2523 /* Table of charsets indexed by type/final-byte/direction. */
2524 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2525 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2526 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2527 chlook->charset_by_attributes[i][j][k] = Qnil;
2531 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2533 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2534 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2538 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2539 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2540 Leading-code of private TYPE9N charset of column-width 1.
2542 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2546 Vdefault_coded_charset_priority_list = Qnil;
2547 DEFVAR_LISP ("default-coded-charset-priority-list",
2548 &Vdefault_coded_charset_priority_list /*
2549 Default order of preferred coded-character-sets.
2555 complex_vars_of_mule_charset (void)
2557 staticpro (&Vcharset_hash_table);
2558 Vcharset_hash_table =
2559 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2561 /* Predefined character sets. We store them into variables for
2565 staticpro (&Vcharset_ucs);
2567 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2568 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2569 build_string ("UCS"),
2570 build_string ("UCS"),
2571 build_string ("ISO/IEC 10646"),
2573 Qnil, 0, 0xFFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2574 staticpro (&Vcharset_ucs_bmp);
2576 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2577 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2578 build_string ("BMP"),
2579 build_string ("UCS-BMP"),
2580 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2582 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2583 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2584 staticpro (&Vcharset_ucs_smp);
2586 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2587 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2588 build_string ("SMP"),
2589 build_string ("UCS-SMP"),
2590 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2591 build_string ("UCS00-1"),
2592 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2593 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2594 staticpro (&Vcharset_ucs_sip);
2596 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2597 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2598 build_string ("SIP"),
2599 build_string ("UCS-SIP"),
2600 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2601 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2602 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2603 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2604 staticpro (&Vcharset_ucs_cns);
2606 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2607 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2608 build_string ("UCS for CNS"),
2609 build_string ("UCS for CNS 11643"),
2610 build_string ("ISO/IEC 10646 for CNS 11643"),
2613 Qnil, CONVERSION_IDENTICAL);
2614 staticpro (&Vcharset_ucs_jis);
2616 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2617 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2618 build_string ("UCS for JIS"),
2619 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2620 build_string ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2622 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2623 staticpro (&Vcharset_ucs_ks);
2625 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2626 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2627 build_string ("UCS for KS"),
2628 build_string ("UCS for CCS defined by KS"),
2629 build_string ("ISO/IEC 10646 for Korean Standards"),
2631 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2632 staticpro (&Vcharset_ucs_big5);
2634 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2635 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2636 build_string ("UCS for Big5"),
2637 build_string ("UCS for Big5"),
2638 build_string ("ISO/IEC 10646 for Big5"),
2640 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
/* Zero-valued character-range bounds.
   MIN_CHAR_THAI / MAX_CHAR_THAI are passed to make_charset for the
   TIS620 charset further down.  The Hebrew pair is commented out; the
   ISO8859-8 make_charset call passes literal 0s annotated with these
   names instead.
   NOTE(review): the indented `# define' form suggests these live inside
   a preprocessor conditional that is not visible here -- confirm.  */
2642 # define MIN_CHAR_THAI 0
2643 # define MAX_CHAR_THAI 0
2644 /* # define MIN_CHAR_HEBREW 0 */
2645 /* # define MAX_CHAR_HEBREW 0 */
2646 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2647 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2649 staticpro (&Vcharset_ascii);
2651 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2652 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2653 build_string ("ASCII"),
2654 build_string ("ASCII)"),
2655 build_string ("ASCII (ISO646 IRV)"),
2656 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2657 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2658 staticpro (&Vcharset_control_1);
2659 Vcharset_control_1 =
2660 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2661 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2662 build_string ("C1"),
2663 build_string ("Control characters"),
2664 build_string ("Control characters 128-191"),
2666 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2667 staticpro (&Vcharset_latin_iso8859_1);
2668 Vcharset_latin_iso8859_1 =
2669 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2670 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2671 build_string ("Latin-1"),
2672 build_string ("ISO8859-1 (Latin-1)"),
2673 build_string ("ISO8859-1 (Latin-1)"),
2674 build_string ("iso8859-1"),
2675 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2676 staticpro (&Vcharset_latin_iso8859_2);
2677 Vcharset_latin_iso8859_2 =
2678 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2679 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2680 build_string ("Latin-2"),
2681 build_string ("ISO8859-2 (Latin-2)"),
2682 build_string ("ISO8859-2 (Latin-2)"),
2683 build_string ("iso8859-2"),
2684 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2685 staticpro (&Vcharset_latin_iso8859_3);
2686 Vcharset_latin_iso8859_3 =
2687 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2688 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2689 build_string ("Latin-3"),
2690 build_string ("ISO8859-3 (Latin-3)"),
2691 build_string ("ISO8859-3 (Latin-3)"),
2692 build_string ("iso8859-3"),
2693 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2694 staticpro (&Vcharset_latin_iso8859_4);
2695 Vcharset_latin_iso8859_4 =
2696 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2697 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2698 build_string ("Latin-4"),
2699 build_string ("ISO8859-4 (Latin-4)"),
2700 build_string ("ISO8859-4 (Latin-4)"),
2701 build_string ("iso8859-4"),
2702 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2703 staticpro (&Vcharset_thai_tis620);
2704 Vcharset_thai_tis620 =
2705 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2706 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2707 build_string ("TIS620"),
2708 build_string ("TIS620 (Thai)"),
2709 build_string ("TIS620.2529 (Thai)"),
2710 build_string ("tis620"),
2711 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI,
2712 MIN_CHAR_THAI, 32, Qnil, CONVERSION_IDENTICAL);
2713 staticpro (&Vcharset_greek_iso8859_7);
2714 Vcharset_greek_iso8859_7 =
2715 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2716 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2717 build_string ("ISO8859-7"),
2718 build_string ("ISO8859-7 (Greek)"),
2719 build_string ("ISO8859-7 (Greek)"),
2720 build_string ("iso8859-7"),
2721 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2722 staticpro (&Vcharset_arabic_iso8859_6);
2723 Vcharset_arabic_iso8859_6 =
2724 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2725 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2726 build_string ("ISO8859-6"),
2727 build_string ("ISO8859-6 (Arabic)"),
2728 build_string ("ISO8859-6 (Arabic)"),
2729 build_string ("iso8859-6"),
2730 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2731 staticpro (&Vcharset_hebrew_iso8859_8);
2732 Vcharset_hebrew_iso8859_8 =
2733 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2734 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2735 build_string ("ISO8859-8"),
2736 build_string ("ISO8859-8 (Hebrew)"),
2737 build_string ("ISO8859-8 (Hebrew)"),
2738 build_string ("iso8859-8"),
2740 0 /* MIN_CHAR_HEBREW */,
2741 0 /* MAX_CHAR_HEBREW */, 0, 32,
2742 Qnil, CONVERSION_IDENTICAL);
2743 staticpro (&Vcharset_katakana_jisx0201);
2744 Vcharset_katakana_jisx0201 =
2745 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2746 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2747 build_string ("JISX0201 Kana"),
2748 build_string ("JISX0201.1976 (Japanese Kana)"),
2749 build_string ("JISX0201.1976 Japanese Kana"),
2750 build_string ("jisx0201\\.1976"),
2751 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2752 staticpro (&Vcharset_latin_jisx0201);
2753 Vcharset_latin_jisx0201 =
2754 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2755 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2756 build_string ("JISX0201 Roman"),
2757 build_string ("JISX0201.1976 (Japanese Roman)"),
2758 build_string ("JISX0201.1976 Japanese Roman"),
2759 build_string ("jisx0201\\.1976"),
2760 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2761 staticpro (&Vcharset_cyrillic_iso8859_5);
2762 Vcharset_cyrillic_iso8859_5 =
2763 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2764 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2765 build_string ("ISO8859-5"),
2766 build_string ("ISO8859-5 (Cyrillic)"),
2767 build_string ("ISO8859-5 (Cyrillic)"),
2768 build_string ("iso8859-5"),
2769 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2770 staticpro (&Vcharset_latin_iso8859_9);
2771 Vcharset_latin_iso8859_9 =
2772 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2773 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2774 build_string ("Latin-5"),
2775 build_string ("ISO8859-9 (Latin-5)"),
2776 build_string ("ISO8859-9 (Latin-5)"),
2777 build_string ("iso8859-9"),
2778 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2779 staticpro (&Vcharset_japanese_jisx0208_1978);
2780 Vcharset_japanese_jisx0208_1978 =
2781 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2782 Qjapanese_jisx0208_1978, 94, 2,
2783 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2784 build_string ("JIS X0208:1978"),
2785 build_string ("JIS X0208:1978 (Japanese)"),
2787 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2788 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2789 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2790 staticpro (&Vcharset_chinese_gb2312);
2791 Vcharset_chinese_gb2312 =
2792 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2793 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2794 build_string ("GB2312"),
2795 build_string ("GB2312)"),
2796 build_string ("GB2312 Chinese simplified"),
2797 build_string ("gb2312"),
2798 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2799 staticpro (&Vcharset_chinese_gb12345);
2800 Vcharset_chinese_gb12345 =
2801 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2802 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2803 build_string ("G1"),
2804 build_string ("GB 12345)"),
2805 build_string ("GB 12345-1990"),
2806 build_string ("GB12345\\(\\.1990\\)?-0"),
2807 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2808 staticpro (&Vcharset_japanese_jisx0208);
2809 Vcharset_japanese_jisx0208 =
2810 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2811 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2812 build_string ("JISX0208"),
2813 build_string ("JIS X0208:1983 (Japanese)"),
2814 build_string ("JIS X0208:1983 Japanese Kanji"),
2815 build_string ("jisx0208\\.1983"),
2816 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2818 staticpro (&Vcharset_japanese_jisx0208_1990);
2819 Vcharset_japanese_jisx0208_1990 =
2820 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2821 Qjapanese_jisx0208_1990, 94, 2,
2822 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2823 build_string ("JISX0208-1990"),
2824 build_string ("JIS X0208:1990 (Japanese)"),
2825 build_string ("JIS X0208:1990 Japanese Kanji"),
2826 build_string ("jisx0208\\.1990"),
2828 MIN_CHAR_JIS_X0208_1990,
2829 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2830 Qnil, CONVERSION_IDENTICAL);
2832 staticpro (&Vcharset_korean_ksc5601);
2833 Vcharset_korean_ksc5601 =
2834 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2835 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2836 build_string ("KSC5601"),
2837 build_string ("KSC5601 (Korean"),
2838 build_string ("KSC5601 Korean Hangul and Hanja"),
2839 build_string ("ksc5601"),
2840 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2841 staticpro (&Vcharset_japanese_jisx0212);
2842 Vcharset_japanese_jisx0212 =
2843 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2844 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2845 build_string ("JISX0212"),
2846 build_string ("JISX0212 (Japanese)"),
2847 build_string ("JISX0212 Japanese Supplement"),
2848 build_string ("jisx0212"),
2849 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
/* CHINESE_CNS_PLANE_RE(n): builds a pattern string by adjacent
   string-literal concatenation: "cns11643[.-]\\(.*[.-]\\)?" followed by
   N and a trailing "$".  N must itself be a string literal (the callers
   below pass "1" and "2"); the result is handed to build_string in the
   CNS 11643 make_charset calls.  */
2851 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2852 staticpro (&Vcharset_chinese_cns11643_1);
2853 Vcharset_chinese_cns11643_1 =
2854 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2855 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2856 build_string ("CNS11643-1"),
2857 build_string ("CNS11643-1 (Chinese traditional)"),
2859 ("CNS 11643 Plane 1 Chinese traditional"),
2860 build_string (CHINESE_CNS_PLANE_RE("1")),
2861 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2862 staticpro (&Vcharset_chinese_cns11643_2);
2863 Vcharset_chinese_cns11643_2 =
2864 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2865 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2866 build_string ("CNS11643-2"),
2867 build_string ("CNS11643-2 (Chinese traditional)"),
2869 ("CNS 11643 Plane 2 Chinese traditional"),
2870 build_string (CHINESE_CNS_PLANE_RE("2")),
2871 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2873 staticpro (&Vcharset_latin_tcvn5712);
2874 Vcharset_latin_tcvn5712 =
2875 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2876 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2877 build_string ("TCVN 5712"),
2878 build_string ("TCVN 5712 (VSCII-2)"),
2879 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
2880 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2881 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2882 staticpro (&Vcharset_latin_viscii_lower);
2883 Vcharset_latin_viscii_lower =
2884 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2885 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2886 build_string ("VISCII lower"),
2887 build_string ("VISCII lower (Vietnamese)"),
2888 build_string ("VISCII lower (Vietnamese)"),
2889 build_string ("MULEVISCII-LOWER"),
2890 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2891 staticpro (&Vcharset_latin_viscii_upper);
2892 Vcharset_latin_viscii_upper =
2893 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2894 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2895 build_string ("VISCII upper"),
2896 build_string ("VISCII upper (Vietnamese)"),
2897 build_string ("VISCII upper (Vietnamese)"),
2898 build_string ("MULEVISCII-UPPER"),
2899 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2900 staticpro (&Vcharset_latin_viscii);
2901 Vcharset_latin_viscii =
2902 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2903 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2904 build_string ("VISCII"),
2905 build_string ("VISCII 1.1 (Vietnamese)"),
2906 build_string ("VISCII 1.1 (Vietnamese)"),
2907 build_string ("VISCII1\\.1"),
2908 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2909 staticpro (&Vcharset_chinese_big5);
2910 Vcharset_chinese_big5 =
2911 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2912 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2913 build_string ("Big5"),
2914 build_string ("Big5"),
2915 build_string ("Big5 Chinese traditional"),
2916 build_string ("big5"),
2918 0 /* MIN_CHAR_BIG5_CDP */,
2919 0 /* MAX_CHAR_BIG5_CDP */, 0, 0,
2920 Qnil, CONVERSION_IDENTICAL);
2921 staticpro (&Vcharset_chinese_big5_cdp);
2922 Vcharset_chinese_big5_cdp =
2923 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
2924 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2925 build_string ("Big5-CDP"),
2926 build_string ("Big5 + CDP extension"),
2927 build_string ("Big5 with CDP extension"),
2928 build_string ("big5\\.cdp-0"),
2929 Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2930 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
/* DEF_HANZIKU(n): sets up the charset variable
   Vcharset_ideograph_hanziku_<n>.
   It passes the variable's address to staticpro, then assigns it the
   value returned by make_charset, called with LEADING_BYTE_HANZIKU_<n>,
   Qideograph_hanziku_<n>, strings built by stringizing N ("HZK-<n>",
   "HANZIKU-<n>", "HANZIKU (pseudo BIG5 encoding) part <n>",
   "hanziku-<n>$") and the MIN_CHAR_HANZIKU_<n> / MAX_CHAR_HANZIKU_<n>
   constants.
   The expansion is two statements ending in `;', so do not use it as
   the unbraced body of an if/else or loop.  */
2931 #define DEF_HANZIKU(n) \
2932 staticpro (&Vcharset_ideograph_hanziku_##n); \
2933 Vcharset_ideograph_hanziku_##n = \
2934 make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
2935 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
2936 build_string ("HZK-"#n), \
2937 build_string ("HANZIKU-"#n), \
2938 build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
2940 ("hanziku-"#n"$"), \
2941 Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, \
2942 MIN_CHAR_HANZIKU_##n, 0, Qnil, CONVERSION_IDENTICAL);
2955 staticpro (&Vcharset_china3_jef);
2956 Vcharset_china3_jef =
2957 make_charset (LEADING_BYTE_CHINA3_JEF, Qchina3_jef, 256, 2,
2958 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2959 build_string ("JC3"),
2960 build_string ("JEF + CHINA3"),
2961 build_string ("JEF + CHINA3 private characters"),
2962 build_string ("china3jef-0"),
2963 Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF,
2964 MIN_CHAR_CHINA3_JEF, 0, Qnil, CONVERSION_IDENTICAL);
2965 staticpro (&Vcharset_ideograph_cbeta);
2966 Vcharset_ideograph_cbeta =
2967 make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,
2968 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2969 build_string ("CB"),
2970 build_string ("CBETA"),
2971 build_string ("CBETA private characters"),
2972 build_string ("cbeta-0"),
2973 Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA,
2974 MIN_CHAR_CBETA, 0, Qnil, CONVERSION_IDENTICAL);
2975 staticpro (&Vcharset_ideograph_gt);
2976 Vcharset_ideograph_gt =
2977 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
2978 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2979 build_string ("GT"),
2980 build_string ("GT"),
2981 build_string ("GT"),
2983 Qnil, MIN_CHAR_GT, MAX_CHAR_GT,
2984 MIN_CHAR_GT, 0, Qnil, CONVERSION_IDENTICAL);
/* DEF_GT_PJ(n): sets up the charset variable
   Vcharset_ideograph_gt_pj_<n>.
   It passes the variable's address to staticpro, then assigns it the
   value returned by make_charset, called with LEADING_BYTE_GT_PJ_<n>,
   Qideograph_gt_pj_<n>, and strings built by stringizing N:
   "GT-PJ-<n>", "GT (pseudo JIS encoding) part <n>",
   "GT 2000 (pseudo JIS encoding) part <n>", and the pattern
   "\\(GTpj-<n>\\|jisx0208\\.GT-<n>\\)$".
   The expansion is two statements ending in `;', so do not use it as
   the unbraced body of an if/else or loop.  */
2985 #define DEF_GT_PJ(n) \
2986 staticpro (&Vcharset_ideograph_gt_pj_##n); \
2987 Vcharset_ideograph_gt_pj_##n = \
2988 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
2989 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
2990 build_string ("GT-PJ-"#n), \
2991 build_string ("GT (pseudo JIS encoding) part "#n), \
2992 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
2994 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
2995 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3008 staticpro (&Vcharset_ideograph_daikanwa_2);
3009 Vcharset_ideograph_daikanwa_2 =
3010 make_charset (LEADING_BYTE_DAIKANWA_2, Qideograph_daikanwa_2, 256, 2,
3011 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3012 build_string ("Daikanwa Rev."),
3013 build_string ("Morohashi's Daikanwa Rev."),
3015 ("Daikanwa dictionary (revised version)"),
3016 build_string ("Daikanwa\\(\\.[0-9]+\\)?-2"),
3017 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
3018 staticpro (&Vcharset_ideograph_daikanwa);
3019 Vcharset_ideograph_daikanwa =
3020 make_charset (LEADING_BYTE_DAIKANWA_3, Qideograph_daikanwa, 256, 2,
3021 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3022 build_string ("Daikanwa"),
3023 build_string ("Morohashi's Daikanwa Rev.2"),
3025 ("Daikanwa dictionary (second revised version)"),
3026 build_string ("Daikanwa\\(\\.[0-9]+\\)?-3"),
3027 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA,
3028 MIN_CHAR_DAIKANWA, 0, Qnil, CONVERSION_IDENTICAL);
3030 staticpro (&Vcharset_ethiopic_ucs);
3031 Vcharset_ethiopic_ucs =
3032 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
3033 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3034 build_string ("Ethiopic (UCS)"),
3035 build_string ("Ethiopic (UCS)"),
3036 build_string ("Ethiopic of UCS"),
3037 build_string ("Ethiopic-Unicode"),
3038 Qnil, 0x1200, 0x137F, 0, 0,
3039 Qnil, CONVERSION_IDENTICAL);
3041 staticpro (&Vcharset_chinese_big5_1);
3042 Vcharset_chinese_big5_1 =
3043 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3044 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3045 build_string ("Big5"),
3046 build_string ("Big5 (Level-1)"),
3048 ("Big5 Level-1 Chinese traditional"),
3049 build_string ("big5"),
3050 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3051 staticpro (&Vcharset_chinese_big5_2);
3052 Vcharset_chinese_big5_2 =
3053 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3054 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3055 build_string ("Big5"),
3056 build_string ("Big5 (Level-2)"),
3058 ("Big5 Level-2 Chinese traditional"),
3059 build_string ("big5"),
3060 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
3062 #ifdef ENABLE_COMPOSITE_CHARS
3063 /* #### For simplicity, we put composite chars into a 96x96 charset.
3064 This is going to lead to problems because you can run out of
3065 room, esp. as we don't yet recycle numbers. */
3066 staticpro (&Vcharset_composite);
3067 Vcharset_composite =
3068 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3069 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3070 build_string ("Composite"),
3071 build_string ("Composite characters"),
3072 build_string ("Composite characters"),
3075 /* #### not dumped properly */
3076 composite_char_row_next = 32;
3077 composite_char_col_next = 32;
3079 Vcomposite_char_string2char_hash_table =
3080 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3081 Vcomposite_char_char2string_hash_table =
3082 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3083 staticpro (&Vcomposite_char_string2char_hash_table);
3084 staticpro (&Vcomposite_char_char2string_hash_table);
3085 #endif /* ENABLE_COMPOSITE_CHARS */