1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_cns;
70 Lisp_Object Vcharset_ucs_jis;
71 Lisp_Object Vcharset_ucs_ks;
72 Lisp_Object Vcharset_ucs_big5;
73 Lisp_Object Vcharset_latin_viscii;
74 Lisp_Object Vcharset_latin_tcvn5712;
75 Lisp_Object Vcharset_latin_viscii_lower;
76 Lisp_Object Vcharset_latin_viscii_upper;
77 Lisp_Object Vcharset_chinese_big5;
78 Lisp_Object Vcharset_chinese_big5_cdp;
79 Lisp_Object Vcharset_ideograph_hanziku_1;
80 Lisp_Object Vcharset_ideograph_hanziku_2;
81 Lisp_Object Vcharset_ideograph_hanziku_3;
82 Lisp_Object Vcharset_ideograph_hanziku_4;
83 Lisp_Object Vcharset_ideograph_hanziku_5;
84 Lisp_Object Vcharset_ideograph_hanziku_6;
85 Lisp_Object Vcharset_ideograph_hanziku_7;
86 Lisp_Object Vcharset_ideograph_hanziku_8;
87 Lisp_Object Vcharset_ideograph_hanziku_9;
88 Lisp_Object Vcharset_ideograph_hanziku_10;
89 Lisp_Object Vcharset_ideograph_hanziku_11;
90 Lisp_Object Vcharset_ideograph_hanziku_12;
91 Lisp_Object Vcharset_china3_jef;
92 Lisp_Object Vcharset_ideograph_cbeta;
93 Lisp_Object Vcharset_ideograph_gt;
94 Lisp_Object Vcharset_ideograph_gt_pj_1;
95 Lisp_Object Vcharset_ideograph_gt_pj_2;
96 Lisp_Object Vcharset_ideograph_gt_pj_3;
97 Lisp_Object Vcharset_ideograph_gt_pj_4;
98 Lisp_Object Vcharset_ideograph_gt_pj_5;
99 Lisp_Object Vcharset_ideograph_gt_pj_6;
100 Lisp_Object Vcharset_ideograph_gt_pj_7;
101 Lisp_Object Vcharset_ideograph_gt_pj_8;
102 Lisp_Object Vcharset_ideograph_gt_pj_9;
103 Lisp_Object Vcharset_ideograph_gt_pj_10;
104 Lisp_Object Vcharset_ideograph_gt_pj_11;
105 Lisp_Object Vcharset_ideograph_daikanwa;
106 Lisp_Object Vcharset_mojikyo;
107 Lisp_Object Vcharset_mojikyo_2022_1;
108 Lisp_Object Vcharset_mojikyo_pj_1;
109 Lisp_Object Vcharset_mojikyo_pj_2;
110 Lisp_Object Vcharset_mojikyo_pj_3;
111 Lisp_Object Vcharset_mojikyo_pj_4;
112 Lisp_Object Vcharset_mojikyo_pj_5;
113 Lisp_Object Vcharset_mojikyo_pj_6;
114 Lisp_Object Vcharset_mojikyo_pj_7;
115 Lisp_Object Vcharset_mojikyo_pj_8;
116 Lisp_Object Vcharset_mojikyo_pj_9;
117 Lisp_Object Vcharset_mojikyo_pj_10;
118 Lisp_Object Vcharset_mojikyo_pj_11;
119 Lisp_Object Vcharset_mojikyo_pj_12;
120 Lisp_Object Vcharset_mojikyo_pj_13;
121 Lisp_Object Vcharset_mojikyo_pj_14;
122 Lisp_Object Vcharset_mojikyo_pj_15;
123 Lisp_Object Vcharset_mojikyo_pj_16;
124 Lisp_Object Vcharset_mojikyo_pj_17;
125 Lisp_Object Vcharset_mojikyo_pj_18;
126 Lisp_Object Vcharset_mojikyo_pj_19;
127 Lisp_Object Vcharset_mojikyo_pj_20;
128 Lisp_Object Vcharset_mojikyo_pj_21;
129 Lisp_Object Vcharset_ethiopic_ucs;
131 Lisp_Object Vcharset_chinese_big5_1;
132 Lisp_Object Vcharset_chinese_big5_2;
134 #ifdef ENABLE_COMPOSITE_CHARS
135 Lisp_Object Vcharset_composite;
137 /* Hash tables for composite chars. One maps strings representing
138 composed chars to their equivalent chars; one goes the other way. */
140 Lisp_Object Vcomposite_char_char2string_hash_table;
141 Lisp_Object Vcomposite_char_string2char_hash_table;
143 static int composite_char_row_next;
144 static int composite_char_col_next;
146 #endif /* ENABLE_COMPOSITE_CHARS */
148 struct charset_lookup *chlook;
150 static const struct lrecord_description charset_lookup_description_1[] = {
151 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
160 static const struct struct_description charset_lookup_description = {
161 sizeof (struct charset_lookup),
162 charset_lookup_description_1
166 /* Table of number of bytes in the string representation of a character
167 indexed by the first byte of that representation.
169 rep_bytes_by_first_byte(c) is more efficient than the equivalent
170 canonical computation:
172 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
174 const Bytecount rep_bytes_by_first_byte[0xA0] =
175 { /* 0x00 - 0x7f are for straight ASCII */
176 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
177 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
178 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
179 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
180 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
181 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
182 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
183 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
184 /* 0x80 - 0x8f are for Dimension-1 official charsets */
186 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
188 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
190 /* 0x90 - 0x9d are for Dimension-2 official charsets */
191 /* 0x9e is for Dimension-1 private charsets */
192 /* 0x9f is for Dimension-2 private charsets */
193 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Return the size value for charset CS that put_char_ccs_code_point()
   obtains through XCHARSET_BYTE_SIZE and uses as the length of each
   decoding-table vector.  The visible code yields CHARSET_CHARS (cs),
   except that a charset declared with 94 characters whose byte offset
   is not 33 yields 128 - CHARSET_BYTE_OFFSET (cs) instead. */
199 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
201 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
203 /* ad-hoc method for `ascii' */
204 if ((CHARSET_CHARS (cs) == 94) &&
205 (CHARSET_BYTE_OFFSET (cs) != 33))
206 return 128 - CHARSET_BYTE_OFFSET (cs);
208 return CHARSET_CHARS (cs);
211 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
213 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
215 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
219 if (XVECTOR_LENGTH (v) > ccs_len)
222 for (i = 0; i < XVECTOR_LENGTH (v); i++)
224 Lisp_Object c = XVECTOR_DATA(v)[i];
226 if (!NILP (c) && !CHARP (c))
230 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
242 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
245 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
255 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
256 nv = XVECTOR_DATA(v)[i];
262 XVECTOR_DATA(v)[i] = Qnil;
266 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
267 int code_point, Lisp_Object character);
269 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
270 int code_point, Lisp_Object character)
274 int ccs_len = XVECTOR_LENGTH (v);
279 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
280 nv = XVECTOR_DATA(v)[i];
284 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
290 XVECTOR_DATA(v)[i] = character;
294 put_char_ccs_code_point (Lisp_Object character,
295 Lisp_Object ccs, Lisp_Object value)
297 Lisp_Object encoding_table;
299 if (!EQ (XCHARSET_NAME (ccs), Qucs)
300 || (XCHAR (character) != XINT (value)))
302 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
303 int dim = XCHARSET_DIMENSION (ccs);
304 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
305 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
309 { /* obsolete representation: value must be a list of bytes */
310 Lisp_Object ret = Fcar (value);
314 signal_simple_error ("Invalid value for coded-charset", value);
315 code_point = XINT (ret);
316 if (XCHARSET_GRAPHIC (ccs) == 1)
324 signal_simple_error ("Invalid value for coded-charset",
328 signal_simple_error ("Invalid value for coded-charset",
331 if (XCHARSET_GRAPHIC (ccs) == 1)
333 code_point = (code_point << 8) | j;
336 value = make_int (code_point);
338 else if (INTP (value))
340 code_point = XINT (value);
341 if (XCHARSET_GRAPHIC (ccs) == 1)
343 code_point &= 0x7F7F7F7F;
344 value = make_int (code_point);
348 signal_simple_error ("Invalid value for coded-charset", value);
352 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
355 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
360 XCHARSET_DECODING_TABLE (ccs)
361 = v = make_older_vector (ccs_len, Qnil);
364 decoding_table_put_char (v, dim, byte_offset, code_point, character);
366 if (NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
368 XCHARSET_ENCODING_TABLE (ccs)
369 = encoding_table = make_char_id_table (Qnil);
371 put_char_id_table (XCHAR_TABLE(encoding_table), character, value);
376 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
378 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
379 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
381 if (VECTORP (decoding_table))
383 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
387 decoding_table_remove_char (decoding_table,
388 XCHARSET_DIMENSION (ccs),
389 XCHARSET_BYTE_OFFSET (ccs),
393 if (CHAR_TABLEP (encoding_table))
395 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qnil);
403 int leading_code_private_11;
406 Lisp_Object Qcharsetp;
408 /* Qdoc_string, Qdimension, Qchars defined in general.c */
409 Lisp_Object Qregistry, Qfinal, Qgraphic;
410 Lisp_Object Qdirection;
411 Lisp_Object Qreverse_direction_charset;
412 Lisp_Object Qleading_byte;
413 Lisp_Object Qshort_name, Qlong_name;
429 Qjapanese_jisx0208_1978,
433 Qjapanese_jisx0208_1990,
449 Qvietnamese_viscii_lower,
450 Qvietnamese_viscii_upper,
453 Qideograph_hanziku_1,
454 Qideograph_hanziku_2,
455 Qideograph_hanziku_3,
456 Qideograph_hanziku_4,
457 Qideograph_hanziku_5,
458 Qideograph_hanziku_6,
459 Qideograph_hanziku_7,
460 Qideograph_hanziku_8,
461 Qideograph_hanziku_9,
462 Qideograph_hanziku_10,
463 Qideograph_hanziku_11,
464 Qideograph_hanziku_12,
509 Lisp_Object Ql2r, Qr2l;
511 Lisp_Object Vcharset_hash_table;
513 /* Composite characters are characters constructed by overstriking two
514 or more regular characters.
516 1) The old Mule implementation involves storing composite characters
517 in a buffer as a tag followed by all of the actual characters
518 used to make up the composite character. I think this is a bad
519 idea; it greatly complicates code that wants to handle strings
520 one character at a time because it has to deal with the possibility
521 of great big ungainly characters. It's much more reasonable to
522 simply store an index into a table of composite characters.
524 2) The current implementation only allows for 16,384 separate
525 composite characters over the lifetime of the XEmacs process.
526 This could become a potential problem if the user
527 edited lots of different files that use composite characters.
528 Due to FSF bogosity, increasing the number of allowable
529 composite characters under Mule would decrease the number
530 of possible faces that can exist. Mule already has shrunk
531 this to 2048, and further shrinkage would become uncomfortable.
532 No such problems exist in XEmacs.
534 Composite characters could be represented as 0x80 C1 C2 C3,
535 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
536 for slightly under 2^20 (one million) composite characters
537 over the XEmacs process lifetime, and you only need to
538 increase the size of a Mule character from 19 to 21 bits.
539 Or you could use 0x80 C1 C2 C3 C4, allowing for about
540 85 million (slightly over 2^26) composite characters. */
543 /************************************************************************/
544 /* Basic Emchar functions */
545 /************************************************************************/
547 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
548 string in STR. Returns the number of bytes stored.
549 Do not call this directly. Use the macro set_charptr_emchar() instead. */
553 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
568 else if ( c <= 0x7ff )
570 *p++ = (c >> 6) | 0xc0;
571 *p++ = (c & 0x3f) | 0x80;
573 else if ( c <= 0xffff )
575 *p++ = (c >> 12) | 0xe0;
576 *p++ = ((c >> 6) & 0x3f) | 0x80;
577 *p++ = (c & 0x3f) | 0x80;
579 else if ( c <= 0x1fffff )
581 *p++ = (c >> 18) | 0xf0;
582 *p++ = ((c >> 12) & 0x3f) | 0x80;
583 *p++ = ((c >> 6) & 0x3f) | 0x80;
584 *p++ = (c & 0x3f) | 0x80;
586 else if ( c <= 0x3ffffff )
588 *p++ = (c >> 24) | 0xf8;
589 *p++ = ((c >> 18) & 0x3f) | 0x80;
590 *p++ = ((c >> 12) & 0x3f) | 0x80;
591 *p++ = ((c >> 6) & 0x3f) | 0x80;
592 *p++ = (c & 0x3f) | 0x80;
596 *p++ = (c >> 30) | 0xfc;
597 *p++ = ((c >> 24) & 0x3f) | 0x80;
598 *p++ = ((c >> 18) & 0x3f) | 0x80;
599 *p++ = ((c >> 12) & 0x3f) | 0x80;
600 *p++ = ((c >> 6) & 0x3f) | 0x80;
601 *p++ = (c & 0x3f) | 0x80;
604 BREAKUP_CHAR (c, charset, c1, c2);
605 lb = CHAR_LEADING_BYTE (c);
606 if (LEADING_BYTE_PRIVATE_P (lb))
607 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
609 if (EQ (charset, Vcharset_control_1))
618 /* Return the first character from a Mule-encoded string in STR,
619 assuming it's non-ASCII. Do not call this directly.
620 Use the macro charptr_emchar() instead. */
623 non_ascii_charptr_emchar (const Bufbyte *str)
636 else if ( b >= 0xf8 )
641 else if ( b >= 0xf0 )
646 else if ( b >= 0xe0 )
651 else if ( b >= 0xc0 )
661 for( ; len > 0; len-- )
664 ch = ( ch << 6 ) | ( b & 0x3f );
668 Bufbyte i0 = *str, i1, i2 = 0;
671 if (i0 == LEADING_BYTE_CONTROL_1)
672 return (Emchar) (*++str - 0x20);
674 if (LEADING_BYTE_PREFIX_P (i0))
679 charset = CHARSET_BY_LEADING_BYTE (i0);
680 if (XCHARSET_DIMENSION (charset) == 2)
683 return MAKE_CHAR (charset, i1, i2);
687 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
688 Do not call this directly. Use the macro valid_char_p() instead. */
692 non_ascii_valid_char_p (Emchar ch)
696 /* Must have only lowest 19 bits set */
700 f1 = CHAR_FIELD1 (ch);
701 f2 = CHAR_FIELD2 (ch);
702 f3 = CHAR_FIELD3 (ch);
708 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
709 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
710 f2 > MAX_CHAR_FIELD2_PRIVATE)
715 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
716 f2 <= MAX_CHAR_FIELD2_PRIVATE))
720 /* NOTE: This takes advantage of the fact that
721 FIELD2_TO_OFFICIAL_LEADING_BYTE and
722 FIELD2_TO_PRIVATE_LEADING_BYTE are the same. */
724 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
725 if (EQ (charset, Qnil))
727 return (XCHARSET_CHARS (charset) == 96);
733 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
734 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
735 f1 > MAX_CHAR_FIELD1_PRIVATE)
737 if (f2 < 0x20 || f3 < 0x20)
740 #ifdef ENABLE_COMPOSITE_CHARS
741 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
743 if (UNBOUNDP (Fgethash (make_int (ch),
744 Vcomposite_char_char2string_hash_table,
749 #endif /* ENABLE_COMPOSITE_CHARS */
751 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
752 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
755 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
757 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
760 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
762 if (EQ (charset, Qnil))
764 return (XCHARSET_CHARS (charset) == 96);
770 /************************************************************************/
771 /* Basic string functions */
772 /************************************************************************/
774 /* Copy the character pointed to by SRC into DST. Do not call this
775 directly. Use the macro charptr_copy_char() instead.
776 Return the number of bytes copied. */
779 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
781 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
783 for (i = bytes; i; i--, dst++, src++)
789 /************************************************************************/
790 /* streams of Emchars */
791 /************************************************************************/
793 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
794 The functions below are not meant to be called directly; use
795 the macros in insdel.h. */
/* Assemble one character from STREAM and return it as an Emchar.
   CH supplies the first byte of the representation (presumably already
   fetched by the caller -- confirm against the macro that invokes this).
   The remaining REP_BYTES_BY_FIRST_BYTE (ch) - 1 bytes are read with
   Lstream_getc() into a local buffer, which is then decoded with
   charptr_emchar(). */
798 Lstream_get_emchar_1 (Lstream *stream, int ch)
800 Bufbyte str[MAX_EMCHAR_LEN];
801 Bufbyte *strptr = str;
/* The first byte comes from the argument, not from the stream. */
804 str[0] = (Bufbyte) ch;
/* Fetch the trailing bytes, one per iteration. */
806 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
808 int c = Lstream_getc (stream);
/* A negative result from Lstream_getc() is only asserted against,
   not otherwise handled here. */
809 bufpos_checking_assert (c >= 0);
810 *++strptr = (Bufbyte) c;
812 return charptr_emchar (str);
/* Write character CH to STREAM.  CH is first encoded with
   set_charptr_emchar() into a local buffer of MAX_EMCHAR_LEN bytes;
   the encoded bytes are then passed to Lstream_write(), whose result
   is returned unchanged. */
816 Lstream_fput_emchar (Lstream *stream, Emchar ch)
818 Bufbyte str[MAX_EMCHAR_LEN];
819 Bytecount len = set_charptr_emchar (str, ch);
820 return Lstream_write (stream, str, len);
/* Push character CH back onto STREAM.  CH is encoded with
   set_charptr_emchar() into a local buffer of MAX_EMCHAR_LEN bytes and
   the encoded bytes are handed to Lstream_unread(). */
824 Lstream_funget_emchar (Lstream *stream, Emchar ch)
826 Bufbyte str[MAX_EMCHAR_LEN];
827 Bytecount len = set_charptr_emchar (str, ch);
828 Lstream_unread (stream, str, len);
832 /************************************************************************/
834 /************************************************************************/
/* Mark routine for charset objects; it is the marker handed to
   DEFINE_LRECORD_IMPLEMENTATION for "charset" further down.  Calls
   mark_object() on the Lisp_Object fields of the charset listed
   below. */
837 mark_charset (Lisp_Object obj)
839 Lisp_Charset *cs = XCHARSET (obj);
841 mark_object (cs->short_name);
842 mark_object (cs->long_name);
843 mark_object (cs->doc_string);
844 mark_object (cs->registry);
845 mark_object (cs->ccl_program);
847 mark_object (cs->encoding_table);
/* NOTE(review): marking of decoding_table is disabled; the reason is
   not visible in this part of the file -- confirm before changing. */
848 /* mark_object (cs->decoding_table); */
854 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
856 Lisp_Charset *cs = XCHARSET (obj);
860 error ("printing unreadable object #<charset %s 0x%x>",
861 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
864 write_c_string ("#<charset ", printcharfun);
865 print_internal (CHARSET_NAME (cs), printcharfun, 0);
866 write_c_string (" ", printcharfun);
867 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
868 write_c_string (" ", printcharfun);
869 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
870 write_c_string (" ", printcharfun);
871 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
872 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
874 CHARSET_DIMENSION (cs),
875 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
876 CHARSET_COLUMNS (cs),
877 CHARSET_GRAPHIC (cs),
879 write_c_string (buf, printcharfun);
880 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
881 sprintf (buf, " 0x%x>", cs->header.uid);
882 write_c_string (buf, printcharfun);
885 static const struct lrecord_description charset_description[] = {
886 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
887 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
888 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
889 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
890 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
891 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
892 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
894 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
895 { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
900 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
901 mark_charset, print_charset, 0, 0, 0,
905 /* Make a new charset. */
906 /* #### SJT Should generic properties be allowed? */
908 make_charset (Charset_ID id, Lisp_Object name,
909 unsigned short chars, unsigned char dimension,
910 unsigned char columns, unsigned char graphic,
911 Bufbyte final, unsigned char direction, Lisp_Object short_name,
912 Lisp_Object long_name, Lisp_Object doc,
914 Lisp_Object decoding_table,
915 Emchar ucs_min, Emchar ucs_max,
916 Emchar code_offset, unsigned char byte_offset)
919 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
923 XSETCHARSET (obj, cs);
925 CHARSET_ID (cs) = id;
926 CHARSET_NAME (cs) = name;
927 CHARSET_SHORT_NAME (cs) = short_name;
928 CHARSET_LONG_NAME (cs) = long_name;
929 CHARSET_CHARS (cs) = chars;
930 CHARSET_DIMENSION (cs) = dimension;
931 CHARSET_DIRECTION (cs) = direction;
932 CHARSET_COLUMNS (cs) = columns;
933 CHARSET_GRAPHIC (cs) = graphic;
934 CHARSET_FINAL (cs) = final;
935 CHARSET_DOC_STRING (cs) = doc;
936 CHARSET_REGISTRY (cs) = reg;
937 CHARSET_CCL_PROGRAM (cs) = Qnil;
938 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
940 CHARSET_DECODING_TABLE(cs) = Qnil;
941 CHARSET_ENCODING_TABLE(cs) = Qnil;
942 CHARSET_UCS_MIN(cs) = ucs_min;
943 CHARSET_UCS_MAX(cs) = ucs_max;
944 CHARSET_CODE_OFFSET(cs) = code_offset;
945 CHARSET_BYTE_OFFSET(cs) = byte_offset;
949 if (id == LEADING_BYTE_ASCII)
950 CHARSET_REP_BYTES (cs) = 1;
952 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
954 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
959 /* Some charsets do not have final characters. This includes
960 ASCII, Control-1, Composite, and the two faux private charsets. */
962 unsigned char iso2022_type
963 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
965 if (code_offset == 0)
967 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
968 chlook->charset_by_attributes[iso2022_type][final] = obj;
972 (chlook->charset_by_attributes[iso2022_type][final][direction]));
973 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
977 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
978 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
980 /* Some charsets are "faux" and don't have names or really exist at
981 all except in the leading-byte table. */
983 Fputhash (name, obj, Vcharset_hash_table);
988 get_unallocated_leading_byte (int dimension)
993 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
996 lb = chlook->next_allocated_leading_byte++;
1000 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1003 lb = chlook->next_allocated_1_byte_leading_byte++;
1007 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1010 lb = chlook->next_allocated_2_byte_leading_byte++;
1016 ("No more character sets free for this dimension",
1017 make_int (dimension));
1023 /* Number of Big5 characters which have the same code in 1st byte.
   This is the sum of the widths of the two second-byte sub-ranges
   accepted by decode_builtin_char(): (0xFF - 0xA1) = 94 values for
   0xA1..0xFE and (0x7F - 0x40) = 63 values for 0x40..0x7E, giving 157.
   decode_builtin_char() uses it as the per-row stride when turning a
   Big5 code point into a linear index. */
1025 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
1028 decode_builtin_char (Lisp_Object charset, int code_point)
1032 if (EQ (charset, Vcharset_chinese_big5))
1034 int c1 = code_point >> 8;
1035 int c2 = code_point & 0xFF;
1038 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
1040 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
1041 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
1043 I = (c1 - 0xA1) * BIG5_SAME_ROW
1044 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
1048 charset = Vcharset_chinese_big5_1;
1052 charset = Vcharset_chinese_big5_2;
1053 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1055 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
1058 if ((final = XCHARSET_FINAL (charset)) >= '0')
1060 if (XCHARSET_DIMENSION (charset) == 1)
1062 switch (XCHARSET_CHARS (charset))
1066 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1069 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1077 switch (XCHARSET_CHARS (charset))
1080 return MIN_CHAR_94x94
1081 + (final - '0') * 94 * 94
1082 + (((code_point >> 8) & 0x7F) - 33) * 94
1083 + ((code_point & 0x7F) - 33);
1085 return MIN_CHAR_96x96
1086 + (final - '0') * 96 * 96
1087 + (((code_point >> 8) & 0x7F) - 32) * 96
1088 + ((code_point & 0x7F) - 32);
1095 else if (XCHARSET_UCS_MAX (charset))
1098 = (XCHARSET_DIMENSION (charset) == 1
1100 code_point - XCHARSET_BYTE_OFFSET (charset)
1102 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1103 * XCHARSET_CHARS (charset)
1104 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1105 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
1106 if ((cid < XCHARSET_UCS_MIN (charset))
1107 || (XCHARSET_UCS_MAX (charset) < cid))
1116 range_charset_code_point (Lisp_Object charset, Emchar ch)
1120 if ((XCHARSET_UCS_MIN (charset) <= ch)
1121 && (ch <= XCHARSET_UCS_MAX (charset)))
1123 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1125 if (XCHARSET_CHARS (charset) == 256)
1127 else if (XCHARSET_DIMENSION (charset) == 1)
1128 return d + XCHARSET_BYTE_OFFSET (charset);
1129 else if (XCHARSET_DIMENSION (charset) == 2)
1131 ((d / XCHARSET_CHARS (charset)
1132 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1133 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1134 else if (XCHARSET_DIMENSION (charset) == 3)
1136 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1137 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1138 | ((d / XCHARSET_CHARS (charset)
1139 % XCHARSET_CHARS (charset)
1140 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1141 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1142 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1144 ((d / (XCHARSET_CHARS (charset)
1145 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1146 + XCHARSET_BYTE_OFFSET (charset)) << 24)
1147 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1148 % XCHARSET_CHARS (charset)
1149 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1150 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
1151 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1152 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1154 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1156 if (XCHARSET_DIMENSION (charset) == 1)
1158 if (XCHARSET_CHARS (charset) == 94)
1160 if (((d = ch - (MIN_CHAR_94
1161 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1165 else if (XCHARSET_CHARS (charset) == 96)
1167 if (((d = ch - (MIN_CHAR_96
1168 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1175 else if (XCHARSET_DIMENSION (charset) == 2)
1177 if (XCHARSET_CHARS (charset) == 94)
1179 if (((d = ch - (MIN_CHAR_94x94
1180 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1183 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1185 else if (XCHARSET_CHARS (charset) == 96)
1187 if (((d = ch - (MIN_CHAR_96x96
1188 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1191 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1197 if (EQ (charset, Vcharset_mojikyo_2022_1)
1198 && (MIN_CHAR_MOJIKYO < ch) && (ch < MIN_CHAR_MOJIKYO + 94 * 60 * 94))
1200 int m = ch - MIN_CHAR_MOJIKYO - 1;
1201 int byte1 = m / (94 * 60) + 33;
1202 int byte2 = (m % (94 * 60)) / 94;
1203 int byte3 = m % 94 + 33;
1209 return (byte1 << 16) | (byte2 << 8) | byte3;
1215 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1217 if (c <= MAX_CHAR_BASIC_LATIN)
1219 *charset = Vcharset_ascii;
1224 *charset = Vcharset_control_1;
1229 *charset = Vcharset_latin_iso8859_1;
1233 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1235 *charset = Vcharset_hebrew_iso8859_8;
1236 return c - MIN_CHAR_HEBREW + 0x20;
1239 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1241 *charset = Vcharset_thai_tis620;
1242 return c - MIN_CHAR_THAI + 0x20;
1245 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1246 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1248 return list2 (Vcharset_katakana_jisx0201,
1249 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1252 else if (c <= MAX_CHAR_BMP)
1254 *charset = Vcharset_ucs_bmp;
1257 else if (c < MIN_CHAR_DAIKANWA)
1259 *charset = Vcharset_ucs;
1262 else if (c <= MAX_CHAR_DAIKANWA)
1264 *charset = Vcharset_ideograph_daikanwa;
1265 return c - MIN_CHAR_DAIKANWA;
1268 else if (c <= MAX_CHAR_MOJIKYO_0)
1270 *charset = Vcharset_mojikyo;
1271 return c - MIN_CHAR_MOJIKYO_0;
1274 else if (c < MIN_CHAR_94)
1276 *charset = Vcharset_ucs;
1279 else if (c <= MAX_CHAR_94)
1281 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1282 ((c - MIN_CHAR_94) / 94) + '0',
1283 CHARSET_LEFT_TO_RIGHT);
1284 if (!NILP (*charset))
1285 return ((c - MIN_CHAR_94) % 94) + 33;
1288 *charset = Vcharset_ucs;
1292 else if (c <= MAX_CHAR_96)
1294 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1295 ((c - MIN_CHAR_96) / 96) + '0',
1296 CHARSET_LEFT_TO_RIGHT);
1297 if (!NILP (*charset))
1298 return ((c - MIN_CHAR_96) % 96) + 32;
1301 *charset = Vcharset_ucs;
1305 else if (c <= MAX_CHAR_94x94)
1308 = CHARSET_BY_ATTRIBUTES (94, 2,
1309 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1310 CHARSET_LEFT_TO_RIGHT);
1311 if (!NILP (*charset))
1312 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1313 | (((c - MIN_CHAR_94x94) % 94) + 33);
1316 *charset = Vcharset_ucs;
1320 else if (c <= MAX_CHAR_96x96)
1323 = CHARSET_BY_ATTRIBUTES (96, 2,
1324 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1325 CHARSET_LEFT_TO_RIGHT);
1326 if (!NILP (*charset))
1327 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1328 | (((c - MIN_CHAR_96x96) % 96) + 32);
1331 *charset = Vcharset_ucs;
1336 else if (c < MIN_CHAR_MOJIKYO)
1338 *charset = Vcharset_ucs;
1341 else if (c <= MAX_CHAR_MOJIKYO)
1343 *charset = Vcharset_mojikyo;
1344 return c - MIN_CHAR_MOJIKYO;
1346 else if (c < MIN_CHAR_CHINA3_JEF)
1348 *charset = Vcharset_ucs;
1351 else if (c <= MAX_CHAR_CHINA3_JEF)
1353 *charset = Vcharset_china3_jef;
1354 return c - MIN_CHAR_CHINA3_JEF;
1356 else if (c <= MAX_CHAR_CBETA)
1358 *charset = Vcharset_ideograph_cbeta;
1359 return c - MIN_CHAR_CBETA;
1364 *charset = Vcharset_ucs;
1369 Lisp_Object Vdefault_coded_charset_priority_list;
1373 /************************************************************************/
1374 /* Basic charset Lisp functions */
1375 /************************************************************************/
1377 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1378 Return non-nil if OBJECT is a charset.
1382 return CHARSETP (object) ? Qt : Qnil;
1385 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1386 Retrieve the charset of the given name.
1387 If CHARSET-OR-NAME is a charset object, it is simply returned.
1388 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1389 nil is returned. Otherwise the associated charset object is returned.
1393 if (CHARSETP (charset_or_name))
1394 return charset_or_name;
1396 CHECK_SYMBOL (charset_or_name);
1397 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1400 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1401 Retrieve the charset of the given name.
1402 Same as `find-charset' except an error is signalled if there is no such
1403 charset instead of returning nil.
1407 Lisp_Object charset = Ffind_charset (name);
1410 signal_simple_error ("No such charset", name);
1414 /* We store the charsets in hash tables with the names as the key and the
1415 actual charset object as the value. Occasionally we need to use them
1416 in a list format. These routines provide us with that. */
1417 struct charset_list_closure
1419 Lisp_Object *charset_list;
/* Mapping callback that Fcharset_list passes to elisp_maphash() over
   Vcharset_hash_table.  CHARSET_LIST_CLOSURE points to a
   struct charset_list_closure; KEY (the hash key, i.e. the charset
   name per the comment above) is consed onto the front of the list
   that closure refers to.  VALUE is not used in the visible code. */
1423 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1424 void *charset_list_closure)
1426 /* This function can GC */
1427 struct charset_list_closure *chcl =
1428 (struct charset_list_closure*) charset_list_closure;
1429 Lisp_Object *charset_list = chcl->charset_list;
/* Prepend, so the resulting list is in reverse order of traversal. */
1431 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
/* Lisp primitive `charset-list'.  Maps add_charset_to_list_mapper over
   Vcharset_hash_table, collecting every key into a fresh list.  The
   accumulator is GC-protected because the mapper conses. */
1435 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1436 Return a list of the names of all defined charsets.
1440 Lisp_Object charset_list = Qnil;
1441 struct gcpro gcpro1;
1442 struct charset_list_closure charset_list_closure;
1444 GCPRO1 (charset_list);
1445 charset_list_closure.charset_list = &charset_list;
1446 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1447 &charset_list_closure);
1450 return charset_list;
/* Lisp primitive `charset-name'.  CHARSET is first resolved through
   Fget_charset, so a symbol naming a charset is accepted as well as a
   charset object; the result is the charset's XCHARSET_NAME. */
1453 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1454 Return the name of charset CHARSET.
1458 return XCHARSET_NAME (Fget_charset (charset));
1461 /* #### SJT Should generic properties be allowed? */
1462 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1463 Define a new character set.
1464 This function is for use with Mule support.
1465 NAME is a symbol, the name by which the character set is normally referred.
1466 DOC-STRING is a string describing the character set.
1467 PROPS is a property list, describing the specific nature of the
1468 character set. Recognized properties are:
1470 'short-name Short version of the charset name (ex: Latin-1)
1471 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1472 'registry A regular expression matching the font registry field for
1474 'dimension Number of octets used to index a character in this charset.
1475 Either 1 or 2. Defaults to 1.
1476 'columns Number of columns used to display a character in this charset.
1477 Only used in TTY mode. (Under X, the actual width of a
1478 character can be derived from the font used to display the
1479 characters.) If unspecified, defaults to the dimension
1480 (this is almost always the correct value).
1481 'chars Number of characters in each dimension (94 or 96).
1482 Defaults to 94. Note that if the dimension is 2, the
1483 character set thus described is 94x94 or 96x96.
1484 'final Final byte of ISO 2022 escape sequence. Must be
1485 supplied. Each combination of (DIMENSION, CHARS) defines a
1486 separate namespace for final bytes. Note that ISO
1487 2022 restricts the final byte to the range
1488 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1489 dimension == 2. Note also that final bytes in the range
1490 0x30 - 0x3F are reserved for user-defined (not official)
1492 'graphic 0 (use left half of font on output) or 1 (use right half
1493 of font on output). Defaults to 0. For example, for
1494 a font whose registry is ISO8859-1, the left half
1495 (octets 0x20 - 0x7F) is the `ascii' character set, while
1496 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1497 character set. With 'graphic set to 0, the octets
1498 will have their high bit cleared; with it set to 1,
1499 the octets will have their high bit set.
1500 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1502 'ccl-program A compiled CCL program used to convert a character in
1503 this charset into an index into the font. This is in
1504 addition to the 'graphic property. The CCL program
1505 is passed the octets of the character, with the high
1506 bit cleared and set depending upon whether the value
1507 of the 'graphic property is 0 or 1.
1509 (name, doc_string, props))
1511 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1512 int direction = CHARSET_LEFT_TO_RIGHT;
1513 Lisp_Object registry = Qnil;
1514 Lisp_Object charset;
1515 Lisp_Object ccl_program = Qnil;
1516 Lisp_Object short_name = Qnil, long_name = Qnil;
1517 int byte_offset = -1;
1519 CHECK_SYMBOL (name);
1520 if (!NILP (doc_string))
1521 CHECK_STRING (doc_string);
1523 charset = Ffind_charset (name);
1524 if (!NILP (charset))
1525 signal_simple_error ("Cannot redefine existing charset", name);
1528 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1530 if (EQ (keyword, Qshort_name))
1532 CHECK_STRING (value);
1536 if (EQ (keyword, Qlong_name))
1538 CHECK_STRING (value);
1542 else if (EQ (keyword, Qdimension))
1545 dimension = XINT (value);
1546 if (dimension < 1 || dimension > 2)
1547 signal_simple_error ("Invalid value for 'dimension", value);
1550 else if (EQ (keyword, Qchars))
1553 chars = XINT (value);
1554 if (chars != 94 && chars != 96)
1555 signal_simple_error ("Invalid value for 'chars", value);
1558 else if (EQ (keyword, Qcolumns))
1561 columns = XINT (value);
1562 if (columns != 1 && columns != 2)
1563 signal_simple_error ("Invalid value for 'columns", value);
1566 else if (EQ (keyword, Qgraphic))
1569 graphic = XINT (value);
1571 if (graphic < 0 || graphic > 2)
1573 if (graphic < 0 || graphic > 1)
1575 signal_simple_error ("Invalid value for 'graphic", value);
1578 else if (EQ (keyword, Qregistry))
1580 CHECK_STRING (value);
1584 else if (EQ (keyword, Qdirection))
1586 if (EQ (value, Ql2r))
1587 direction = CHARSET_LEFT_TO_RIGHT;
1588 else if (EQ (value, Qr2l))
1589 direction = CHARSET_RIGHT_TO_LEFT;
1591 signal_simple_error ("Invalid value for 'direction", value);
1594 else if (EQ (keyword, Qfinal))
1596 CHECK_CHAR_COERCE_INT (value);
1597 final = XCHAR (value);
1598 if (final < '0' || final > '~')
1599 signal_simple_error ("Invalid value for 'final", value);
1602 else if (EQ (keyword, Qccl_program))
1604 struct ccl_program test_ccl;
1606 if (setup_ccl_program (&test_ccl, value) < 0)
1607 signal_simple_error ("Invalid value for 'ccl-program", value);
1608 ccl_program = value;
1612 signal_simple_error ("Unrecognized property", keyword);
1617 error ("'final must be specified");
1618 if (dimension == 2 && final > 0x5F)
1620 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1623 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1624 CHARSET_LEFT_TO_RIGHT)) ||
1625 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1626 CHARSET_RIGHT_TO_LEFT)))
1628 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1630 id = get_unallocated_leading_byte (dimension);
1632 if (NILP (doc_string))
1633 doc_string = build_string ("");
1635 if (NILP (registry))
1636 registry = build_string ("");
1638 if (NILP (short_name))
1639 XSETSTRING (short_name, XSYMBOL (name)->name);
1641 if (NILP (long_name))
1642 long_name = doc_string;
1645 columns = dimension;
1647 if (byte_offset < 0)
1651 else if (chars == 96)
1657 charset = make_charset (id, name, chars, dimension, columns, graphic,
1658 final, direction, short_name, long_name,
1659 doc_string, registry,
1660 Qnil, 0, 0, 0, byte_offset);
1661 if (!NILP (ccl_program))
1662 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp primitive `make-reverse-direction-charset'.  Signals if CHARSET
   already has a reverse-direction charset or if NEW-NAME already names
   a charset.  Otherwise it copies CHARSET's chars, dimension, columns,
   graphic, final and name/doc/registry strings, flips the direction,
   takes a fresh leading byte from get_unallocated_leading_byte, and
   links the two charsets to each other through their
   reverse-direction slots. */
1666 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1668 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1669 NEW-NAME is the name of the new charset. Return the new charset.
1671 (charset, new_name))
1673 Lisp_Object new_charset = Qnil;
1674 int id, chars, dimension, columns, graphic, final;
1676 Lisp_Object registry, doc_string, short_name, long_name;
1679 charset = Fget_charset (charset);
1680 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1681 signal_simple_error ("Charset already has reverse-direction charset",
1684 CHECK_SYMBOL (new_name);
1685 if (!NILP (Ffind_charset (new_name)))
1686 signal_simple_error ("Cannot redefine existing charset", new_name);
1688 cs = XCHARSET (charset);
1690 chars = CHARSET_CHARS (cs);
1691 dimension = CHARSET_DIMENSION (cs);
1692 columns = CHARSET_COLUMNS (cs);
1693 id = get_unallocated_leading_byte (dimension);
1695 graphic = CHARSET_GRAPHIC (cs);
1696 final = CHARSET_FINAL (cs);
/* Default to right-to-left, then flip back if the source charset is
   itself right-to-left. */
1697 direction = CHARSET_RIGHT_TO_LEFT;
1698 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1699 direction = CHARSET_LEFT_TO_RIGHT;
1700 doc_string = CHARSET_DOC_STRING (cs);
1701 short_name = CHARSET_SHORT_NAME (cs);
1702 long_name = CHARSET_LONG_NAME (cs);
1703 registry = CHARSET_REGISTRY (cs);
1705 new_charset = make_charset (id, new_name, chars, dimension, columns,
1706 graphic, final, direction, short_name, long_name,
1707 doc_string, registry,
1709 CHARSET_DECODING_TABLE(cs),
1710 CHARSET_UCS_MIN(cs),
1711 CHARSET_UCS_MAX(cs),
1712 CHARSET_CODE_OFFSET(cs),
1713 CHARSET_BYTE_OFFSET(cs)
1719 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1720 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Lisp primitive `define-charset-alias'.  ALIAS must be a symbol;
   CHARSET is resolved through Fget_charset and then stored under ALIAS
   in Vcharset_hash_table.  The return value is whatever Fputhash
   returns. */
1725 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1726 Define symbol ALIAS as an alias for CHARSET.
1730 CHECK_SYMBOL (alias);
1731 charset = Fget_charset (charset);
1732 return Fputhash (alias, charset, Vcharset_hash_table);
1735 /* #### Reverse direction charsets not yet implemented. */
/* Lisp primitive `charset-reverse-direction-charset'.  Resolves CHARSET
   through Fget_charset and returns the contents of its
   reverse-direction slot unchanged. */
1737 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1739 Return the reverse-direction charset parallel to CHARSET, if any.
1740 This is the charset with the same properties (in particular, the same
1741 dimension, number of characters per dimension, and final byte) as
1742 CHARSET but whose characters are displayed in the opposite direction.
1746 charset = Fget_charset (charset);
1747 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Lisp primitive `charset-from-attributes'.  Validates DIMENSION (1 or
   2), CHARS (94 or 96), FINAL ('0' through '~', and no higher than 0x5F
   when the dimension is 2) and DIRECTION (l2r, r2l or nil), then
   consults CHARSET_BY_ATTRIBUTES.  The visible return statement hands
   back the charset's name (XCHARSET_NAME), not the charset object.
   NOTE(review): the tests selecting between the three lookups below,
   and any guard on the final return, are not visible in this
   listing. */
1751 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1752 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1753 If DIRECTION is omitted, both directions will be checked (left-to-right
1754 will be returned if character sets exist for both directions).
1756 (dimension, chars, final, direction))
1758 int dm, ch, fi, di = -1;
1759 Lisp_Object obj = Qnil;
1761 CHECK_INT (dimension);
1762 dm = XINT (dimension);
1763 if (dm < 1 || dm > 2)
1764 signal_simple_error ("Invalid value for DIMENSION", dimension);
1768 if (ch != 94 && ch != 96)
1769 signal_simple_error ("Invalid value for CHARS", chars);
1771 CHECK_CHAR_COERCE_INT (final);
1773 if (fi < '0' || fi > '~')
1774 signal_simple_error ("Invalid value for FINAL", final);
/* di stays at -1 when DIRECTION is nil. */
1776 if (EQ (direction, Ql2r))
1777 di = CHARSET_LEFT_TO_RIGHT;
1778 else if (EQ (direction, Qr2l))
1779 di = CHARSET_RIGHT_TO_LEFT;
1780 else if (!NILP (direction))
1781 signal_simple_error ("Invalid value for DIRECTION", direction);
1783 if (dm == 2 && fi > 0x5F)
1785 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1789 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1791 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1794 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1797 return XCHARSET_NAME (obj);
/* Lisp primitive `charset-short-name': resolves CHARSET through
   Fget_charset and returns its XCHARSET_SHORT_NAME. */
1801 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1802 Return short name of CHARSET.
1806 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Lisp primitive `charset-long-name': resolves CHARSET through
   Fget_charset and returns its XCHARSET_LONG_NAME. */
1809 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1810 Return long name of CHARSET.
1814 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Lisp primitive `charset-description': the description is the
   charset's doc-string slot (XCHARSET_DOC_STRING), read after
   resolving CHARSET through Fget_charset. */
1817 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1818 Return description of CHARSET.
1822 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Lisp primitive `charset-dimension': resolves CHARSET through
   Fget_charset and returns XCHARSET_DIMENSION boxed as a Lisp
   integer. */
1825 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1826 Return dimension of CHARSET.
1830 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Lisp primitive `charset-property'.  Resolves CHARSET through
   Fget_charset, requires PROP to be a symbol, and dispatches on PROP
   with a sequence of EQ tests.  Integer-valued slots are boxed with
   make_int.  'final yields nil when the stored final byte is 0 and a
   character otherwise.  'reverse-direction-charset yields the other
   charset's name rather than the charset object.  An unknown PROP
   signals "Unrecognized charset property name". */
1833 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1834 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1835 Recognized properties are those listed in `make-charset', as well as
1836 'name and 'doc-string.
1842 charset = Fget_charset (charset);
1843 cs = XCHARSET (charset);
1845 CHECK_SYMBOL (prop);
1846 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1847 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1848 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1849 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1850 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1851 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1852 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1853 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1854 Qnil : make_char (CHARSET_FINAL (cs));
1855 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1856 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1857 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1858 if (EQ (prop, Qdirection))
1859 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1860 if (EQ (prop, Qreverse_direction_charset))
1862 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1863 /* #### Is this translation OK? If so, error checking sufficient? */
1864 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1866 signal_simple_error ("Unrecognized charset property name", prop);
1867 return Qnil; /* not reached */
/* Lisp primitive `charset-id': the identification number is the
   charset's leading byte (XCHARSET_LEADING_BYTE), boxed as a Lisp
   integer after resolving CHARSET through Fget_charset. */
1870 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1871 Return charset identification number of CHARSET.
1875 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1878 /* #### We need to figure out which properties we really want to
/* Lisp primitive `set-charset-ccl-program'.  CCL-PROGRAM is validated
   by running setup_ccl_program into a scratch struct; a negative result
   signals "Invalid ccl-program".  Only then is the value stored in the
   charset's ccl-program slot. */
1881 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1882 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1884 (charset, ccl_program))
1886 struct ccl_program test_ccl;
1888 charset = Fget_charset (charset);
1889 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1890 signal_simple_error ("Invalid ccl-program", ccl_program);
1891 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Helper for Fset_charset_registry.  For each device visited by
   DEVICE_LOOP_NO_BREAK, looks CHARSET up in that device's
   charset_font_cache and, when an entry exists (is not Qunbound),
   empties it with Fclrhash. */
1896 invalidate_charset_font_caches (Lisp_Object charset)
1898 /* Invalidate font cache entries for charset on all devices. */
1899 Lisp_Object devcons, concons, hash_table;
1900 DEVICE_LOOP_NO_BREAK (devcons, concons)
1902 struct device *d = XDEVICE (XCAR (devcons));
1903 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1904 if (!UNBOUNDP (hash_table))
1905 Fclrhash (hash_table);
/* Lisp primitive `set-charset-registry'.  REGISTRY must be a string.
   After storing it, the function clears the per-device font caches for
   this charset and reports a global change of the default face's font
   property via face_property_was_changed. */
1909 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1910 Set the 'registry property of CHARSET to REGISTRY.
1912 (charset, registry))
1914 charset = Fget_charset (charset);
1915 CHECK_STRING (registry);
1916 XCHARSET_REGISTRY (charset) = registry;
1917 invalidate_charset_font_caches (charset);
1918 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Lisp primitive `charset-mapping-table': the mapping table is the
   charset's decoding-table slot (XCHARSET_DECODING_TABLE), read after
   resolving CHARSET through Fget_charset. */
1923 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1924 Return mapping-table of CHARSET.
1928 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Lisp primitive `set-charset-mapping-table'.  A TABLE that is neither
   of the accepted kinds raises wrong-type-argument tagged
   "vector-or-nil-p".  A vector TABLE is checked with
   decoding_table_check_elements; failures are reported as
   "Too big table", "Invalid element is found" or "Something wrong".
   The trailing switch on the charset's dimension walks TABLE (and each
   nested row vector) and records the entries through
   put_char_ccs_code_point.  The code point is the index plus
   CHARSET_BYTE_OFFSET, with the outer index shifted left 8 bits for
   nested rows.
   NOTE(review): several conditions are not visible in this listing,
   including the mapping from the check's return value to each error
   message and the guard on the first branch (presumably NILP (table)),
   so the exact control flow must be confirmed against the full file. */
1931 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1932 Set mapping-table of CHARSET to TABLE.
1936 struct Lisp_Charset *cs;
1940 charset = Fget_charset (charset);
1941 cs = XCHARSET (charset);
1945 if (VECTORP (CHARSET_DECODING_TABLE(cs)))
1946 make_vector_newer (CHARSET_DECODING_TABLE(cs));
1947 CHARSET_DECODING_TABLE(cs) = Qnil;
1950 else if (VECTORP (table))
1952 int ccs_len = CHARSET_BYTE_SIZE (cs);
1953 int ret = decoding_table_check_elements (table,
1954 CHARSET_DIMENSION (cs),
1959 signal_simple_error ("Too big table", table);
1961 signal_simple_error ("Invalid element is found", table);
1963 signal_simple_error ("Something wrong", table);
1965 CHARSET_DECODING_TABLE(cs) = Qnil;
1968 signal_error (Qwrong_type_argument,
1969 list2 (build_translated_string ("vector-or-nil-p"),
1972 byte_offset = CHARSET_BYTE_OFFSET (cs);
1973 switch (CHARSET_DIMENSION (cs))
1976 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1978 Lisp_Object c = XVECTOR_DATA(table)[i];
1981 put_char_ccs_code_point (c, charset,
1982 make_int (i + byte_offset));
1986 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1988 Lisp_Object v = XVECTOR_DATA(table)[i];
1994 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1996 Lisp_Object c = XVECTOR_DATA(v)[j];
1999 put_char_ccs_code_point
2001 make_int ( ( (i + byte_offset) << 8 )
2007 put_char_ccs_code_point (v, charset,
2008 make_int (i + byte_offset));
2017 /************************************************************************/
2018 /* Lisp primitives for working with characters */
2019 /************************************************************************/
/* Lisp primitive `decode-char'.  Resolves CHARSET through Fget_charset,
   then decodes the code point with DECODE_CHAR when DEFINED-ONLY is nil
   and with DECODE_DEFINED_CHAR otherwise.  A negative result is
   returned to Lisp as nil, anything else as a character.
   NOTE(review): how `c' is initialised from CODE, and the adjustment
   made when the charset's graphic value is 1, are not visible in this
   listing. */
2022 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2023 Make a character from CHARSET and code-point CODE.
2024 If DEFINED_ONLY is non-nil, builtin character is not returned.
2025 If corresponding character is not found, nil is returned.
2027 (charset, code, defined_only))
2031 charset = Fget_charset (charset);
2034 if (XCHARSET_GRAPHIC (charset) == 1)
2036 if (NILP (defined_only))
2037 c = DECODE_CHAR (charset, c);
2039 c = DECODE_DEFINED_CHAR (charset, c);
2040 return c >= 0 ? make_char (c) : Qnil;
/* Lisp primitive `decode-builtin-char'.  For Vcharset_latin_viscii the
   character is first decoded with Fdecode_char and its attributes for
   the viscii-lower and viscii-upper charsets are consulted, which can
   redirect CHARSET to one of those two.  The code point is then passed
   to decode_builtin_char; when that yields a negative value the
   function falls back to Fdecode_char with DEFINED-ONLY nil.
   NOTE(review): the conditions around the two Fget_char_attribute
   calls and the graphic == 1 adjustment are not visible in this
   listing. */
2043 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2044 Make a builtin character from CHARSET and code-point CODE.
2050 charset = Fget_charset (charset);
2052 if (EQ (charset, Vcharset_latin_viscii))
2054 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2060 (ret = Fget_char_attribute (chr,
2061 Vcharset_latin_viscii_lower,
2064 charset = Vcharset_latin_viscii_lower;
2068 (ret = Fget_char_attribute (chr,
2069 Vcharset_latin_viscii_upper,
2072 charset = Vcharset_latin_viscii_upper;
2079 if (XCHARSET_GRAPHIC (charset) == 1)
2082 c = decode_builtin_char (charset, c);
2083 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
/* Lisp primitive `make-char'.  The permitted octet range depends on the
   charset: 0-127 for ascii, 0-31 for control-1, 0-255 for a 256-char
   set, 33-126 for a 94-char set and 32-127 otherwise.  An octet outside
   the range signals through args_out_of_range_3.  A dimension-1 charset
   ignores the second octet position (passing 0 to MAKE_CHAR) and
   carries an error for a supplied ARG2; otherwise ARG2 is masked with
   0x7f and range-checked like ARG1.
   NOTE(review): the extraction of a1 from ARG1 and the test guarding
   the "second octet must be nil" error are not visible in this
   listing. */
2087 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2088 Make a character from CHARSET and octets ARG1 and ARG2.
2089 ARG2 is required only for characters from two-dimensional charsets.
2090 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2091 character s with caron.
2093 (charset, arg1, arg2))
2097 int lowlim, highlim;
2099 charset = Fget_charset (charset);
2100 cs = XCHARSET (charset);
2102 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2103 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2105 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2107 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2108 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2111 /* It is useful (and safe, according to Olivier Galibert) to strip
2112 the 8th bit off ARG1 and ARG2 because it allows programmers to
2113 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2114 Latin 2 code of the character. */
2122 if (a1 < lowlim || a1 > highlim)
2123 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2125 if (CHARSET_DIMENSION (cs) == 1)
2129 ("Charset is of dimension one; second octet must be nil", arg2);
2130 return make_char (MAKE_CHAR (charset, a1, 0));
2139 a2 = XINT (arg2) & 0x7f;
2141 if (a2 < lowlim || a2 > highlim)
2142 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2144 return make_char (MAKE_CHAR (charset, a1, a2));
/* Lisp primitive `char-charset'.  CHARACTER may be a character or an
   integer coercible to one (CHECK_CHAR_COERCE_INT).  The value returned
   is the name of the charset found by CHAR_CHARSET, not the charset
   object. */
2147 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2148 Return the character set of CHARACTER.
2152 CHECK_CHAR_COERCE_INT (character);
2154 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
/* Lisp primitive `char-octet'.  Splits CHARACTER with BREAKUP_CHAR and
   returns octet 0 when N is nil or 0, octet 1 when N is 1.  Any other
   N reaches the "Octet number must be 0 or 1" error. */
2157 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2158 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2159 N defaults to 0 if omitted.
2163 Lisp_Object charset;
2166 CHECK_CHAR_COERCE_INT (character);
2168 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2170 if (NILP (n) || EQ (n, Qzero))
2171 return make_int (octet0);
2172 else if (EQ (n, make_int (1)))
2173 return make_int (octet1);
2175 signal_simple_error ("Octet number must be 0 or 1", n);
/* Lisp primitive `split-char'.  The result is a list whose head is the
   charset's name, followed by the position codes.
   NOTE(review): this listing shows two alternative bodies, presumably
   selected by a preprocessor conditional that is not visible here.  The
   first encodes the character with ENCODE_CHAR and conses the low byte
   of the code point once per dimension.  The second uses BREAKUP_CHAR
   and builds a two- or three-element list depending on whether the
   dimension is 2. */
2178 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2179 Return list of charset and one or two position-codes of CHARACTER.
2183 /* This function can GC */
2184 struct gcpro gcpro1, gcpro2;
2185 Lisp_Object charset = Qnil;
2186 Lisp_Object rc = Qnil;
2194 GCPRO2 (charset, rc);
2195 CHECK_CHAR_COERCE_INT (character);
2198 code_point = ENCODE_CHAR (XCHAR (character), charset);
2199 dimension = XCHARSET_DIMENSION (charset);
2200 while (dimension > 0)
2202 rc = Fcons (make_int (code_point & 255), rc);
2206 rc = Fcons (XCHARSET_NAME (charset), rc);
2208 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2210 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2212 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2216 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2225 #ifdef ENABLE_COMPOSITE_CHARS
2226 /************************************************************************/
2227 /* composite character functions */
2228 /************************************************************************/
/* Maps the LEN bytes at STR to a composite character.  The string is
   looked up in Vcomposite_char_string2char_hash_table.  The visible
   allocation path builds a new character in Vcharset_composite at the
   next free row/column, records it in both the char->string and
   string->char tables, and advances the column, wrapping to 32 and
   moving to the next row once the column reaches 128.  A row of 128 or
   more signals "No more composite chars available".
   NOTE(review): the test that decides between returning an existing
   entry and allocating a new one is not visible in this listing. */
2231 lookup_composite_char (Bufbyte *str, int len)
2233 Lisp_Object lispstr = make_string (str, len);
2234 Lisp_Object ch = Fgethash (lispstr,
2235 Vcomposite_char_string2char_hash_table,
2241 if (composite_char_row_next >= 128)
2242 signal_simple_error ("No more composite chars available", lispstr);
2243 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2244 composite_char_col_next);
2245 Fputhash (make_char (emch), lispstr,
2246 Vcomposite_char_char2string_hash_table);
2247 Fputhash (lispstr, make_char (emch),
2248 Vcomposite_char_string2char_hash_table);
2249 composite_char_col_next++;
2250 if (composite_char_col_next >= 128)
2252 composite_char_col_next = 32;
2253 composite_char_row_next++;
/* Inverse of lookup_composite_char: fetches the string registered for
   CH in Vcomposite_char_char2string_hash_table and asserts that an
   entry was found. */
2262 composite_char_string (Emchar ch)
2264 Lisp_Object str = Fgethash (make_char (ch),
2265 Vcomposite_char_char2string_hash_table,
2267 assert (!UNBOUNDP (str));
/* `make-composite-char' (declared with xxDEFUN).  STRING must be a Lisp
   string; its data and length are handed to lookup_composite_char and
   the resulting character is returned. */
2271 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2272 Convert a string into a single composite character.
2273 The character is the result of overstriking all the characters in
2278 CHECK_STRING (string);
2279 return make_char (lookup_composite_char (XSTRING_DATA (string),
2280 XSTRING_LENGTH (string)));
/* `composite-char-string' (declared with xxDEFUN).  A character whose
   leading byte is not LEADING_BYTE_COMPOSITE signals
   "Must be composite char"; otherwise the string is fetched with
   composite_char_string. */
2283 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2284 Return a string of the characters comprising a composite character.
2292 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2293 signal_simple_error ("Must be composite char", ch);
2294 return composite_char_string (emch);
2296 #endif /* ENABLE_COMPOSITE_CHARS */
2299 /************************************************************************/
2300 /* initialization */
2301 /************************************************************************/
/* Symbol-level initialization for this file: registers the charset
   lrecord implementation, declares every Lisp primitive defined here
   with DEFSUBR, and interns the symbols the file refers to (property
   keywords, direction names, and the names of the predefined
   charsets). */
2304 syms_of_mule_charset (void)
2306 INIT_LRECORD_IMPLEMENTATION (charset);
/* Charset primitives. */
2308 DEFSUBR (Fcharsetp);
2309 DEFSUBR (Ffind_charset);
2310 DEFSUBR (Fget_charset);
2311 DEFSUBR (Fcharset_list);
2312 DEFSUBR (Fcharset_name);
2313 DEFSUBR (Fmake_charset);
2314 DEFSUBR (Fmake_reverse_direction_charset);
2315 /* DEFSUBR (Freverse_direction_charset); */
2316 DEFSUBR (Fdefine_charset_alias);
2317 DEFSUBR (Fcharset_from_attributes);
2318 DEFSUBR (Fcharset_short_name);
2319 DEFSUBR (Fcharset_long_name);
2320 DEFSUBR (Fcharset_description);
2321 DEFSUBR (Fcharset_dimension);
2322 DEFSUBR (Fcharset_property);
2323 DEFSUBR (Fcharset_id);
2324 DEFSUBR (Fset_charset_ccl_program);
2325 DEFSUBR (Fset_charset_registry);
2327 DEFSUBR (Fcharset_mapping_table);
2328 DEFSUBR (Fset_charset_mapping_table);
/* Character primitives. */
2332 DEFSUBR (Fdecode_char);
2333 DEFSUBR (Fdecode_builtin_char);
2335 DEFSUBR (Fmake_char);
2336 DEFSUBR (Fchar_charset);
2337 DEFSUBR (Fchar_octet);
2338 DEFSUBR (Fsplit_char);
2340 #ifdef ENABLE_COMPOSITE_CHARS
2341 DEFSUBR (Fmake_composite_char);
2342 DEFSUBR (Fcomposite_char_string);
/* The charsetp symbol and the charset property keywords. */
2345 defsymbol (&Qcharsetp, "charsetp");
2346 defsymbol (&Qregistry, "registry");
2347 defsymbol (&Qfinal, "final");
2348 defsymbol (&Qgraphic, "graphic");
2349 defsymbol (&Qdirection, "direction");
2350 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2351 defsymbol (&Qshort_name, "short-name");
2352 defsymbol (&Qlong_name, "long-name");
/* Values accepted for the 'direction property. */
2354 defsymbol (&Ql2r, "l2r");
2355 defsymbol (&Qr2l, "r2l");
2357 /* Charsets, compatible with FSF 20.3
2358 Naming convention is Script-Charset[-Edition] */
2359 defsymbol (&Qascii, "ascii");
2360 defsymbol (&Qcontrol_1, "control-1");
2361 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2362 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2363 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2364 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2365 defsymbol (&Qthai_tis620, "thai-tis620");
2366 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2367 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2368 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2369 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2370 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2371 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2372 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2373 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2374 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2375 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2376 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2377 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2378 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2379 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2380 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2381 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2383 defsymbol (&Qucs, "ucs");
2384 defsymbol (&Qucs_bmp, "ucs-bmp");
2385 defsymbol (&Qucs_cns, "ucs-cns");
2386 defsymbol (&Qucs_jis, "ucs-jis");
2387 defsymbol (&Qucs_ks, "ucs-ks");
2388 defsymbol (&Qucs_big5, "ucs-big5");
2389 defsymbol (&Qlatin_viscii, "latin-viscii");
2390 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2391 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2392 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2393 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2394 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2395 defsymbol (&Qideograph_gt, "ideograph-gt");
2396 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2397 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2398 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2399 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2400 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2401 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2402 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2403 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2404 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2405 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2406 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2407 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2408 defsymbol (&Qchinese_big5, "chinese-big5");
2409 defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp");
2410 defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
2411 defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
2412 defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
2413 defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
2414 defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
2415 defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
2416 defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
2417 defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
2418 defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
2419 defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
2420 defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
2421 defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
2422 defsymbol (&Qchina3_jef, "china3-jef");
2423 defsymbol (&Qideograph_cbeta, "ideograph-cbeta");
2424 defsymbol (&Qmojikyo, "mojikyo");
2425 defsymbol (&Qmojikyo_2022_1, "mojikyo-2022-1");
2426 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
2427 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
2428 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
2429 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
2430 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
2431 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
2432 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
2433 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
2434 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
2435 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
2436 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
2437 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
2438 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
2439 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
2440 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
2441 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
2442 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
2443 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
2444 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
2445 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
2446 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
2447 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2449 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2450 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2452 defsymbol (&Qcomposite, "composite");
/* Variable-level initialization for this file.  Allocates the zeroed
   charset_lookup structure and registers it with the dumper, resets the
   leading-byte and attribute lookup tables to Qnil, seeds the counters
   used to hand out private leading bytes, and defines the Lisp
   variables `leading-code-private-11' and
   `default-coded-charset-priority-list'.
   NOTE(review): the listing shows a two-level and a three-level reset
   of charset_by_attributes, and two ways of seeding the leading-byte
   counters; presumably these are preprocessor alternatives whose
   conditionals are not visible here. */
2456 vars_of_mule_charset (void)
2463 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2464 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2466 /* Table of charsets indexed by leading byte. */
2467 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2468 chlook->charset_by_leading_byte[i] = Qnil;
2471 /* Table of charsets indexed by type/final-byte. */
2472 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2473 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2474 chlook->charset_by_attributes[i][j] = Qnil;
2476 /* Table of charsets indexed by type/final-byte/direction. */
2477 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2478 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2479 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2480 chlook->charset_by_attributes[i][j][k] = Qnil;
2484 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2486 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2487 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2491 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2492 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2493 Leading-code of private TYPE9N charset of column-width 1.
2495 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2499 Vdefault_coded_charset_priority_list = Qnil;
2500 DEFVAR_LISP ("default-coded-charset-priority-list",
2501 &Vdefault_coded_charset_priority_list /*
2502 Default order of preferred coded-character-sets.
2508 complex_vars_of_mule_charset (void)
2510 staticpro (&Vcharset_hash_table);
2511 Vcharset_hash_table =
2512 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2514 /* Predefined character sets. We store them into variables for
2518 staticpro (&Vcharset_ucs);
2520 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2521 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2522 build_string ("UCS"),
2523 build_string ("UCS"),
2524 build_string ("ISO/IEC 10646"),
2526 Qnil, 0, 0xFFFFFFF, 0, 0);
2527 staticpro (&Vcharset_ucs_bmp);
2529 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2530 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2531 build_string ("BMP"),
2532 build_string ("BMP"),
2533 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2534 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2535 Qnil, 0, 0xFFFF, 0, 0);
2536 staticpro (&Vcharset_ucs_cns);
2538 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2539 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2540 build_string ("UCS for CNS"),
2541 build_string ("UCS for CNS 11643"),
2542 build_string ("ISO/IEC 10646 for CNS 11643"),
2545 staticpro (&Vcharset_ucs_jis);
2547 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2548 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2549 build_string ("UCS for JIS"),
2550 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2551 build_string ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2554 staticpro (&Vcharset_ucs_ks);
2556 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2557 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2558 build_string ("UCS for KS"),
2559 build_string ("UCS for CCS defined by KS"),
2560 build_string ("ISO/IEC 10646 for Korean Standards"),
2563 staticpro (&Vcharset_ucs_big5);
2565 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2566 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2567 build_string ("UCS for Big5"),
2568 build_string ("UCS for Big5"),
2569 build_string ("ISO/IEC 10646 for Big5"),
2573 # define MIN_CHAR_THAI 0
2574 # define MAX_CHAR_THAI 0
2575 /* # define MIN_CHAR_HEBREW 0 */
2576 /* # define MAX_CHAR_HEBREW 0 */
2577 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2578 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2580 staticpro (&Vcharset_ascii);
2582 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2583 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2584 build_string ("ASCII"),
2585 build_string ("ASCII)"),
2586 build_string ("ASCII (ISO646 IRV)"),
2587 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2588 Qnil, 0, 0x7F, 0, 0);
2589 staticpro (&Vcharset_control_1);
2590 Vcharset_control_1 =
2591 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2592 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2593 build_string ("C1"),
2594 build_string ("Control characters"),
2595 build_string ("Control characters 128-191"),
2597 Qnil, 0x80, 0x9F, 0, 0);
2598 staticpro (&Vcharset_latin_iso8859_1);
2599 Vcharset_latin_iso8859_1 =
2600 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2601 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2602 build_string ("Latin-1"),
2603 build_string ("ISO8859-1 (Latin-1)"),
2604 build_string ("ISO8859-1 (Latin-1)"),
2605 build_string ("iso8859-1"),
2606 Qnil, 0xA0, 0xFF, 0, 32);
2607 staticpro (&Vcharset_latin_iso8859_2);
2608 Vcharset_latin_iso8859_2 =
2609 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2610 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2611 build_string ("Latin-2"),
2612 build_string ("ISO8859-2 (Latin-2)"),
2613 build_string ("ISO8859-2 (Latin-2)"),
2614 build_string ("iso8859-2"),
2616 staticpro (&Vcharset_latin_iso8859_3);
2617 Vcharset_latin_iso8859_3 =
2618 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2619 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2620 build_string ("Latin-3"),
2621 build_string ("ISO8859-3 (Latin-3)"),
2622 build_string ("ISO8859-3 (Latin-3)"),
2623 build_string ("iso8859-3"),
2625 staticpro (&Vcharset_latin_iso8859_4);
2626 Vcharset_latin_iso8859_4 =
2627 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2628 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2629 build_string ("Latin-4"),
2630 build_string ("ISO8859-4 (Latin-4)"),
2631 build_string ("ISO8859-4 (Latin-4)"),
2632 build_string ("iso8859-4"),
2634 staticpro (&Vcharset_thai_tis620);
2635 Vcharset_thai_tis620 =
2636 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2637 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2638 build_string ("TIS620"),
2639 build_string ("TIS620 (Thai)"),
2640 build_string ("TIS620.2529 (Thai)"),
2641 build_string ("tis620"),
2642 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2643 staticpro (&Vcharset_greek_iso8859_7);
2644 Vcharset_greek_iso8859_7 =
2645 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2646 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2647 build_string ("ISO8859-7"),
2648 build_string ("ISO8859-7 (Greek)"),
2649 build_string ("ISO8859-7 (Greek)"),
2650 build_string ("iso8859-7"),
2652 staticpro (&Vcharset_arabic_iso8859_6);
2653 Vcharset_arabic_iso8859_6 =
2654 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2655 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2656 build_string ("ISO8859-6"),
2657 build_string ("ISO8859-6 (Arabic)"),
2658 build_string ("ISO8859-6 (Arabic)"),
2659 build_string ("iso8859-6"),
2661 staticpro (&Vcharset_hebrew_iso8859_8);
2662 Vcharset_hebrew_iso8859_8 =
2663 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2664 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2665 build_string ("ISO8859-8"),
2666 build_string ("ISO8859-8 (Hebrew)"),
2667 build_string ("ISO8859-8 (Hebrew)"),
2668 build_string ("iso8859-8"),
2670 0 /* MIN_CHAR_HEBREW */,
2671 0 /* MAX_CHAR_HEBREW */, 0, 32);
2672 staticpro (&Vcharset_katakana_jisx0201);
2673 Vcharset_katakana_jisx0201 =
2674 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2675 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2676 build_string ("JISX0201 Kana"),
2677 build_string ("JISX0201.1976 (Japanese Kana)"),
2678 build_string ("JISX0201.1976 Japanese Kana"),
2679 build_string ("jisx0201\\.1976"),
2681 staticpro (&Vcharset_latin_jisx0201);
2682 Vcharset_latin_jisx0201 =
2683 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2684 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2685 build_string ("JISX0201 Roman"),
2686 build_string ("JISX0201.1976 (Japanese Roman)"),
2687 build_string ("JISX0201.1976 Japanese Roman"),
2688 build_string ("jisx0201\\.1976"),
2690 staticpro (&Vcharset_cyrillic_iso8859_5);
2691 Vcharset_cyrillic_iso8859_5 =
2692 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2693 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2694 build_string ("ISO8859-5"),
2695 build_string ("ISO8859-5 (Cyrillic)"),
2696 build_string ("ISO8859-5 (Cyrillic)"),
2697 build_string ("iso8859-5"),
2699 staticpro (&Vcharset_latin_iso8859_9);
2700 Vcharset_latin_iso8859_9 =
2701 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2702 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2703 build_string ("Latin-5"),
2704 build_string ("ISO8859-9 (Latin-5)"),
2705 build_string ("ISO8859-9 (Latin-5)"),
2706 build_string ("iso8859-9"),
2708 staticpro (&Vcharset_japanese_jisx0208_1978);
2709 Vcharset_japanese_jisx0208_1978 =
2710 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2711 Qjapanese_jisx0208_1978, 94, 2,
2712 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2713 build_string ("JIS X0208:1978"),
2714 build_string ("JIS X0208:1978 (Japanese)"),
2716 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2717 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2719 staticpro (&Vcharset_chinese_gb2312);
2720 Vcharset_chinese_gb2312 =
2721 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2722 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2723 build_string ("GB2312"),
2724 build_string ("GB2312)"),
2725 build_string ("GB2312 Chinese simplified"),
2726 build_string ("gb2312"),
2728 staticpro (&Vcharset_chinese_gb12345);
2729 Vcharset_chinese_gb12345 =
2730 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2731 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2732 build_string ("G1"),
2733 build_string ("GB 12345)"),
2734 build_string ("GB 12345-1990"),
2735 build_string ("GB12345\\(\\.1990\\)?-0"),
2737 staticpro (&Vcharset_japanese_jisx0208);
2738 Vcharset_japanese_jisx0208 =
2739 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2740 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2741 build_string ("JISX0208"),
2742 build_string ("JIS X0208:1983 (Japanese)"),
2743 build_string ("JIS X0208:1983 Japanese Kanji"),
2744 build_string ("jisx0208\\.1983"),
2747 staticpro (&Vcharset_japanese_jisx0208_1990);
2748 Vcharset_japanese_jisx0208_1990 =
2749 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2750 Qjapanese_jisx0208_1990, 94, 2,
2751 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2752 build_string ("JISX0208-1990"),
2753 build_string ("JIS X0208:1990 (Japanese)"),
2754 build_string ("JIS X0208:1990 Japanese Kanji"),
2755 build_string ("jisx0208\\.1990"),
2757 MIN_CHAR_JIS_X0208_1990,
2758 MAX_CHAR_JIS_X0208_1990, 0, 33);
2760 staticpro (&Vcharset_korean_ksc5601);
2761 Vcharset_korean_ksc5601 =
2762 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2763 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2764 build_string ("KSC5601"),
2765 build_string ("KSC5601 (Korean"),
2766 build_string ("KSC5601 Korean Hangul and Hanja"),
2767 build_string ("ksc5601"),
2769 staticpro (&Vcharset_japanese_jisx0212);
2770 Vcharset_japanese_jisx0212 =
2771 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2772 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2773 build_string ("JISX0212"),
2774 build_string ("JISX0212 (Japanese)"),
2775 build_string ("JISX0212 Japanese Supplement"),
2776 build_string ("jisx0212"),
/* Builds a regexp string literal for one CNS 11643 plane by adjacent
   string-literal concatenation: the fixed prefix pattern, then N, then a
   trailing "$".  N must itself be a string literal (the uses below pass
   "1" and "2"); the result is handed to build_string as an argument of
   make_charset.  */
2779 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2780 staticpro (&Vcharset_chinese_cns11643_1);
2781 Vcharset_chinese_cns11643_1 =
2782 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2783 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2784 build_string ("CNS11643-1"),
2785 build_string ("CNS11643-1 (Chinese traditional)"),
2787 ("CNS 11643 Plane 1 Chinese traditional"),
2788 build_string (CHINESE_CNS_PLANE_RE("1")),
2790 staticpro (&Vcharset_chinese_cns11643_2);
2791 Vcharset_chinese_cns11643_2 =
2792 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2793 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2794 build_string ("CNS11643-2"),
2795 build_string ("CNS11643-2 (Chinese traditional)"),
2797 ("CNS 11643 Plane 2 Chinese traditional"),
2798 build_string (CHINESE_CNS_PLANE_RE("2")),
2801 staticpro (&Vcharset_latin_tcvn5712);
2802 Vcharset_latin_tcvn5712 =
2803 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2804 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2805 build_string ("TCVN 5712"),
2806 build_string ("TCVN 5712 (VSCII-2)"),
2807 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
2808 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2810 staticpro (&Vcharset_latin_viscii_lower);
2811 Vcharset_latin_viscii_lower =
2812 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2813 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2814 build_string ("VISCII lower"),
2815 build_string ("VISCII lower (Vietnamese)"),
2816 build_string ("VISCII lower (Vietnamese)"),
2817 build_string ("MULEVISCII-LOWER"),
2819 staticpro (&Vcharset_latin_viscii_upper);
2820 Vcharset_latin_viscii_upper =
2821 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2822 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2823 build_string ("VISCII upper"),
2824 build_string ("VISCII upper (Vietnamese)"),
2825 build_string ("VISCII upper (Vietnamese)"),
2826 build_string ("MULEVISCII-UPPER"),
2828 staticpro (&Vcharset_latin_viscii);
2829 Vcharset_latin_viscii =
2830 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2831 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2832 build_string ("VISCII"),
2833 build_string ("VISCII 1.1 (Vietnamese)"),
2834 build_string ("VISCII 1.1 (Vietnamese)"),
2835 build_string ("VISCII1\\.1"),
2837 staticpro (&Vcharset_chinese_big5);
2838 Vcharset_chinese_big5 =
2839 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2840 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2841 build_string ("Big5"),
2842 build_string ("Big5"),
2843 build_string ("Big5 Chinese traditional"),
2844 build_string ("big5"),
2846 0 /* MIN_CHAR_BIG5_CDP */,
2847 0 /* MAX_CHAR_BIG5_CDP */, 0, 0);
2848 staticpro (&Vcharset_chinese_big5_cdp);
2849 Vcharset_chinese_big5_cdp =
2850 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
2851 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2852 build_string ("Big5-CDP"),
2853 build_string ("Big5 + CDP extension"),
2854 build_string ("Big5 with CDP extension"),
2855 build_string ("big5\\.cdp-0"),
2856 Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP, 0, 0);
2857 #define DEF_HANZIKU(n) \
2858 staticpro (&Vcharset_ideograph_hanziku_##n); \
2859 Vcharset_ideograph_hanziku_##n = \
2860 make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
2861 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
2862 build_string ("HZK-"#n), \
2863 build_string ("HANZIKU-"#n), \
2864 build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
2866 ("hanziku-"#n"$"), \
2867 Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, 0, 0);
2880 staticpro (&Vcharset_china3_jef);
2881 Vcharset_china3_jef =
2882 make_charset (LEADING_BYTE_CHINA3_JEF, Qchina3_jef, 256, 2,
2883 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2884 build_string ("JC3"),
2885 build_string ("JEF + CHINA3"),
2886 build_string ("JEF + CHINA3 private characters"),
2887 build_string ("china3jef-0"),
2888 Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF, 0, 0);
2889 staticpro (&Vcharset_ideograph_cbeta);
2890 Vcharset_ideograph_cbeta =
2891 make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,
2892 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2893 build_string ("CB"),
2894 build_string ("CBETA"),
2895 build_string ("CBETA private characters"),
2896 build_string ("cbeta-0"),
2897 Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA, 0, 0);
2898 staticpro (&Vcharset_ideograph_gt);
2899 Vcharset_ideograph_gt =
2900 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
2901 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2902 build_string ("GT"),
2903 build_string ("GT"),
2904 build_string ("GT"),
2906 Qnil, MIN_CHAR_GT, MAX_CHAR_GT, 0, 0);
2907 #define DEF_GT_PJ(n) \
2908 staticpro (&Vcharset_ideograph_gt_pj_##n); \
2909 Vcharset_ideograph_gt_pj_##n = \
2910 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
2911 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
2912 build_string ("GT-PJ-"#n), \
2913 build_string ("GT (pseudo JIS encoding) part "#n), \
2914 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
2916 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
2930 staticpro (&Vcharset_ideograph_daikanwa);
2931 Vcharset_ideograph_daikanwa =
2932 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
2933 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2934 build_string ("Daikanwa"),
2935 build_string ("Morohashi's Daikanwa"),
2936 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
2937 build_string ("Daikanwa"),
2938 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
2939 staticpro (&Vcharset_mojikyo);
2941 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
2942 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2943 build_string ("Mojikyo"),
2944 build_string ("Mojikyo"),
2945 build_string ("Konjaku-Mojikyo"),
2947 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
2948 staticpro (&Vcharset_mojikyo_2022_1);
2949 Vcharset_mojikyo_2022_1 =
2950 make_charset (LEADING_BYTE_MOJIKYO_2022_1, Qmojikyo_2022_1, 94, 3,
2951 2, 2, ':', CHARSET_LEFT_TO_RIGHT,
2952 build_string ("Mojikyo-2022-1"),
2953 build_string ("Mojikyo ISO-2022 Part 1"),
2954 build_string ("Konjaku-Mojikyo for ISO/IEC 2022 Part 1"),
2958 #define DEF_MOJIKYO_PJ(n) \
2959 staticpro (&Vcharset_mojikyo_pj_##n); \
2960 Vcharset_mojikyo_pj_##n = \
2961 make_charset (LEADING_BYTE_MOJIKYO_PJ_##n, Qmojikyo_pj_##n, 94, 2, \
2962 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
2963 build_string ("Mojikyo-PJ-"#n), \
2964 build_string ("Mojikyo (pseudo JIS encoding) part "#n), \
2966 ("Konjaku-Mojikyo (pseudo JIS encoding) part "#n), \
2968 ("\\(MojikyoPJ-"#n "\\|jisx0208\\.Mojikyo-"#n "\\)$"), \
2980 DEF_MOJIKYO_PJ (10);
2981 DEF_MOJIKYO_PJ (11);
2982 DEF_MOJIKYO_PJ (12);
2983 DEF_MOJIKYO_PJ (13);
2984 DEF_MOJIKYO_PJ (14);
2985 DEF_MOJIKYO_PJ (15);
2986 DEF_MOJIKYO_PJ (16);
2987 DEF_MOJIKYO_PJ (17);
2988 DEF_MOJIKYO_PJ (18);
2989 DEF_MOJIKYO_PJ (19);
2990 DEF_MOJIKYO_PJ (20);
2991 DEF_MOJIKYO_PJ (21);
2993 staticpro (&Vcharset_ethiopic_ucs);
2994 Vcharset_ethiopic_ucs =
2995 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
2996 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2997 build_string ("Ethiopic (UCS)"),
2998 build_string ("Ethiopic (UCS)"),
2999 build_string ("Ethiopic of UCS"),
3000 build_string ("Ethiopic-Unicode"),
3001 Qnil, 0x1200, 0x137F, 0x1200, 0);
3003 staticpro (&Vcharset_chinese_big5_1);
3004 Vcharset_chinese_big5_1 =
3005 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3006 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3007 build_string ("Big5"),
3008 build_string ("Big5 (Level-1)"),
3010 ("Big5 Level-1 Chinese traditional"),
3011 build_string ("big5"),
3013 staticpro (&Vcharset_chinese_big5_2);
3014 Vcharset_chinese_big5_2 =
3015 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3016 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3017 build_string ("Big5"),
3018 build_string ("Big5 (Level-2)"),
3020 ("Big5 Level-2 Chinese traditional"),
3021 build_string ("big5"),
3024 #ifdef ENABLE_COMPOSITE_CHARS
3025 /* #### For simplicity, we put composite chars into a 96x96 charset.
3026 This is going to lead to problems because you can run out of
3027 room, esp. as we don't yet recycle numbers. */
3028 staticpro (&Vcharset_composite);
3029 Vcharset_composite =
3030 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3031 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3032 build_string ("Composite"),
3033 build_string ("Composite characters"),
3034 build_string ("Composite characters"),
3037 /* #### not dumped properly */
3038 composite_char_row_next = 32;
3039 composite_char_col_next = 32;
3041 Vcomposite_char_string2char_hash_table =
3042 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3043 Vcomposite_char_char2string_hash_table =
3044 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3045 staticpro (&Vcomposite_char_string2char_hash_table);
3046 staticpro (&Vcomposite_char_char2string_hash_table);
3047 #endif /* ENABLE_COMPOSITE_CHARS */