1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_cns;
70 Lisp_Object Vcharset_ucs_jis;
71 Lisp_Object Vcharset_ucs_ks;
72 Lisp_Object Vcharset_ucs_big5;
73 Lisp_Object Vcharset_latin_viscii;
74 Lisp_Object Vcharset_latin_tcvn5712;
75 Lisp_Object Vcharset_latin_viscii_lower;
76 Lisp_Object Vcharset_latin_viscii_upper;
77 Lisp_Object Vcharset_chinese_big5;
78 Lisp_Object Vcharset_chinese_big5_cdp;
79 Lisp_Object Vcharset_ideograph_hanziku_1;
80 Lisp_Object Vcharset_ideograph_hanziku_2;
81 Lisp_Object Vcharset_ideograph_hanziku_3;
82 Lisp_Object Vcharset_ideograph_hanziku_4;
83 Lisp_Object Vcharset_ideograph_hanziku_5;
84 Lisp_Object Vcharset_ideograph_hanziku_6;
85 Lisp_Object Vcharset_ideograph_hanziku_7;
86 Lisp_Object Vcharset_ideograph_hanziku_8;
87 Lisp_Object Vcharset_ideograph_hanziku_9;
88 Lisp_Object Vcharset_ideograph_hanziku_10;
89 Lisp_Object Vcharset_ideograph_hanziku_11;
90 Lisp_Object Vcharset_ideograph_hanziku_12;
91 Lisp_Object Vcharset_china3_jef;
92 Lisp_Object Vcharset_ideograph_cbeta;
93 Lisp_Object Vcharset_ideograph_gt;
94 Lisp_Object Vcharset_ideograph_gt_pj_1;
95 Lisp_Object Vcharset_ideograph_gt_pj_2;
96 Lisp_Object Vcharset_ideograph_gt_pj_3;
97 Lisp_Object Vcharset_ideograph_gt_pj_4;
98 Lisp_Object Vcharset_ideograph_gt_pj_5;
99 Lisp_Object Vcharset_ideograph_gt_pj_6;
100 Lisp_Object Vcharset_ideograph_gt_pj_7;
101 Lisp_Object Vcharset_ideograph_gt_pj_8;
102 Lisp_Object Vcharset_ideograph_gt_pj_9;
103 Lisp_Object Vcharset_ideograph_gt_pj_10;
104 Lisp_Object Vcharset_ideograph_gt_pj_11;
105 Lisp_Object Vcharset_ideograph_daikanwa;
106 Lisp_Object Vcharset_mojikyo;
107 Lisp_Object Vcharset_mojikyo_2022_1;
108 Lisp_Object Vcharset_mojikyo_pj_1;
109 Lisp_Object Vcharset_mojikyo_pj_2;
110 Lisp_Object Vcharset_mojikyo_pj_3;
111 Lisp_Object Vcharset_mojikyo_pj_4;
112 Lisp_Object Vcharset_mojikyo_pj_5;
113 Lisp_Object Vcharset_mojikyo_pj_6;
114 Lisp_Object Vcharset_mojikyo_pj_7;
115 Lisp_Object Vcharset_mojikyo_pj_8;
116 Lisp_Object Vcharset_mojikyo_pj_9;
117 Lisp_Object Vcharset_mojikyo_pj_10;
118 Lisp_Object Vcharset_mojikyo_pj_11;
119 Lisp_Object Vcharset_mojikyo_pj_12;
120 Lisp_Object Vcharset_mojikyo_pj_13;
121 Lisp_Object Vcharset_mojikyo_pj_14;
122 Lisp_Object Vcharset_mojikyo_pj_15;
123 Lisp_Object Vcharset_mojikyo_pj_16;
124 Lisp_Object Vcharset_mojikyo_pj_17;
125 Lisp_Object Vcharset_mojikyo_pj_18;
126 Lisp_Object Vcharset_mojikyo_pj_19;
127 Lisp_Object Vcharset_mojikyo_pj_20;
128 Lisp_Object Vcharset_mojikyo_pj_21;
129 Lisp_Object Vcharset_ethiopic_ucs;
131 Lisp_Object Vcharset_chinese_big5_1;
132 Lisp_Object Vcharset_chinese_big5_2;
134 #ifdef ENABLE_COMPOSITE_CHARS
135 Lisp_Object Vcharset_composite;
137 /* Hash tables for composite chars. One maps string representing
138 composed chars to their equivalent chars; one goes the
140 Lisp_Object Vcomposite_char_char2string_hash_table;
141 Lisp_Object Vcomposite_char_string2char_hash_table;
143 static int composite_char_row_next;
144 static int composite_char_col_next;
146 #endif /* ENABLE_COMPOSITE_CHARS */
148 struct charset_lookup *chlook;
150 static const struct lrecord_description charset_lookup_description_1[] = {
151 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
160 static const struct struct_description charset_lookup_description = {
161 sizeof (struct charset_lookup),
162 charset_lookup_description_1
166 /* Table of number of bytes in the string representation of a character
167 indexed by the first byte of that representation.
169 rep_bytes_by_first_byte(c) is more efficient than the equivalent
170 canonical computation:
172 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
174 const Bytecount rep_bytes_by_first_byte[0xA0] =
175 { /* 0x00 - 0x7f are for straight ASCII */
176 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
177 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
178 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
179 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
180 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
181 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
182 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
183 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
184 /* 0x80 - 0x8f are for Dimension-1 official charsets */
186 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
188 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
190 /* 0x90 - 0x9d are for Dimension-2 official charsets */
191 /* 0x9e is for Dimension-1 private charsets */
192 /* 0x9f is for Dimension-2 private charsets */
193 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
199 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
201 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
203 /* ad-hoc method for `ascii' */
204 if ((CHARSET_CHARS (cs) == 94) &&
205 (CHARSET_BYTE_OFFSET (cs) != 33))
206 return 128 - CHARSET_BYTE_OFFSET (cs);
208 return CHARSET_CHARS (cs);
211 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
213 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
215 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
219 if (XVECTOR_LENGTH (v) > ccs_len)
222 for (i = 0; i < XVECTOR_LENGTH (v); i++)
224 Lisp_Object c = XVECTOR_DATA(v)[i];
226 if (!NILP (c) && !CHARP (c))
230 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
242 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
245 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
255 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
256 nv = XVECTOR_DATA(v)[i];
262 XVECTOR_DATA(v)[i] = Qnil;
266 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
267 int code_point, Lisp_Object character);
269 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
270 int code_point, Lisp_Object character)
274 int ccs_len = XVECTOR_LENGTH (v);
279 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
280 nv = XVECTOR_DATA(v)[i];
284 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
290 XVECTOR_DATA(v)[i] = character;
294 put_char_ccs_code_point (Lisp_Object character,
295 Lisp_Object ccs, Lisp_Object value)
297 Lisp_Object encoding_table;
299 if (!EQ (XCHARSET_NAME (ccs), Qucs)
300 || (XCHAR (character) != XINT (value)))
302 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
303 int dim = XCHARSET_DIMENSION (ccs);
304 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
305 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
309 { /* obsolete representation: value must be a list of bytes */
310 Lisp_Object ret = Fcar (value);
314 signal_simple_error ("Invalid value for coded-charset", value);
315 code_point = XINT (ret);
316 if (XCHARSET_GRAPHIC (ccs) == 1)
324 signal_simple_error ("Invalid value for coded-charset",
328 signal_simple_error ("Invalid value for coded-charset",
331 if (XCHARSET_GRAPHIC (ccs) == 1)
333 code_point = (code_point << 8) | j;
336 value = make_int (code_point);
338 else if (INTP (value))
340 code_point = XINT (value);
341 if (XCHARSET_GRAPHIC (ccs) == 1)
343 code_point &= 0x7F7F7F7F;
344 value = make_int (code_point);
348 signal_simple_error ("Invalid value for coded-charset", value);
352 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
355 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
360 XCHARSET_DECODING_TABLE (ccs)
361 = v = make_older_vector (ccs_len, Qnil);
364 decoding_table_put_char (v, dim, byte_offset, code_point, character);
366 if (NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
368 XCHARSET_ENCODING_TABLE (ccs)
369 = encoding_table = make_char_id_table (Qnil);
371 put_char_id_table (XCHAR_TABLE(encoding_table), character, value);
376 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
378 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
379 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
381 if (VECTORP (decoding_table))
383 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
387 decoding_table_remove_char (decoding_table,
388 XCHARSET_DIMENSION (ccs),
389 XCHARSET_BYTE_OFFSET (ccs),
393 if (CHAR_TABLEP (encoding_table))
395 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qnil);
403 int leading_code_private_11;
406 Lisp_Object Qcharsetp;
408 /* Qdoc_string, Qdimension, Qchars defined in general.c */
409 Lisp_Object Qregistry, Qfinal, Qgraphic;
410 Lisp_Object Qdirection;
411 Lisp_Object Qreverse_direction_charset;
412 Lisp_Object Qleading_byte;
413 Lisp_Object Qshort_name, Qlong_name;
429 Qjapanese_jisx0208_1978,
433 Qjapanese_jisx0208_1990,
449 Qvietnamese_viscii_lower,
450 Qvietnamese_viscii_upper,
453 Qideograph_hanziku_1,
454 Qideograph_hanziku_2,
455 Qideograph_hanziku_3,
456 Qideograph_hanziku_4,
457 Qideograph_hanziku_5,
458 Qideograph_hanziku_6,
459 Qideograph_hanziku_7,
460 Qideograph_hanziku_8,
461 Qideograph_hanziku_9,
462 Qideograph_hanziku_10,
463 Qideograph_hanziku_11,
464 Qideograph_hanziku_12,
509 Lisp_Object Ql2r, Qr2l;
511 Lisp_Object Vcharset_hash_table;
513 /* Composite characters are characters constructed by overstriking two
514 or more regular characters.
516 1) The old Mule implementation involves storing composite characters
517 in a buffer as a tag followed by all of the actual characters
518 used to make up the composite character. I think this is a bad
519 idea; it greatly complicates code that wants to handle strings
520 one character at a time because it has to deal with the possibility
521 of great big ungainly characters. It's much more reasonable to
522 simply store an index into a table of composite characters.
524 2) The current implementation only allows for 16,384 separate
525 composite characters over the lifetime of the XEmacs process.
526 This could become a potential problem if the user
527 edited lots of different files that use composite characters.
528 Due to FSF bogosity, increasing the number of allowable
529 composite characters under Mule would decrease the number
530 of possible faces that can exist. Mule already has shrunk
531 this to 2048, and further shrinkage would become uncomfortable.
532 No such problems exist in XEmacs.
534 Composite characters could be represented as 0x80 C1 C2 C3,
535 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
536 for slightly under 2^20 (one million) composite characters
537 over the XEmacs process lifetime, and you only need to
538 increase the size of a Mule character from 19 to 21 bits.
539 Or you could use 0x80 C1 C2 C3 C4, allowing for about
540 85 million (slightly over 2^26) composite characters. */
543 /************************************************************************/
544 /* Basic Emchar functions */
545 /************************************************************************/
547 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
548 string in STR. Returns the number of bytes stored.
549 Do not call this directly. Use the macro set_charptr_emchar() instead.
553 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
568 else if ( c <= 0x7ff )
570 *p++ = (c >> 6) | 0xc0;
571 *p++ = (c & 0x3f) | 0x80;
573 else if ( c <= 0xffff )
575 *p++ = (c >> 12) | 0xe0;
576 *p++ = ((c >> 6) & 0x3f) | 0x80;
577 *p++ = (c & 0x3f) | 0x80;
579 else if ( c <= 0x1fffff )
581 *p++ = (c >> 18) | 0xf0;
582 *p++ = ((c >> 12) & 0x3f) | 0x80;
583 *p++ = ((c >> 6) & 0x3f) | 0x80;
584 *p++ = (c & 0x3f) | 0x80;
586 else if ( c <= 0x3ffffff )
588 *p++ = (c >> 24) | 0xf8;
589 *p++ = ((c >> 18) & 0x3f) | 0x80;
590 *p++ = ((c >> 12) & 0x3f) | 0x80;
591 *p++ = ((c >> 6) & 0x3f) | 0x80;
592 *p++ = (c & 0x3f) | 0x80;
596 *p++ = (c >> 30) | 0xfc;
597 *p++ = ((c >> 24) & 0x3f) | 0x80;
598 *p++ = ((c >> 18) & 0x3f) | 0x80;
599 *p++ = ((c >> 12) & 0x3f) | 0x80;
600 *p++ = ((c >> 6) & 0x3f) | 0x80;
601 *p++ = (c & 0x3f) | 0x80;
604 BREAKUP_CHAR (c, charset, c1, c2);
605 lb = CHAR_LEADING_BYTE (c);
606 if (LEADING_BYTE_PRIVATE_P (lb))
607 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
609 if (EQ (charset, Vcharset_control_1))
618 /* Return the first character from a Mule-encoded string in STR,
619 assuming it's non-ASCII. Do not call this directly.
620 Use the macro charptr_emchar() instead. */
623 non_ascii_charptr_emchar (const Bufbyte *str)
636 else if ( b >= 0xf8 )
641 else if ( b >= 0xf0 )
646 else if ( b >= 0xe0 )
651 else if ( b >= 0xc0 )
661 for( ; len > 0; len-- )
664 ch = ( ch << 6 ) | ( b & 0x3f );
668 Bufbyte i0 = *str, i1, i2 = 0;
671 if (i0 == LEADING_BYTE_CONTROL_1)
672 return (Emchar) (*++str - 0x20);
674 if (LEADING_BYTE_PREFIX_P (i0))
679 charset = CHARSET_BY_LEADING_BYTE (i0);
680 if (XCHARSET_DIMENSION (charset) == 2)
683 return MAKE_CHAR (charset, i1, i2);
687 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
688 Do not call this directly. Use the macro valid_char_p() instead. */
692 non_ascii_valid_char_p (Emchar ch)
696 /* Must have only lowest 19 bits set */
700 f1 = CHAR_FIELD1 (ch);
701 f2 = CHAR_FIELD2 (ch);
702 f3 = CHAR_FIELD3 (ch);
708 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
709 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
710 f2 > MAX_CHAR_FIELD2_PRIVATE)
715 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
716 f2 <= MAX_CHAR_FIELD2_PRIVATE))
720 NOTE: This takes advantage of the fact that
721 FIELD2_TO_OFFICIAL_LEADING_BYTE and
722 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
724 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
725 if (EQ (charset, Qnil))
727 return (XCHARSET_CHARS (charset) == 96);
733 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
734 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
735 f1 > MAX_CHAR_FIELD1_PRIVATE)
737 if (f2 < 0x20 || f3 < 0x20)
740 #ifdef ENABLE_COMPOSITE_CHARS
741 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
743 if (UNBOUNDP (Fgethash (make_int (ch),
744 Vcomposite_char_char2string_hash_table,
749 #endif /* ENABLE_COMPOSITE_CHARS */
751 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
752 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
755 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
757 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
760 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
762 if (EQ (charset, Qnil))
764 return (XCHARSET_CHARS (charset) == 96);
770 /************************************************************************/
771 /* Basic string functions */
772 /************************************************************************/
774 /* Copy the character pointed to by SRC into DST. Do not call this
775 directly. Use the macro charptr_copy_char() instead.
776 Return the number of bytes copied. */
779 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
781 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
783 for (i = bytes; i; i--, dst++, src++)
789 /************************************************************************/
790 /* streams of Emchars */
791 /************************************************************************/
793 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
794 The functions below are not meant to be called directly; use
795 the macros in insdel.h. */
798 Lstream_get_emchar_1 (Lstream *stream, int ch)
800 Bufbyte str[MAX_EMCHAR_LEN];
801 Bufbyte *strptr = str;
804 str[0] = (Bufbyte) ch;
806 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
808 int c = Lstream_getc (stream);
809 bufpos_checking_assert (c >= 0);
810 *++strptr = (Bufbyte) c;
812 return charptr_emchar (str);
816 Lstream_fput_emchar (Lstream *stream, Emchar ch)
818 Bufbyte str[MAX_EMCHAR_LEN];
819 Bytecount len = set_charptr_emchar (str, ch);
820 return Lstream_write (stream, str, len);
824 Lstream_funget_emchar (Lstream *stream, Emchar ch)
826 Bufbyte str[MAX_EMCHAR_LEN];
827 Bytecount len = set_charptr_emchar (str, ch);
828 Lstream_unread (stream, str, len);
832 /************************************************************************/
834 /************************************************************************/
837 mark_charset (Lisp_Object obj)
839 Lisp_Charset *cs = XCHARSET (obj);
841 mark_object (cs->short_name);
842 mark_object (cs->long_name);
843 mark_object (cs->doc_string);
844 mark_object (cs->registry);
845 mark_object (cs->ccl_program);
847 mark_object (cs->encoding_table);
848 /* mark_object (cs->decoding_table); */
854 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
856 Lisp_Charset *cs = XCHARSET (obj);
860 error ("printing unreadable object #<charset %s 0x%x>",
861 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
864 write_c_string ("#<charset ", printcharfun);
865 print_internal (CHARSET_NAME (cs), printcharfun, 0);
866 write_c_string (" ", printcharfun);
867 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
868 write_c_string (" ", printcharfun);
869 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
870 write_c_string (" ", printcharfun);
871 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
872 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
874 CHARSET_DIMENSION (cs),
875 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
876 CHARSET_COLUMNS (cs),
877 CHARSET_GRAPHIC (cs),
879 write_c_string (buf, printcharfun);
880 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
881 sprintf (buf, " 0x%x>", cs->header.uid);
882 write_c_string (buf, printcharfun);
885 static const struct lrecord_description charset_description[] = {
886 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
887 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
888 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
889 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
890 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
891 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
892 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
894 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
895 { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
900 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
901 mark_charset, print_charset, 0, 0, 0,
905 /* Make a new charset. */
906 /* #### SJT Should generic properties be allowed? */
908 make_charset (Charset_ID id, Lisp_Object name,
909 unsigned short chars, unsigned char dimension,
910 unsigned char columns, unsigned char graphic,
911 Bufbyte final, unsigned char direction, Lisp_Object short_name,
912 Lisp_Object long_name, Lisp_Object doc,
914 Lisp_Object decoding_table,
915 Emchar ucs_min, Emchar ucs_max,
916 Emchar code_offset, unsigned char byte_offset)
919 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
923 XSETCHARSET (obj, cs);
925 CHARSET_ID (cs) = id;
926 CHARSET_NAME (cs) = name;
927 CHARSET_SHORT_NAME (cs) = short_name;
928 CHARSET_LONG_NAME (cs) = long_name;
929 CHARSET_CHARS (cs) = chars;
930 CHARSET_DIMENSION (cs) = dimension;
931 CHARSET_DIRECTION (cs) = direction;
932 CHARSET_COLUMNS (cs) = columns;
933 CHARSET_GRAPHIC (cs) = graphic;
934 CHARSET_FINAL (cs) = final;
935 CHARSET_DOC_STRING (cs) = doc;
936 CHARSET_REGISTRY (cs) = reg;
937 CHARSET_CCL_PROGRAM (cs) = Qnil;
938 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
940 CHARSET_DECODING_TABLE(cs) = Qnil;
941 CHARSET_ENCODING_TABLE(cs) = Qnil;
942 CHARSET_UCS_MIN(cs) = ucs_min;
943 CHARSET_UCS_MAX(cs) = ucs_max;
944 CHARSET_CODE_OFFSET(cs) = code_offset;
945 CHARSET_BYTE_OFFSET(cs) = byte_offset;
949 if (id == LEADING_BYTE_ASCII)
950 CHARSET_REP_BYTES (cs) = 1;
952 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
954 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
959 /* some charsets do not have final characters. This includes
960 ASCII, Control-1, Composite, and the two faux private
962 unsigned char iso2022_type
963 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
965 if (code_offset == 0)
967 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
968 chlook->charset_by_attributes[iso2022_type][final] = obj;
972 (chlook->charset_by_attributes[iso2022_type][final][direction]));
973 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
977 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
978 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
980 /* Some charsets are "faux" and don't have names or really exist at
981 all except in the leading-byte table. */
983 Fputhash (name, obj, Vcharset_hash_table);
988 get_unallocated_leading_byte (int dimension)
993 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
996 lb = chlook->next_allocated_leading_byte++;
1000 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1003 lb = chlook->next_allocated_1_byte_leading_byte++;
1007 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1010 lb = chlook->next_allocated_2_byte_leading_byte++;
1016 ("No more character sets free for this dimension",
1017 make_int (dimension));
1023 /* Number of Big5 characters which have the same code in 1st byte. */
1025 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
1028 decode_builtin_char (Lisp_Object charset, int code_point)
1032 if (EQ (charset, Vcharset_chinese_big5))
1034 int c1 = code_point >> 8;
1035 int c2 = code_point & 0xFF;
1038 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
1040 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
1041 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
1043 I = (c1 - 0xA1) * BIG5_SAME_ROW
1044 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
1048 charset = Vcharset_chinese_big5_1;
1052 charset = Vcharset_chinese_big5_2;
1053 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1055 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
1058 if ((final = XCHARSET_FINAL (charset)) >= '0')
1060 if (XCHARSET_DIMENSION (charset) == 1)
1062 switch (XCHARSET_CHARS (charset))
1066 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1069 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1077 switch (XCHARSET_CHARS (charset))
1080 return MIN_CHAR_94x94
1081 + (final - '0') * 94 * 94
1082 + (((code_point >> 8) & 0x7F) - 33) * 94
1083 + ((code_point & 0x7F) - 33);
1085 return MIN_CHAR_96x96
1086 + (final - '0') * 96 * 96
1087 + (((code_point >> 8) & 0x7F) - 32) * 96
1088 + ((code_point & 0x7F) - 32);
1095 else if (XCHARSET_UCS_MAX (charset))
1098 = (XCHARSET_DIMENSION (charset) == 1
1100 code_point - XCHARSET_BYTE_OFFSET (charset)
1102 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1103 * XCHARSET_CHARS (charset)
1104 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1105 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
1106 if ((cid < XCHARSET_UCS_MIN (charset))
1107 || (XCHARSET_UCS_MAX (charset) < cid))
1116 range_charset_code_point (Lisp_Object charset, Emchar ch)
1120 if ((XCHARSET_UCS_MIN (charset) <= ch)
1121 && (ch <= XCHARSET_UCS_MAX (charset)))
1123 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1125 if (XCHARSET_CHARS (charset) == 256)
1127 else if (XCHARSET_DIMENSION (charset) == 1)
1128 return d + XCHARSET_BYTE_OFFSET (charset);
1129 else if (XCHARSET_DIMENSION (charset) == 2)
1131 ((d / XCHARSET_CHARS (charset)
1132 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1133 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1134 else if (XCHARSET_DIMENSION (charset) == 3)
1136 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1137 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1138 | ((d / XCHARSET_CHARS (charset)
1139 % XCHARSET_CHARS (charset)
1140 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1141 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1142 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1144 ((d / (XCHARSET_CHARS (charset)
1145 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1146 + XCHARSET_BYTE_OFFSET (charset)) << 24)
1147 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1148 % XCHARSET_CHARS (charset)
1149 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1150 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
1151 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1152 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1154 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1156 if (XCHARSET_DIMENSION (charset) == 1)
1158 if (XCHARSET_CHARS (charset) == 94)
1160 if (((d = ch - (MIN_CHAR_94
1161 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1165 else if (XCHARSET_CHARS (charset) == 96)
1167 if (((d = ch - (MIN_CHAR_96
1168 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1175 else if (XCHARSET_DIMENSION (charset) == 2)
1177 if (XCHARSET_CHARS (charset) == 94)
1179 if (((d = ch - (MIN_CHAR_94x94
1180 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1183 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1185 else if (XCHARSET_CHARS (charset) == 96)
1187 if (((d = ch - (MIN_CHAR_96x96
1188 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1191 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1197 if (EQ (charset, Vcharset_mojikyo_2022_1)
1198 && (MIN_CHAR_MOJIKYO < ch) && (ch < MIN_CHAR_MOJIKYO + 94 * 60 * 94))
1200 int m = ch - MIN_CHAR_MOJIKYO - 1;
1201 int byte1 = m / (94 * 60) + 33;
1202 int byte2 = (m % (94 * 60)) / 94;
1203 int byte3 = m % 94 + 33;
1209 return (byte1 << 16) | (byte2 << 8) | byte3;
1215 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1217 if (c <= MAX_CHAR_BASIC_LATIN)
1219 *charset = Vcharset_ascii;
1224 *charset = Vcharset_control_1;
1229 *charset = Vcharset_latin_iso8859_1;
1233 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1235 *charset = Vcharset_hebrew_iso8859_8;
1236 return c - MIN_CHAR_HEBREW + 0x20;
1239 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1241 *charset = Vcharset_thai_tis620;
1242 return c - MIN_CHAR_THAI + 0x20;
1245 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1246 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1248 return list2 (Vcharset_katakana_jisx0201,
1249 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1252 else if (c <= MAX_CHAR_BMP)
1254 *charset = Vcharset_ucs_bmp;
1257 else if (c < MIN_CHAR_DAIKANWA)
1259 *charset = Vcharset_ucs;
1262 else if (c <= MAX_CHAR_DAIKANWA)
1264 *charset = Vcharset_ideograph_daikanwa;
1265 return c - MIN_CHAR_DAIKANWA;
1268 else if (c <= MAX_CHAR_MOJIKYO_0)
1270 *charset = Vcharset_mojikyo;
1271 return c - MIN_CHAR_MOJIKYO_0;
1274 else if (c < MIN_CHAR_94)
1276 *charset = Vcharset_ucs;
1279 else if (c <= MAX_CHAR_94)
1281 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1282 ((c - MIN_CHAR_94) / 94) + '0',
1283 CHARSET_LEFT_TO_RIGHT);
1284 if (!NILP (*charset))
1285 return ((c - MIN_CHAR_94) % 94) + 33;
1288 *charset = Vcharset_ucs;
1292 else if (c <= MAX_CHAR_96)
1294 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1295 ((c - MIN_CHAR_96) / 96) + '0',
1296 CHARSET_LEFT_TO_RIGHT);
1297 if (!NILP (*charset))
1298 return ((c - MIN_CHAR_96) % 96) + 32;
1301 *charset = Vcharset_ucs;
1305 else if (c <= MAX_CHAR_94x94)
1308 = CHARSET_BY_ATTRIBUTES (94, 2,
1309 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1310 CHARSET_LEFT_TO_RIGHT);
1311 if (!NILP (*charset))
1312 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1313 | (((c - MIN_CHAR_94x94) % 94) + 33);
1316 *charset = Vcharset_ucs;
1320 else if (c <= MAX_CHAR_96x96)
1323 = CHARSET_BY_ATTRIBUTES (96, 2,
1324 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1325 CHARSET_LEFT_TO_RIGHT);
1326 if (!NILP (*charset))
1327 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1328 | (((c - MIN_CHAR_96x96) % 96) + 32);
1331 *charset = Vcharset_ucs;
1336 else if (c < MIN_CHAR_MOJIKYO)
1338 *charset = Vcharset_ucs;
1341 else if (c <= MAX_CHAR_MOJIKYO)
1343 *charset = Vcharset_mojikyo;
1344 return c - MIN_CHAR_MOJIKYO;
1346 else if (c < MIN_CHAR_CHINA3_JEF)
1348 *charset = Vcharset_ucs;
1351 else if (c <= MAX_CHAR_CHINA3_JEF)
1353 *charset = Vcharset_china3_jef;
1354 return c - MIN_CHAR_CHINA3_JEF;
1356 else if (c <= MAX_CHAR_CBETA)
1358 *charset = Vcharset_ideograph_cbeta;
1359 return c - MIN_CHAR_CBETA;
1364 *charset = Vcharset_ucs;
1369 Lisp_Object Vdefault_coded_charset_priority_list;
1373 /************************************************************************/
1374 /* Basic charset Lisp functions */
1375 /************************************************************************/
1377 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1378 Return non-nil if OBJECT is a charset.
1382 return CHARSETP (object) ? Qt : Qnil;
1385 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1386 Retrieve the charset of the given name.
1387 If CHARSET-OR-NAME is a charset object, it is simply returned.
1388 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1389 nil is returned. Otherwise the associated charset object is returned.
1393 if (CHARSETP (charset_or_name))
1394 return charset_or_name;
1396 CHECK_SYMBOL (charset_or_name);
1397 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1400 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1401 Retrieve the charset of the given name.
1402 Same as `find-charset' except an error is signalled if there is no such
1403 charset instead of returning nil.
1407 Lisp_Object charset = Ffind_charset (name);
1410 signal_simple_error ("No such charset", name);
1414 /* We store the charsets in hash tables with the names as the key and the
1415 actual charset object as the value. Occasionally we need to use them
1416 in a list format. These routines provide us with that. */
1417 struct charset_list_closure
1419 Lisp_Object *charset_list;
1423 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1424 void *charset_list_closure)
1426 /* This function can GC */
1427 struct charset_list_closure *chcl =
1428 (struct charset_list_closure*) charset_list_closure;
1429 Lisp_Object *charset_list = chcl->charset_list;
1431 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1435 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1436 Return a list of the names of all defined charsets.
1440 Lisp_Object charset_list = Qnil;
1441 struct gcpro gcpro1;
1442 struct charset_list_closure charset_list_closure;
1444 GCPRO1 (charset_list);
1445 charset_list_closure.charset_list = &charset_list;
1446 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1447 &charset_list_closure);
1450 return charset_list;
1453 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1454 Return the name of charset CHARSET.
1458 return XCHARSET_NAME (Fget_charset (charset));
1461 /* #### SJT Should generic properties be allowed? */
1462 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1463 Define a new character set.
1464 This function is for use with Mule support.
1465 NAME is a symbol, the name by which the character set is normally referred.
1466 DOC-STRING is a string describing the character set.
1467 PROPS is a property list, describing the specific nature of the
1468 character set. Recognized properties are:
1470 'short-name Short version of the charset name (ex: Latin-1)
1471 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1472 'registry A regular expression matching the font registry field for
1474 'dimension Number of octets used to index a character in this charset.
1475 Either 1 or 2. Defaults to 1.
1476 'columns Number of columns used to display a character in this charset.
1477 Only used in TTY mode. (Under X, the actual width of a
1478 character can be derived from the font used to display the
1479 characters.) If unspecified, defaults to the dimension
1480 (this is almost always the correct value).
1481 'chars Number of characters in each dimension (94 or 96).
1482 Defaults to 94. Note that if the dimension is 2, the
1483 character set thus described is 94x94 or 96x96.
1484 'final Final byte of ISO 2022 escape sequence. Must be
1485 supplied. Each combination of (DIMENSION, CHARS) defines a
1486 separate namespace for final bytes. Note that ISO
1487 2022 restricts the final byte to the range
1488 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1489 dimension == 2. Note also that final bytes in the range
1490 0x30 - 0x3F are reserved for user-defined (not official)
1492 'graphic 0 (use left half of font on output) or 1 (use right half
1493 of font on output). Defaults to 0. For example, for
1494 a font whose registry is ISO8859-1, the left half
1495 (octets 0x20 - 0x7F) is the `ascii' character set, while
1496 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1497 character set. With 'graphic set to 0, the octets
1498 will have their high bit cleared; with it set to 1,
1499 the octets will have their high bit set.
1500 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1502 'ccl-program A compiled CCL program used to convert a character in
1503 this charset into an index into the font. This is in
1504 addition to the 'graphic property. The CCL program
1505 is passed the octets of the character, with the high
1506 bit cleared and set depending upon whether the value
1507 of the 'graphic property is 0 or 1.
1509 (name, doc_string, props))
1511 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1512 int direction = CHARSET_LEFT_TO_RIGHT;
1513 Lisp_Object registry = Qnil;
1514 Lisp_Object charset;
1515 Lisp_Object ccl_program = Qnil;
1516 Lisp_Object short_name = Qnil, long_name = Qnil;
1517 int byte_offset = -1;
1519 CHECK_SYMBOL (name);
1520 if (!NILP (doc_string))
1521 CHECK_STRING (doc_string);
1523 charset = Ffind_charset (name);
1524 if (!NILP (charset))
1525 signal_simple_error ("Cannot redefine existing charset", name);
1528 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1530 if (EQ (keyword, Qshort_name))
1532 CHECK_STRING (value);
1536 if (EQ (keyword, Qlong_name))
1538 CHECK_STRING (value);
1542 else if (EQ (keyword, Qdimension))
1545 dimension = XINT (value);
1546 if (dimension < 1 || dimension > 2)
1547 signal_simple_error ("Invalid value for 'dimension", value);
1550 else if (EQ (keyword, Qchars))
1553 chars = XINT (value);
1554 if (chars != 94 && chars != 96)
1555 signal_simple_error ("Invalid value for 'chars", value);
1558 else if (EQ (keyword, Qcolumns))
1561 columns = XINT (value);
1562 if (columns != 1 && columns != 2)
1563 signal_simple_error ("Invalid value for 'columns", value);
1566 else if (EQ (keyword, Qgraphic))
1569 graphic = XINT (value);
1571 if (graphic < 0 || graphic > 2)
1573 if (graphic < 0 || graphic > 1)
1575 signal_simple_error ("Invalid value for 'graphic", value);
1578 else if (EQ (keyword, Qregistry))
1580 CHECK_STRING (value);
1584 else if (EQ (keyword, Qdirection))
1586 if (EQ (value, Ql2r))
1587 direction = CHARSET_LEFT_TO_RIGHT;
1588 else if (EQ (value, Qr2l))
1589 direction = CHARSET_RIGHT_TO_LEFT;
1591 signal_simple_error ("Invalid value for 'direction", value);
1594 else if (EQ (keyword, Qfinal))
1596 CHECK_CHAR_COERCE_INT (value);
1597 final = XCHAR (value);
1598 if (final < '0' || final > '~')
1599 signal_simple_error ("Invalid value for 'final", value);
1602 else if (EQ (keyword, Qccl_program))
1604 struct ccl_program test_ccl;
1606 if (setup_ccl_program (&test_ccl, value) < 0)
1607 signal_simple_error ("Invalid value for 'ccl-program", value);
1608 ccl_program = value;
1612 signal_simple_error ("Unrecognized property", keyword);
1617 error ("'final must be specified");
1618 if (dimension == 2 && final > 0x5F)
1620 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1623 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1624 CHARSET_LEFT_TO_RIGHT)) ||
1625 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1626 CHARSET_RIGHT_TO_LEFT)))
1628 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1630 id = get_unallocated_leading_byte (dimension);
1632 if (NILP (doc_string))
1633 doc_string = build_string ("");
1635 if (NILP (registry))
1636 registry = build_string ("");
1638 if (NILP (short_name))
1639 XSETSTRING (short_name, XSYMBOL (name)->name);
1641 if (NILP (long_name))
1642 long_name = doc_string;
1645 columns = dimension;
1647 if (byte_offset < 0)
1651 else if (chars == 96)
1657 charset = make_charset (id, name, chars, dimension, columns, graphic,
1658 final, direction, short_name, long_name,
1659 doc_string, registry,
1660 Qnil, 0, 0, 0, byte_offset);
1661 if (!NILP (ccl_program))
1662 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1666 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1668 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1669 NEW-NAME is the name of the new charset. Return the new charset.
1671 (charset, new_name))
1673 Lisp_Object new_charset = Qnil;
1674 int id, chars, dimension, columns, graphic, final;
1676 Lisp_Object registry, doc_string, short_name, long_name;
1679 charset = Fget_charset (charset);
1680 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1681 signal_simple_error ("Charset already has reverse-direction charset",
1684 CHECK_SYMBOL (new_name);
1685 if (!NILP (Ffind_charset (new_name)))
1686 signal_simple_error ("Cannot redefine existing charset", new_name);
1688 cs = XCHARSET (charset);
1690 chars = CHARSET_CHARS (cs);
1691 dimension = CHARSET_DIMENSION (cs);
1692 columns = CHARSET_COLUMNS (cs);
1693 id = get_unallocated_leading_byte (dimension);
1695 graphic = CHARSET_GRAPHIC (cs);
1696 final = CHARSET_FINAL (cs);
1697 direction = CHARSET_RIGHT_TO_LEFT;
1698 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1699 direction = CHARSET_LEFT_TO_RIGHT;
1700 doc_string = CHARSET_DOC_STRING (cs);
1701 short_name = CHARSET_SHORT_NAME (cs);
1702 long_name = CHARSET_LONG_NAME (cs);
1703 registry = CHARSET_REGISTRY (cs);
1705 new_charset = make_charset (id, new_name, chars, dimension, columns,
1706 graphic, final, direction, short_name, long_name,
1707 doc_string, registry,
1709 CHARSET_DECODING_TABLE(cs),
1710 CHARSET_UCS_MIN(cs),
1711 CHARSET_UCS_MAX(cs),
1712 CHARSET_CODE_OFFSET(cs),
1713 CHARSET_BYTE_OFFSET(cs)
1719 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1720 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1725 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1726 Define symbol ALIAS as an alias for CHARSET.
1730 CHECK_SYMBOL (alias);
1731 charset = Fget_charset (charset);
1732 return Fputhash (alias, charset, Vcharset_hash_table);
1735 /* #### Reverse direction charsets not yet implemented. */
1737 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1739 Return the reverse-direction charset parallel to CHARSET, if any.
1740 This is the charset with the same properties (in particular, the same
1741 dimension, number of characters per dimension, and final byte) as
1742 CHARSET but whose characters are displayed in the opposite direction.
1746 charset = Fget_charset (charset);
1747 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1751 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1752 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1753 If DIRECTION is omitted, both directions will be checked (left-to-right
1754 will be returned if character sets exist for both directions).
1756 (dimension, chars, final, direction))
1758 int dm, ch, fi, di = -1;
1759 Lisp_Object obj = Qnil;
1761 CHECK_INT (dimension);
1762 dm = XINT (dimension);
1763 if (dm < 1 || dm > 2)
1764 signal_simple_error ("Invalid value for DIMENSION", dimension);
1768 if (ch != 94 && ch != 96)
1769 signal_simple_error ("Invalid value for CHARS", chars);
1771 CHECK_CHAR_COERCE_INT (final);
1773 if (fi < '0' || fi > '~')
1774 signal_simple_error ("Invalid value for FINAL", final);
1776 if (EQ (direction, Ql2r))
1777 di = CHARSET_LEFT_TO_RIGHT;
1778 else if (EQ (direction, Qr2l))
1779 di = CHARSET_RIGHT_TO_LEFT;
1780 else if (!NILP (direction))
1781 signal_simple_error ("Invalid value for DIRECTION", direction);
1783 if (dm == 2 && fi > 0x5F)
1785 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1789 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1791 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1794 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1797 return XCHARSET_NAME (obj);
1801 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1802 Return short name of CHARSET.
1806 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1809 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1810 Return long name of CHARSET.
1814 return XCHARSET_LONG_NAME (Fget_charset (charset));
1817 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1818 Return description of CHARSET.
1822 return XCHARSET_DOC_STRING (Fget_charset (charset));
1825 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1826 Return dimension of CHARSET.
1830 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1833 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1834 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1835 Recognized properties are those listed in `make-charset', as well as
1836 'name and 'doc-string.
1842 charset = Fget_charset (charset);
1843 cs = XCHARSET (charset);
1845 CHECK_SYMBOL (prop);
1846 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1847 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1848 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1849 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1850 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1851 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1852 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1853 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1854 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1855 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1856 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1857 if (EQ (prop, Qdirection))
1858 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1859 if (EQ (prop, Qreverse_direction_charset))
1861 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1862 /* #### Is this translation OK? If so, error checking sufficient? */
1863 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1865 signal_simple_error ("Unrecognized charset property name", prop);
1866 return Qnil; /* not reached */
1869 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1870 Return charset identification number of CHARSET.
1874 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1877 /* #### We need to figure out which properties we really want to
1880 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1881 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1883 (charset, ccl_program))
1885 struct ccl_program test_ccl;
1887 charset = Fget_charset (charset);
1888 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1889 signal_simple_error ("Invalid ccl-program", ccl_program);
1890 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1895 invalidate_charset_font_caches (Lisp_Object charset)
1897 /* Invalidate font cache entries for charset on all devices. */
1898 Lisp_Object devcons, concons, hash_table;
1899 DEVICE_LOOP_NO_BREAK (devcons, concons)
1901 struct device *d = XDEVICE (XCAR (devcons));
1902 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1903 if (!UNBOUNDP (hash_table))
1904 Fclrhash (hash_table);
1908 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1909 Set the 'registry property of CHARSET to REGISTRY.
1911 (charset, registry))
1913 charset = Fget_charset (charset);
1914 CHECK_STRING (registry);
1915 XCHARSET_REGISTRY (charset) = registry;
1916 invalidate_charset_font_caches (charset);
1917 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1922 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1923 Return mapping-table of CHARSET.
1927 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1930 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1931 Set mapping-table of CHARSET to TABLE.
1935 struct Lisp_Charset *cs;
1939 charset = Fget_charset (charset);
1940 cs = XCHARSET (charset);
1944 if (VECTORP (CHARSET_DECODING_TABLE(cs)))
1945 make_vector_newer (CHARSET_DECODING_TABLE(cs));
1946 CHARSET_DECODING_TABLE(cs) = Qnil;
1949 else if (VECTORP (table))
1951 int ccs_len = CHARSET_BYTE_SIZE (cs);
1952 int ret = decoding_table_check_elements (table,
1953 CHARSET_DIMENSION (cs),
1958 signal_simple_error ("Too big table", table);
1960 signal_simple_error ("Invalid element is found", table);
1962 signal_simple_error ("Something wrong", table);
1964 CHARSET_DECODING_TABLE(cs) = Qnil;
1967 signal_error (Qwrong_type_argument,
1968 list2 (build_translated_string ("vector-or-nil-p"),
1971 byte_offset = CHARSET_BYTE_OFFSET (cs);
1972 switch (CHARSET_DIMENSION (cs))
1975 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1977 Lisp_Object c = XVECTOR_DATA(table)[i];
1980 put_char_ccs_code_point (c, charset,
1981 make_int (i + byte_offset));
1985 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1987 Lisp_Object v = XVECTOR_DATA(table)[i];
1993 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1995 Lisp_Object c = XVECTOR_DATA(v)[j];
1998 put_char_ccs_code_point
2000 make_int ( ( (i + byte_offset) << 8 )
2006 put_char_ccs_code_point (v, charset,
2007 make_int (i + byte_offset));
2016 /************************************************************************/
2017 /* Lisp primitives for working with characters */
2018 /************************************************************************/
2021 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2022 Make a character from CHARSET and code-point CODE.
2023 If DEFINED_ONLY is non-nil, builtin character is not returned.
2024 If corresponding character is not found, nil is returned.
2026 (charset, code, defined_only))
2030 charset = Fget_charset (charset);
2033 if (XCHARSET_GRAPHIC (charset) == 1)
2035 if (NILP (defined_only))
2036 c = DECODE_CHAR (charset, c);
2038 c = DECODE_DEFINED_CHAR (charset, c);
2039 return c >= 0 ? make_char (c) : Qnil;
2042 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2043 Make a builtin character from CHARSET and code-point CODE.
2049 charset = Fget_charset (charset);
2051 if (EQ (charset, Vcharset_latin_viscii))
2053 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2059 (ret = Fget_char_attribute (chr,
2060 Vcharset_latin_viscii_lower,
2063 charset = Vcharset_latin_viscii_lower;
2067 (ret = Fget_char_attribute (chr,
2068 Vcharset_latin_viscii_upper,
2071 charset = Vcharset_latin_viscii_upper;
2078 if (XCHARSET_GRAPHIC (charset) == 1)
2081 c = decode_builtin_char (charset, c);
2082 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
2086 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2087 Make a character from CHARSET and octets ARG1 and ARG2.
2088 ARG2 is required only for characters from two-dimensional charsets.
2089 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2090 character s with caron.
2092 (charset, arg1, arg2))
2096 int lowlim, highlim;
2098 charset = Fget_charset (charset);
2099 cs = XCHARSET (charset);
2101 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2102 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2104 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2106 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2107 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2110 /* It is useful (and safe, according to Olivier Galibert) to strip
2111 the 8th bit off ARG1 and ARG2 because it allows programmers to
2112 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2113 Latin 2 code of the character. */
2121 if (a1 < lowlim || a1 > highlim)
2122 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2124 if (CHARSET_DIMENSION (cs) == 1)
2128 ("Charset is of dimension one; second octet must be nil", arg2);
2129 return make_char (MAKE_CHAR (charset, a1, 0));
2138 a2 = XINT (arg2) & 0x7f;
2140 if (a2 < lowlim || a2 > highlim)
2141 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2143 return make_char (MAKE_CHAR (charset, a1, a2));
2146 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2147 Return the character set of CHARACTER.
2151 CHECK_CHAR_COERCE_INT (character);
2153 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2156 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2157 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2158 N defaults to 0 if omitted.
2162 Lisp_Object charset;
2165 CHECK_CHAR_COERCE_INT (character);
2167 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2169 if (NILP (n) || EQ (n, Qzero))
2170 return make_int (octet0);
2171 else if (EQ (n, make_int (1)))
2172 return make_int (octet1);
2174 signal_simple_error ("Octet number must be 0 or 1", n);
2177 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2178 Return list of charset and one or two position-codes of CHARACTER.
2182 /* This function can GC */
2183 struct gcpro gcpro1, gcpro2;
2184 Lisp_Object charset = Qnil;
2185 Lisp_Object rc = Qnil;
2193 GCPRO2 (charset, rc);
2194 CHECK_CHAR_COERCE_INT (character);
2197 code_point = ENCODE_CHAR (XCHAR (character), charset);
2198 dimension = XCHARSET_DIMENSION (charset);
2199 while (dimension > 0)
2201 rc = Fcons (make_int (code_point & 255), rc);
2205 rc = Fcons (XCHARSET_NAME (charset), rc);
2207 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2209 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2211 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2215 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2224 #ifdef ENABLE_COMPOSITE_CHARS
2225 /************************************************************************/
2226 /* composite character functions */
2227 /************************************************************************/
2230 lookup_composite_char (Bufbyte *str, int len)
2232 Lisp_Object lispstr = make_string (str, len);
2233 Lisp_Object ch = Fgethash (lispstr,
2234 Vcomposite_char_string2char_hash_table,
2240 if (composite_char_row_next >= 128)
2241 signal_simple_error ("No more composite chars available", lispstr);
2242 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2243 composite_char_col_next);
2244 Fputhash (make_char (emch), lispstr,
2245 Vcomposite_char_char2string_hash_table);
2246 Fputhash (lispstr, make_char (emch),
2247 Vcomposite_char_string2char_hash_table);
2248 composite_char_col_next++;
2249 if (composite_char_col_next >= 128)
2251 composite_char_col_next = 32;
2252 composite_char_row_next++;
2261 composite_char_string (Emchar ch)
2263 Lisp_Object str = Fgethash (make_char (ch),
2264 Vcomposite_char_char2string_hash_table,
2266 assert (!UNBOUNDP (str));
2270 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2271 Convert a string into a single composite character.
2272 The character is the result of overstriking all the characters in
2277 CHECK_STRING (string);
2278 return make_char (lookup_composite_char (XSTRING_DATA (string),
2279 XSTRING_LENGTH (string)));
2282 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2283 Return a string of the characters comprising a composite character.
2291 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2292 signal_simple_error ("Must be composite char", ch);
2293 return composite_char_string (emch);
2295 #endif /* ENABLE_COMPOSITE_CHARS */
2298 /************************************************************************/
2299 /* initialization */
2300 /************************************************************************/
2303 syms_of_mule_charset (void)
2305 INIT_LRECORD_IMPLEMENTATION (charset);
2307 DEFSUBR (Fcharsetp);
2308 DEFSUBR (Ffind_charset);
2309 DEFSUBR (Fget_charset);
2310 DEFSUBR (Fcharset_list);
2311 DEFSUBR (Fcharset_name);
2312 DEFSUBR (Fmake_charset);
2313 DEFSUBR (Fmake_reverse_direction_charset);
2314 /* DEFSUBR (Freverse_direction_charset); */
2315 DEFSUBR (Fdefine_charset_alias);
2316 DEFSUBR (Fcharset_from_attributes);
2317 DEFSUBR (Fcharset_short_name);
2318 DEFSUBR (Fcharset_long_name);
2319 DEFSUBR (Fcharset_description);
2320 DEFSUBR (Fcharset_dimension);
2321 DEFSUBR (Fcharset_property);
2322 DEFSUBR (Fcharset_id);
2323 DEFSUBR (Fset_charset_ccl_program);
2324 DEFSUBR (Fset_charset_registry);
2326 DEFSUBR (Fcharset_mapping_table);
2327 DEFSUBR (Fset_charset_mapping_table);
2331 DEFSUBR (Fdecode_char);
2332 DEFSUBR (Fdecode_builtin_char);
2334 DEFSUBR (Fmake_char);
2335 DEFSUBR (Fchar_charset);
2336 DEFSUBR (Fchar_octet);
2337 DEFSUBR (Fsplit_char);
2339 #ifdef ENABLE_COMPOSITE_CHARS
2340 DEFSUBR (Fmake_composite_char);
2341 DEFSUBR (Fcomposite_char_string);
2344 defsymbol (&Qcharsetp, "charsetp");
2345 defsymbol (&Qregistry, "registry");
2346 defsymbol (&Qfinal, "final");
2347 defsymbol (&Qgraphic, "graphic");
2348 defsymbol (&Qdirection, "direction");
2349 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2350 defsymbol (&Qshort_name, "short-name");
2351 defsymbol (&Qlong_name, "long-name");
2353 defsymbol (&Ql2r, "l2r");
2354 defsymbol (&Qr2l, "r2l");
2356 /* Charsets, compatible with FSF 20.3
2357 Naming convention is Script-Charset[-Edition] */
2358 defsymbol (&Qascii, "ascii");
2359 defsymbol (&Qcontrol_1, "control-1");
2360 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2361 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2362 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2363 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2364 defsymbol (&Qthai_tis620, "thai-tis620");
2365 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2366 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2367 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2368 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2369 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2370 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2371 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2372 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2373 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2374 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2375 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2376 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2377 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2378 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2379 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2380 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2382 defsymbol (&Qucs, "ucs");
2383 defsymbol (&Qucs_bmp, "ucs-bmp");
2384 defsymbol (&Qucs_cns, "ucs-cns");
2385 defsymbol (&Qucs_jis, "ucs-jis");
2386 defsymbol (&Qucs_ks, "ucs-ks");
2387 defsymbol (&Qucs_big5, "ucs-big5");
2388 defsymbol (&Qlatin_viscii, "latin-viscii");
2389 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2390 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2391 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2392 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2393 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2394 defsymbol (&Qideograph_gt, "ideograph-gt");
2395 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2396 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2397 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2398 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2399 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2400 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2401 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2402 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2403 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2404 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2405 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2406 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2407 defsymbol (&Qchinese_big5, "chinese-big5");
2408 defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp");
2409 defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
2410 defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
2411 defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
2412 defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
2413 defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
2414 defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
2415 defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
2416 defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
2417 defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
2418 defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
2419 defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
2420 defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
2421 defsymbol (&Qchina3_jef, "china3-jef");
2422 defsymbol (&Qideograph_cbeta, "ideograph-cbeta");
2423 defsymbol (&Qmojikyo, "mojikyo");
2424 defsymbol (&Qmojikyo_2022_1, "mojikyo-2022-1");
2425 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
2426 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
2427 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
2428 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
2429 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
2430 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
2431 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
2432 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
2433 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
2434 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
2435 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
2436 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
2437 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
2438 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
2439 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
2440 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
2441 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
2442 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
2443 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
2444 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
2445 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
2446 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2448 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2449 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2451 defsymbol (&Qcomposite, "composite");
2455 vars_of_mule_charset (void)
2462 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2463 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2465 /* Table of charsets indexed by leading byte. */
2466 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2467 chlook->charset_by_leading_byte[i] = Qnil;
2470 /* Table of charsets indexed by type/final-byte. */
2471 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2472 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2473 chlook->charset_by_attributes[i][j] = Qnil;
2475 /* Table of charsets indexed by type/final-byte/direction. */
2476 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2477 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2478 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2479 chlook->charset_by_attributes[i][j][k] = Qnil;
2483 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2485 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2486 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2490 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2491 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2492 Leading-code of private TYPE9N charset of column-width 1.
2494 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2498 Vdefault_coded_charset_priority_list = Qnil;
2499 DEFVAR_LISP ("default-coded-charset-priority-list",
2500 &Vdefault_coded_charset_priority_list /*
2501 Default order of preferred coded-character-sets.
2507 complex_vars_of_mule_charset (void)
2509 staticpro (&Vcharset_hash_table);
2510 Vcharset_hash_table =
2511 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2513 /* Predefined character sets. We store them into variables for
2517 staticpro (&Vcharset_ucs);
2519 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2520 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2521 build_string ("UCS"),
2522 build_string ("UCS"),
2523 build_string ("ISO/IEC 10646"),
2525 Qnil, 0, 0xFFFFFFF, 0, 0);
2526 staticpro (&Vcharset_ucs_bmp);
2528 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2529 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2530 build_string ("BMP"),
2531 build_string ("BMP"),
2532 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2533 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2534 Qnil, 0, 0xFFFF, 0, 0);
2535 staticpro (&Vcharset_ucs_cns);
2537 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2538 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2539 build_string ("UCS for CNS"),
2540 build_string ("UCS for CNS 11643"),
2541 build_string ("ISO/IEC 10646 for CNS 11643"),
2544 staticpro (&Vcharset_ucs_jis);
2546 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2547 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2548 build_string ("UCS for JIS"),
2549 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2550 build_string ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2553 staticpro (&Vcharset_ucs_ks);
2555 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2556 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2557 build_string ("UCS for KS"),
2558 build_string ("UCS for CCS defined by KS"),
2559 build_string ("ISO/IEC 10646 for Korean Standards"),
2562 staticpro (&Vcharset_ucs_big5);
2564 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2565 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2566 build_string ("UCS for Big5"),
2567 build_string ("UCS for Big5"),
2568 build_string ("ISO/IEC 10646 for Big5"),
2572 # define MIN_CHAR_THAI 0
2573 # define MAX_CHAR_THAI 0
2574 /* # define MIN_CHAR_HEBREW 0 */
2575 /* # define MAX_CHAR_HEBREW 0 */
2576 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2577 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2579 staticpro (&Vcharset_ascii);
2581 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2582 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2583 build_string ("ASCII"),
2584 build_string ("ASCII)"),
2585 build_string ("ASCII (ISO646 IRV)"),
2586 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2587 Qnil, 0, 0x7F, 0, 0);
2588 staticpro (&Vcharset_control_1);
2589 Vcharset_control_1 =
2590 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2591 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2592 build_string ("C1"),
2593 build_string ("Control characters"),
2594 build_string ("Control characters 128-191"),
2596 Qnil, 0x80, 0x9F, 0, 0);
2597 staticpro (&Vcharset_latin_iso8859_1);
2598 Vcharset_latin_iso8859_1 =
2599 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2600 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2601 build_string ("Latin-1"),
2602 build_string ("ISO8859-1 (Latin-1)"),
2603 build_string ("ISO8859-1 (Latin-1)"),
2604 build_string ("iso8859-1"),
2605 Qnil, 0xA0, 0xFF, 0, 32);
2606 staticpro (&Vcharset_latin_iso8859_2);
2607 Vcharset_latin_iso8859_2 =
2608 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2609 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2610 build_string ("Latin-2"),
2611 build_string ("ISO8859-2 (Latin-2)"),
2612 build_string ("ISO8859-2 (Latin-2)"),
2613 build_string ("iso8859-2"),
2615 staticpro (&Vcharset_latin_iso8859_3);
2616 Vcharset_latin_iso8859_3 =
2617 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2618 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2619 build_string ("Latin-3"),
2620 build_string ("ISO8859-3 (Latin-3)"),
2621 build_string ("ISO8859-3 (Latin-3)"),
2622 build_string ("iso8859-3"),
2624 staticpro (&Vcharset_latin_iso8859_4);
2625 Vcharset_latin_iso8859_4 =
2626 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2627 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2628 build_string ("Latin-4"),
2629 build_string ("ISO8859-4 (Latin-4)"),
2630 build_string ("ISO8859-4 (Latin-4)"),
2631 build_string ("iso8859-4"),
2633 staticpro (&Vcharset_thai_tis620);
2634 Vcharset_thai_tis620 =
2635 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2636 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2637 build_string ("TIS620"),
2638 build_string ("TIS620 (Thai)"),
2639 build_string ("TIS620.2529 (Thai)"),
2640 build_string ("tis620"),
2641 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2642 staticpro (&Vcharset_greek_iso8859_7);
2643 Vcharset_greek_iso8859_7 =
2644 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2645 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2646 build_string ("ISO8859-7"),
2647 build_string ("ISO8859-7 (Greek)"),
2648 build_string ("ISO8859-7 (Greek)"),
2649 build_string ("iso8859-7"),
2651 staticpro (&Vcharset_arabic_iso8859_6);
2652 Vcharset_arabic_iso8859_6 =
2653 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2654 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2655 build_string ("ISO8859-6"),
2656 build_string ("ISO8859-6 (Arabic)"),
2657 build_string ("ISO8859-6 (Arabic)"),
2658 build_string ("iso8859-6"),
2660 staticpro (&Vcharset_hebrew_iso8859_8);
2661 Vcharset_hebrew_iso8859_8 =
2662 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2663 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2664 build_string ("ISO8859-8"),
2665 build_string ("ISO8859-8 (Hebrew)"),
2666 build_string ("ISO8859-8 (Hebrew)"),
2667 build_string ("iso8859-8"),
2669 0 /* MIN_CHAR_HEBREW */,
2670 0 /* MAX_CHAR_HEBREW */, 0, 32);
2671 staticpro (&Vcharset_katakana_jisx0201);
2672 Vcharset_katakana_jisx0201 =
2673 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2674 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2675 build_string ("JISX0201 Kana"),
2676 build_string ("JISX0201.1976 (Japanese Kana)"),
2677 build_string ("JISX0201.1976 Japanese Kana"),
2678 build_string ("jisx0201\\.1976"),
2680 staticpro (&Vcharset_latin_jisx0201);
2681 Vcharset_latin_jisx0201 =
2682 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2683 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2684 build_string ("JISX0201 Roman"),
2685 build_string ("JISX0201.1976 (Japanese Roman)"),
2686 build_string ("JISX0201.1976 Japanese Roman"),
2687 build_string ("jisx0201\\.1976"),
2689 staticpro (&Vcharset_cyrillic_iso8859_5);
2690 Vcharset_cyrillic_iso8859_5 =
2691 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2692 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2693 build_string ("ISO8859-5"),
2694 build_string ("ISO8859-5 (Cyrillic)"),
2695 build_string ("ISO8859-5 (Cyrillic)"),
2696 build_string ("iso8859-5"),
2698 staticpro (&Vcharset_latin_iso8859_9);
2699 Vcharset_latin_iso8859_9 =
2700 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2701 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2702 build_string ("Latin-5"),
2703 build_string ("ISO8859-9 (Latin-5)"),
2704 build_string ("ISO8859-9 (Latin-5)"),
2705 build_string ("iso8859-9"),
2707 staticpro (&Vcharset_japanese_jisx0208_1978);
2708 Vcharset_japanese_jisx0208_1978 =
2709 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2710 Qjapanese_jisx0208_1978, 94, 2,
2711 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2712 build_string ("JIS X0208:1978"),
2713 build_string ("JIS X0208:1978 (Japanese)"),
2715 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2716 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2718 staticpro (&Vcharset_chinese_gb2312);
2719 Vcharset_chinese_gb2312 =
2720 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2721 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2722 build_string ("GB2312"),
2723 build_string ("GB2312)"),
2724 build_string ("GB2312 Chinese simplified"),
2725 build_string ("gb2312"),
2727 staticpro (&Vcharset_chinese_gb12345);
2728 Vcharset_chinese_gb12345 =
2729 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2730 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2731 build_string ("G1"),
2732 build_string ("GB 12345)"),
2733 build_string ("GB 12345-1990"),
2734 build_string ("GB12345\\(\\.1990\\)?-0"),
2736 staticpro (&Vcharset_japanese_jisx0208);
2737 Vcharset_japanese_jisx0208 =
2738 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2739 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2740 build_string ("JISX0208"),
2741 build_string ("JIS X0208:1983 (Japanese)"),
2742 build_string ("JIS X0208:1983 Japanese Kanji"),
2743 build_string ("jisx0208\\.1983"),
2746 staticpro (&Vcharset_japanese_jisx0208_1990);
2747 Vcharset_japanese_jisx0208_1990 =
2748 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2749 Qjapanese_jisx0208_1990, 94, 2,
2750 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2751 build_string ("JISX0208-1990"),
2752 build_string ("JIS X0208:1990 (Japanese)"),
2753 build_string ("JIS X0208:1990 Japanese Kanji"),
2754 build_string ("jisx0208\\.1990"),
2756 MIN_CHAR_JIS_X0208_1990,
2757 MAX_CHAR_JIS_X0208_1990, 0, 33);
2759 staticpro (&Vcharset_korean_ksc5601);
2760 Vcharset_korean_ksc5601 =
2761 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2762 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2763 build_string ("KSC5601"),
2764 build_string ("KSC5601 (Korean"),
2765 build_string ("KSC5601 Korean Hangul and Hanja"),
2766 build_string ("ksc5601"),
2768 staticpro (&Vcharset_japanese_jisx0212);
2769 Vcharset_japanese_jisx0212 =
2770 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2771 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2772 build_string ("JISX0212"),
2773 build_string ("JISX0212 (Japanese)"),
2774 build_string ("JISX0212 Japanese Supplement"),
2775 build_string ("jisx0212"),
2778 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2779 staticpro (&Vcharset_chinese_cns11643_1);
2780 Vcharset_chinese_cns11643_1 =
2781 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2782 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2783 build_string ("CNS11643-1"),
2784 build_string ("CNS11643-1 (Chinese traditional)"),
2786 ("CNS 11643 Plane 1 Chinese traditional"),
2787 build_string (CHINESE_CNS_PLANE_RE("1")),
2789 staticpro (&Vcharset_chinese_cns11643_2);
2790 Vcharset_chinese_cns11643_2 =
2791 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2792 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2793 build_string ("CNS11643-2"),
2794 build_string ("CNS11643-2 (Chinese traditional)"),
2796 ("CNS 11643 Plane 2 Chinese traditional"),
2797 build_string (CHINESE_CNS_PLANE_RE("2")),
2800 staticpro (&Vcharset_latin_tcvn5712);
2801 Vcharset_latin_tcvn5712 =
2802 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2803 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2804 build_string ("TCVN 5712"),
2805 build_string ("TCVN 5712 (VSCII-2)"),
2806 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
2807 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2809 staticpro (&Vcharset_latin_viscii_lower);
2810 Vcharset_latin_viscii_lower =
2811 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2812 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2813 build_string ("VISCII lower"),
2814 build_string ("VISCII lower (Vietnamese)"),
2815 build_string ("VISCII lower (Vietnamese)"),
2816 build_string ("MULEVISCII-LOWER"),
2818 staticpro (&Vcharset_latin_viscii_upper);
2819 Vcharset_latin_viscii_upper =
2820 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2821 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2822 build_string ("VISCII upper"),
2823 build_string ("VISCII upper (Vietnamese)"),
2824 build_string ("VISCII upper (Vietnamese)"),
2825 build_string ("MULEVISCII-UPPER"),
2827 staticpro (&Vcharset_latin_viscii);
2828 Vcharset_latin_viscii =
2829 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2830 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2831 build_string ("VISCII"),
2832 build_string ("VISCII 1.1 (Vietnamese)"),
2833 build_string ("VISCII 1.1 (Vietnamese)"),
2834 build_string ("VISCII1\\.1"),
2836 staticpro (&Vcharset_chinese_big5);
2837 Vcharset_chinese_big5 =
2838 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2839 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2840 build_string ("Big5"),
2841 build_string ("Big5"),
2842 build_string ("Big5 Chinese traditional"),
2843 build_string ("big5"),
2845 0 /* MIN_CHAR_BIG5_CDP */,
2846 0 /* MAX_CHAR_BIG5_CDP */, 0, 0);
2847 staticpro (&Vcharset_chinese_big5_cdp);
2848 Vcharset_chinese_big5_cdp =
2849 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
2850 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2851 build_string ("Big5-CDP"),
2852 build_string ("Big5 + CDP extension"),
2853 build_string ("Big5 with CDP extension"),
2854 build_string ("big5\\.cdp-0"),
2855 Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP, 0, 0);
2856 #define DEF_HANZIKU(n) \
2857 staticpro (&Vcharset_ideograph_hanziku_##n); \
2858 Vcharset_ideograph_hanziku_##n = \
2859 make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
2860 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
2861 build_string ("HZK-"#n), \
2862 build_string ("HANZIKU-"#n), \
2863 build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
2865 ("hanziku-"#n"$"), \
2866 Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, 0, 0);
2879 staticpro (&Vcharset_china3_jef);
2880 Vcharset_china3_jef =
2881 make_charset (LEADING_BYTE_CHINA3_JEF, Qchina3_jef, 256, 2,
2882 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2883 build_string ("JC3"),
2884 build_string ("JEF + CHINA3"),
2885 build_string ("JEF + CHINA3 private characters"),
2886 build_string ("china3jef-0"),
2887 Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF, 0, 0);
2888 staticpro (&Vcharset_ideograph_cbeta);
2889 Vcharset_ideograph_cbeta =
2890 make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,
2891 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2892 build_string ("CB"),
2893 build_string ("CBETA"),
2894 build_string ("CBETA private characters"),
2895 build_string ("cbeta-0"),
2896 Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA, 0, 0);
2897 staticpro (&Vcharset_ideograph_gt);
2898 Vcharset_ideograph_gt =
2899 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
2900 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2901 build_string ("GT"),
2902 build_string ("GT"),
2903 build_string ("GT"),
2905 Qnil, MIN_CHAR_GT, MAX_CHAR_GT, 0, 0);
2906 #define DEF_GT_PJ(n) \
2907 staticpro (&Vcharset_ideograph_gt_pj_##n); \
2908 Vcharset_ideograph_gt_pj_##n = \
2909 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
2910 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
2911 build_string ("GT-PJ-"#n), \
2912 build_string ("GT (pseudo JIS encoding) part "#n), \
2913 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
2915 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
2929 staticpro (&Vcharset_ideograph_daikanwa);
2930 Vcharset_ideograph_daikanwa =
2931 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
2932 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2933 build_string ("Daikanwa"),
2934 build_string ("Morohashi's Daikanwa"),
2935 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
2936 build_string ("Daikanwa"),
2937 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
2938 staticpro (&Vcharset_mojikyo);
2940 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
2941 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2942 build_string ("Mojikyo"),
2943 build_string ("Mojikyo"),
2944 build_string ("Konjaku-Mojikyo"),
2946 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
2947 staticpro (&Vcharset_mojikyo_2022_1);
2948 Vcharset_mojikyo_2022_1 =
2949 make_charset (LEADING_BYTE_MOJIKYO_2022_1, Qmojikyo_2022_1, 94, 3,
2950 2, 2, ':', CHARSET_LEFT_TO_RIGHT,
2951 build_string ("Mojikyo-2022-1"),
2952 build_string ("Mojikyo ISO-2022 Part 1"),
2953 build_string ("Konjaku-Mojikyo for ISO/IEC 2022 Part 1"),
2957 #define DEF_MOJIKYO_PJ(n) \
2958 staticpro (&Vcharset_mojikyo_pj_##n); \
2959 Vcharset_mojikyo_pj_##n = \
2960 make_charset (LEADING_BYTE_MOJIKYO_PJ_##n, Qmojikyo_pj_##n, 94, 2, \
2961 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
2962 build_string ("Mojikyo-PJ-"#n), \
2963 build_string ("Mojikyo (pseudo JIS encoding) part "#n), \
2965 ("Konjaku-Mojikyo (pseudo JIS encoding) part "#n), \
2967 ("\\(MojikyoPJ-"#n "\\|jisx0208\\.Mojikyo-"#n "\\)$"), \
2979 DEF_MOJIKYO_PJ (10);
2980 DEF_MOJIKYO_PJ (11);
2981 DEF_MOJIKYO_PJ (12);
2982 DEF_MOJIKYO_PJ (13);
2983 DEF_MOJIKYO_PJ (14);
2984 DEF_MOJIKYO_PJ (15);
2985 DEF_MOJIKYO_PJ (16);
2986 DEF_MOJIKYO_PJ (17);
2987 DEF_MOJIKYO_PJ (18);
2988 DEF_MOJIKYO_PJ (19);
2989 DEF_MOJIKYO_PJ (20);
2990 DEF_MOJIKYO_PJ (21);
2992 staticpro (&Vcharset_ethiopic_ucs);
2993 Vcharset_ethiopic_ucs =
2994 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
2995 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2996 build_string ("Ethiopic (UCS)"),
2997 build_string ("Ethiopic (UCS)"),
2998 build_string ("Ethiopic of UCS"),
2999 build_string ("Ethiopic-Unicode"),
3000 Qnil, 0x1200, 0x137F, 0x1200, 0);
3002 staticpro (&Vcharset_chinese_big5_1);
3003 Vcharset_chinese_big5_1 =
3004 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
3005 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
3006 build_string ("Big5"),
3007 build_string ("Big5 (Level-1)"),
3009 ("Big5 Level-1 Chinese traditional"),
3010 build_string ("big5"),
3012 staticpro (&Vcharset_chinese_big5_2);
3013 Vcharset_chinese_big5_2 =
3014 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3015 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3016 build_string ("Big5"),
3017 build_string ("Big5 (Level-2)"),
3019 ("Big5 Level-2 Chinese traditional"),
3020 build_string ("big5"),
3023 #ifdef ENABLE_COMPOSITE_CHARS
3024 /* #### For simplicity, we put composite chars into a 96x96 charset.
3025 This is going to lead to problems because you can run out of
3026 room, esp. as we don't yet recycle numbers. */
3027 staticpro (&Vcharset_composite);
3028 Vcharset_composite =
3029 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3030 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3031 build_string ("Composite"),
3032 build_string ("Composite characters"),
3033 build_string ("Composite characters"),
3036 /* #### not dumped properly */
3037 composite_char_row_next = 32;
3038 composite_char_col_next = 32;
3040 Vcomposite_char_string2char_hash_table =
3041 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3042 Vcomposite_char_char2string_hash_table =
3043 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3044 staticpro (&Vcomposite_char_string2char_hash_table);
3045 staticpro (&Vcomposite_char_char2string_hash_table);
3046 #endif /* ENABLE_COMPOSITE_CHARS */