1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any later version.
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_cns;
70 Lisp_Object Vcharset_ucs_jis;
71 Lisp_Object Vcharset_ucs_big5;
72 Lisp_Object Vcharset_latin_viscii;
73 Lisp_Object Vcharset_latin_tcvn5712;
74 Lisp_Object Vcharset_latin_viscii_lower;
75 Lisp_Object Vcharset_latin_viscii_upper;
76 Lisp_Object Vcharset_chinese_big5;
77 Lisp_Object Vcharset_chinese_big5_cdp;
78 Lisp_Object Vcharset_ideograph_hanziku_1;
79 Lisp_Object Vcharset_ideograph_hanziku_2;
80 Lisp_Object Vcharset_ideograph_hanziku_3;
81 Lisp_Object Vcharset_ideograph_hanziku_4;
82 Lisp_Object Vcharset_ideograph_hanziku_5;
83 Lisp_Object Vcharset_ideograph_hanziku_6;
84 Lisp_Object Vcharset_ideograph_hanziku_7;
85 Lisp_Object Vcharset_ideograph_hanziku_8;
86 Lisp_Object Vcharset_ideograph_hanziku_9;
87 Lisp_Object Vcharset_ideograph_hanziku_10;
88 Lisp_Object Vcharset_ideograph_hanziku_11;
89 Lisp_Object Vcharset_ideograph_hanziku_12;
90 Lisp_Object Vcharset_china3_jef;
91 Lisp_Object Vcharset_ideograph_cbeta;
92 Lisp_Object Vcharset_ideograph_gt;
93 Lisp_Object Vcharset_ideograph_gt_pj_1;
94 Lisp_Object Vcharset_ideograph_gt_pj_2;
95 Lisp_Object Vcharset_ideograph_gt_pj_3;
96 Lisp_Object Vcharset_ideograph_gt_pj_4;
97 Lisp_Object Vcharset_ideograph_gt_pj_5;
98 Lisp_Object Vcharset_ideograph_gt_pj_6;
99 Lisp_Object Vcharset_ideograph_gt_pj_7;
100 Lisp_Object Vcharset_ideograph_gt_pj_8;
101 Lisp_Object Vcharset_ideograph_gt_pj_9;
102 Lisp_Object Vcharset_ideograph_gt_pj_10;
103 Lisp_Object Vcharset_ideograph_gt_pj_11;
104 Lisp_Object Vcharset_ideograph_daikanwa;
105 Lisp_Object Vcharset_mojikyo;
106 Lisp_Object Vcharset_mojikyo_2022_1;
107 Lisp_Object Vcharset_mojikyo_pj_1;
108 Lisp_Object Vcharset_mojikyo_pj_2;
109 Lisp_Object Vcharset_mojikyo_pj_3;
110 Lisp_Object Vcharset_mojikyo_pj_4;
111 Lisp_Object Vcharset_mojikyo_pj_5;
112 Lisp_Object Vcharset_mojikyo_pj_6;
113 Lisp_Object Vcharset_mojikyo_pj_7;
114 Lisp_Object Vcharset_mojikyo_pj_8;
115 Lisp_Object Vcharset_mojikyo_pj_9;
116 Lisp_Object Vcharset_mojikyo_pj_10;
117 Lisp_Object Vcharset_mojikyo_pj_11;
118 Lisp_Object Vcharset_mojikyo_pj_12;
119 Lisp_Object Vcharset_mojikyo_pj_13;
120 Lisp_Object Vcharset_mojikyo_pj_14;
121 Lisp_Object Vcharset_mojikyo_pj_15;
122 Lisp_Object Vcharset_mojikyo_pj_16;
123 Lisp_Object Vcharset_mojikyo_pj_17;
124 Lisp_Object Vcharset_mojikyo_pj_18;
125 Lisp_Object Vcharset_mojikyo_pj_19;
126 Lisp_Object Vcharset_mojikyo_pj_20;
127 Lisp_Object Vcharset_mojikyo_pj_21;
128 Lisp_Object Vcharset_ethiopic_ucs;
130 Lisp_Object Vcharset_chinese_big5_1;
131 Lisp_Object Vcharset_chinese_big5_2;
133 #ifdef ENABLE_COMPOSITE_CHARS
134 Lisp_Object Vcharset_composite;
136 /* Hash tables for composite chars. One maps string representing
137 composed chars to their equivalent chars; one goes the other way. */
139 Lisp_Object Vcomposite_char_char2string_hash_table;
140 Lisp_Object Vcomposite_char_string2char_hash_table;
142 static int composite_char_row_next;
143 static int composite_char_col_next;
145 #endif /* ENABLE_COMPOSITE_CHARS */
/* Global pointer to the charset lookup tables (struct charset_lookup),
   followed by the descriptions of that structure.  The one visible
   description entry is an XD_LISP_OBJECT_ARRAY located at the offset of
   the charset_by_leading_byte member; charset_lookup_description pairs
   sizeof (struct charset_lookup) with that entry list.
   NOTE(review): the initializers are cut short in this view; remaining
   entries and the closing braces are not visible.  */
147 struct charset_lookup *chlook;
149 static const struct lrecord_description charset_lookup_description_1[] = {
150 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
159 static const struct struct_description charset_lookup_description = {
160 sizeof (struct charset_lookup),
161 charset_lookup_description_1
165 /* Table of number of bytes in the string representation of a character
166 indexed by the first byte of that representation.
168 rep_bytes_by_first_byte(c) is more efficient than the equivalent
169 canonical computation:
171 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
/* Layout, as labelled by the row comments below: first bytes 0x00 - 0x7f
   take 1 byte, 0x80 - 0x8f take 2 (one of the two listed rows gives 0x8f
   three bytes), 0x90 - 0x9e take 3 and 0x9f takes 4.
   NOTE(review): two rows are listed for 0x80 - 0x8f, which would give
   0xB0 initializers for an array declared with 0xA0 elements; presumably
   the two rows are alternatives under a preprocessor conditional that is
   not visible here -- confirm.  */
173 const Bytecount rep_bytes_by_first_byte[0xA0] =
174 { /* 0x00 - 0x7f are for straight ASCII */
175 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
176 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
177 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
178 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
179 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
180 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
181 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
182 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
183 /* 0x80 - 0x8f are for Dimension-1 official charsets */
185 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
187 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
189 /* 0x90 - 0x9d are for Dimension-2 official charsets */
190 /* 0x9e is for Dimension-1 private charsets */
191 /* 0x9f is for Dimension-2 private charsets */
192 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* CHARSET_BYTE_SIZE: number of slots per table level for charset CS.
   From the visible lines: a charset declaring 94 chars whose byte offset
   is not 33 yields 128 - CHARSET_BYTE_OFFSET (cs); otherwise the result
   is CHARSET_CHARS (cs).  XCHARSET_BYTE_SIZE is the same computation
   applied to a Lisp_Object; this file uses it as the length of newly
   created decoding-table vectors.  */
198 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
200 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
202 /* ad-hoc method for `ascii' */
203 if ((CHARSET_CHARS (cs) == 94) &&
204 (CHARSET_BYTE_OFFSET (cs) != 33))
205 return 128 - CHARSET_BYTE_OFFSET (cs);
207 return CHARSET_CHARS (cs);
210 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
/* Check the elements of decoding-table vector V for a charset of
   dimension DIM whose per-level table length is CCS_LEN.
   Visible checks: V may not be longer than CCS_LEN; every element is
   inspected, elements that are neither nil nor characters get separate
   treatment, and the function calls itself with DIM - 1 on an element.
   NOTE(review): the return statements and the conditions guarding the
   recursion are not visible here, so the exact return convention is
   unconfirmed.  */
212 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
214 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
218 if (XVECTOR_LENGTH (v) > ccs_len)
221 for (i = 0; i < XVECTOR_LENGTH (v); i++)
223 Lisp_Object c = XVECTOR_DATA(v)[i];
225 if (!NILP (c) && !CHARP (c))
229 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Remove the entry for CODE_POINT from decoding table V.
   The slot index at each level is byte number DIM of CODE_POINT
   (bits 8*DIM .. 8*DIM+7) minus BYTE_OFFSET.  The visible lines fetch
   that slot into nv and store Qnil into the slot.
   NOTE(review): presumably nv is descended into while it is a nested
   vector and the slot is cleared only at the last level -- the branch
   structure is not visible here; confirm.  */
241 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
244 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
254 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
255 nv = XVECTOR_DATA(v)[i];
261 XVECTOR_DATA(v)[i] = Qnil;
/* Store CHARACTER at CODE_POINT in decoding table V.
   The slot index at each level is byte number DIM of CODE_POINT
   (bits 8*DIM .. 8*DIM+7) minus BYTE_OFFSET.  The visible lines show a
   nested table being created on demand with make_older_vector, using
   the length of V for the new vector, and CHARACTER being stored into a
   slot.
   NOTE(review): presumably intermediate levels hold nested vectors and
   CHARACTER is stored at the last level -- the loop or branch structure
   is not visible here; confirm.  */
265 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
266 int code_point, Lisp_Object character);
268 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
269 int code_point, Lisp_Object character)
273 int ccs_len = XVECTOR_LENGTH (v);
278 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
279 nv = XVECTOR_DATA(v)[i];
283 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
289 XVECTOR_DATA(v)[i] = character;
/* Record that CHARACTER has code point VALUE in coded charset CCS.
   Visible steps:
   - The decoding-table work is guarded by the test that CCS is not
     named `ucs' or that VALUE differs from the character's own code.
   - VALUE is normalized to an integer code point: the obsolete
     list-of-bytes form is folded byte by byte with
     (code_point << 8) | j, and an integer VALUE for a charset whose
     graphic is 1 is masked with 0x7F7F7F7F.  Other values signal
     "Invalid value for coded-charset".
   - The code point currently recorded for CHARACTER (looked up with
     Fget_char_attribute) is removed from the decoding table, the table
     is created on demand with make_older_vector, and CHARACTER is
     stored at the new code point.
   - The encoding table, a char-id table created on demand, maps
     CHARACTER to VALUE.
   NOTE(review): several conditions, the per-byte range checks and the
   return statement are not visible here -- confirm before relying on
   the details.  */
293 put_char_ccs_code_point (Lisp_Object character,
294 Lisp_Object ccs, Lisp_Object value)
296 Lisp_Object encoding_table;
298 if (!EQ (XCHARSET_NAME (ccs), Qucs)
299 || (XCHAR (character) != XINT (value)))
301 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
302 int dim = XCHARSET_DIMENSION (ccs);
303 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
304 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
308 { /* obsolete representation: value must be a list of bytes */
309 Lisp_Object ret = Fcar (value);
313 signal_simple_error ("Invalid value for coded-charset", value);
314 code_point = XINT (ret);
315 if (XCHARSET_GRAPHIC (ccs) == 1)
323 signal_simple_error ("Invalid value for coded-charset",
327 signal_simple_error ("Invalid value for coded-charset",
330 if (XCHARSET_GRAPHIC (ccs) == 1)
332 code_point = (code_point << 8) | j;
335 value = make_int (code_point);
337 else if (INTP (value))
339 code_point = XINT (value);
340 if (XCHARSET_GRAPHIC (ccs) == 1)
342 code_point &= 0x7F7F7F7F;
343 value = make_int (code_point);
347 signal_simple_error ("Invalid value for coded-charset", value);
351 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
354 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
359 XCHARSET_DECODING_TABLE (ccs)
360 = v = make_older_vector (ccs_len, Qnil);
363 decoding_table_put_char (v, dim, byte_offset, code_point, character);
365 if (NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
367 XCHARSET_ENCODING_TABLE (ccs)
368 = encoding_table = make_char_id_table (Qnil);
370 put_char_id_table (XCHAR_TABLE(encoding_table), character, value);
/* Remove the association between CHARACTER and coded charset CCS.
   When the decoding table of CCS is a vector, the code point currently
   recorded for CHARACTER (Fget_char_attribute) is used to clear the
   matching decoding-table entry; when the encoding table is a char
   table, the entry for CHARACTER is reset to Qnil.
   NOTE(review): the guard on the looked-up code point and the return
   value are not visible here.  */
375 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
377 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
378 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
380 if (VECTORP (decoding_table))
382 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
386 decoding_table_remove_char (decoding_table,
387 XCHARSET_DIMENSION (ccs),
388 XCHARSET_BYTE_OFFSET (ccs),
392 if (CHAR_TABLEP (encoding_table))
394 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qnil);
402 int leading_code_private_11;
405 Lisp_Object Qcharsetp;
407 /* Qdoc_string, Qdimension, Qchars defined in general.c */
408 Lisp_Object Qregistry, Qfinal, Qgraphic;
409 Lisp_Object Qdirection;
410 Lisp_Object Qreverse_direction_charset;
411 Lisp_Object Qleading_byte;
412 Lisp_Object Qshort_name, Qlong_name;
428 Qjapanese_jisx0208_1978,
432 Qjapanese_jisx0208_1990,
447 Qvietnamese_viscii_lower,
448 Qvietnamese_viscii_upper,
451 Qideograph_hanziku_1,
452 Qideograph_hanziku_2,
453 Qideograph_hanziku_3,
454 Qideograph_hanziku_4,
455 Qideograph_hanziku_5,
456 Qideograph_hanziku_6,
457 Qideograph_hanziku_7,
458 Qideograph_hanziku_8,
459 Qideograph_hanziku_9,
460 Qideograph_hanziku_10,
461 Qideograph_hanziku_11,
462 Qideograph_hanziku_12,
507 Lisp_Object Ql2r, Qr2l;
509 Lisp_Object Vcharset_hash_table;
511 /* Composite characters are characters constructed by overstriking two
512 or more regular characters.
514 1) The old Mule implementation involves storing composite characters
515 in a buffer as a tag followed by all of the actual characters
516 used to make up the composite character. I think this is a bad
517 idea; it greatly complicates code that wants to handle strings
518 one character at a time because it has to deal with the possibility
519 of great big ungainly characters. It's much more reasonable to
520 simply store an index into a table of composite characters.
522 2) The current implementation only allows for 16,384 separate
523 composite characters over the lifetime of the XEmacs process.
524 This could become a potential problem if the user
525 edited lots of different files that use composite characters.
526 Due to FSF bogosity, increasing the number of allowable
527 composite characters under Mule would decrease the number
528 of possible faces that can exist. Mule already has shrunk
529 this to 2048, and further shrinkage would become uncomfortable.
530 No such problems exist in XEmacs.
532 Composite characters could be represented as 0x80 C1 C2 C3,
533 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
534 for slightly under 2^20 (one million) composite characters
535 over the XEmacs process lifetime, and you only need to
536 increase the size of a Mule character from 19 to 21 bits.
537 Or you could use 0x80 C1 C2 C3 C4, allowing for about
538 85 million (slightly over 2^26) composite characters. */
541 /************************************************************************/
542 /* Basic Emchar functions */
543 /************************************************************************/
545 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
546 string in STR. Returns the number of bytes stored.
547 Do not call this directly. Use the macro set_charptr_emchar() instead. */
/* Store the encoded form of character C at STR.
   Two encoders are visible.  The first writes a UTF-8 style multibyte
   sequence: 2 bytes for C <= 0x7ff (lead 0xc0), 3 for C <= 0xffff
   (lead 0xe0), 4 for C <= 0x1fffff (lead 0xf0), 5 for C <= 0x3ffffff
   (lead 0xf8) and 6 bytes otherwise (lead 0xfc); every trailing byte
   carries six bits of C ORed with 0x80.  The second splits C with
   BREAKUP_CHAR, emits PRIVATE_LEADING_BYTE_PREFIX first when the leading
   byte is private, and special-cases Vcharset_control_1.
   NOTE(review): presumably the two encoders are alternatives chosen by
   a preprocessor conditional that is not visible here -- confirm.  */
551 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
/* Two-byte form: 5 + 6 payload bits.  */
566 else if ( c <= 0x7ff )
568 *p++ = (c >> 6) | 0xc0;
569 *p++ = (c & 0x3f) | 0x80;
/* Three-byte form: 4 + 6 + 6 payload bits.  */
571 else if ( c <= 0xffff )
573 *p++ = (c >> 12) | 0xe0;
574 *p++ = ((c >> 6) & 0x3f) | 0x80;
575 *p++ = (c & 0x3f) | 0x80;
/* Four-byte form.  */
577 else if ( c <= 0x1fffff )
579 *p++ = (c >> 18) | 0xf0;
580 *p++ = ((c >> 12) & 0x3f) | 0x80;
581 *p++ = ((c >> 6) & 0x3f) | 0x80;
582 *p++ = (c & 0x3f) | 0x80;
/* Five-byte form.  */
584 else if ( c <= 0x3ffffff )
586 *p++ = (c >> 24) | 0xf8;
587 *p++ = ((c >> 18) & 0x3f) | 0x80;
588 *p++ = ((c >> 12) & 0x3f) | 0x80;
589 *p++ = ((c >> 6) & 0x3f) | 0x80;
590 *p++ = (c & 0x3f) | 0x80;
/* Six-byte form for everything larger.  */
594 *p++ = (c >> 30) | 0xfc;
595 *p++ = ((c >> 24) & 0x3f) | 0x80;
596 *p++ = ((c >> 18) & 0x3f) | 0x80;
597 *p++ = ((c >> 12) & 0x3f) | 0x80;
598 *p++ = ((c >> 6) & 0x3f) | 0x80;
599 *p++ = (c & 0x3f) | 0x80;
/* Leading-byte based encoder starts here.  */
602 BREAKUP_CHAR (c, charset, c1, c2);
603 lb = CHAR_LEADING_BYTE (c);
604 if (LEADING_BYTE_PRIVATE_P (lb))
605 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
607 if (EQ (charset, Vcharset_control_1))
616 /* Return the first character from a Mule-encoded string in STR,
617 assuming it's non-ASCII. Do not call this directly.
618 Use the macro charptr_emchar() instead. */
/* Two decoders are visible.  The first classifies the lead byte b by
   threshold (0xf8, 0xf0, 0xe0, 0xc0) and then folds LEN continuation
   bytes into the result, six bits at a time.  The second works from
   the leading byte: LEADING_BYTE_CONTROL_1 yields the next byte minus
   0x20, a prefix byte is handled specially, and the charset found via
   CHARSET_BY_LEADING_BYTE supplies the dimension before MAKE_CHAR
   builds the result from i1 and i2.
   NOTE(review): presumably the two decoders are alternatives chosen by
   a preprocessor conditional that is not visible here -- confirm.  */
621 non_ascii_charptr_emchar (const Bufbyte *str)
634 else if ( b >= 0xf8 )
639 else if ( b >= 0xf0 )
644 else if ( b >= 0xe0 )
649 else if ( b >= 0xc0 )
659 for( ; len > 0; len-- )
662 ch = ( ch << 6 ) | ( b & 0x3f );
666 Bufbyte i0 = *str, i1, i2 = 0;
669 if (i0 == LEADING_BYTE_CONTROL_1)
670 return (Emchar) (*++str - 0x20);
672 if (LEADING_BYTE_PREFIX_P (i0))
677 charset = CHARSET_BY_LEADING_BYTE (i0);
678 if (XCHARSET_DIMENSION (charset) == 2)
681 return MAKE_CHAR (charset, i1, i2);
685 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
686 Do not call this directly. Use the macro valid_char_p() instead. */
/* CH is split into CHAR_FIELD1, CHAR_FIELD2 and CHAR_FIELD3.  The
   visible lines contain two groups of checks: one driven by f2 (range
   test against the official and private FIELD2 limits, then a charset
   lookup by leading byte) and one driven by f1 (range test against the
   FIELD1 limits, f2 and f3 not below 0x20, optional composite-char hash
   lookup, then a charset lookup).  Both groups end by requiring
   XCHARSET_CHARS (charset) == 96.
   NOTE(review): presumably the first group handles dimension-1
   characters and the second dimension-2 characters, and the final test
   applies only to positions 0x20 and 0x7F, which exist only in
   96-character sets -- the selecting condition and the early returns
   are not visible here; confirm.  */
690 non_ascii_valid_char_p (Emchar ch)
694 /* Must have only lowest 19 bits set */
698 f1 = CHAR_FIELD1 (ch);
699 f2 = CHAR_FIELD2 (ch);
700 f3 = CHAR_FIELD3 (ch);
706 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
707 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
708 f2 > MAX_CHAR_FIELD2_PRIVATE)
713 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
714 f2 <= MAX_CHAR_FIELD2_PRIVATE))
718 NOTE: This takes advantage of the fact that
719 FIELD2_TO_OFFICIAL_LEADING_BYTE and
720 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
722 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
723 if (EQ (charset, Qnil))
725 return (XCHARSET_CHARS (charset) == 96);
731 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
732 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
733 f1 > MAX_CHAR_FIELD1_PRIVATE)
735 if (f2 < 0x20 || f3 < 0x20)
738 #ifdef ENABLE_COMPOSITE_CHARS
739 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
741 if (UNBOUNDP (Fgethash (make_int (ch),
742 Vcomposite_char_char2string_hash_table,
747 #endif /* ENABLE_COMPOSITE_CHARS */
749 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
750 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
753 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
755 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
758 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
760 if (EQ (charset, Qnil))
762 return (XCHARSET_CHARS (charset) == 96);
768 /************************************************************************/
769 /* Basic string functions */
770 /************************************************************************/
772 /* Copy the character pointed to by SRC into DST. Do not call this
773 directly. Use the macro charptr_copy_char() instead.
774 Return the number of bytes copied. */
/* The length of the character is taken from REP_BYTES_BY_FIRST_BYTE
   applied to the first source byte; the loop then advances SRC and DST
   together once per byte.  */
777 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
779 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
781 for (i = bytes; i; i--, dst++, src++)
787 /************************************************************************/
788 /* streams of Emchars */
789 /************************************************************************/
791 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
792 The functions below are not meant to be called directly; use
793 the macros in insdel.h. */
/* Lstream_get_emchar_1: CH supplies the first byte of a character; the
   remaining REP_BYTES_BY_FIRST_BYTE (ch) - 1 bytes are fetched from
   STREAM with Lstream_getc into a local buffer, which is then decoded
   with charptr_emchar.
   NOTE(review): a negative result from Lstream_getc in the middle of a
   character is only covered by bufpos_checking_assert; the byte is
   stored regardless.  */
796 Lstream_get_emchar_1 (Lstream *stream, int ch)
798 Bufbyte str[MAX_EMCHAR_LEN];
799 Bufbyte *strptr = str;
802 str[0] = (Bufbyte) ch;
804 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
806 int c = Lstream_getc (stream);
807 bufpos_checking_assert (c >= 0);
808 *++strptr = (Bufbyte) c;
810 return charptr_emchar (str);
/* Write character CH to STREAM: CH is encoded into a local buffer of
   MAX_EMCHAR_LEN bytes with set_charptr_emchar, and the resulting LEN
   bytes are handed to Lstream_write, whose result is returned.  */
814 Lstream_fput_emchar (Lstream *stream, Emchar ch)
816 Bufbyte str[MAX_EMCHAR_LEN];
817 Bytecount len = set_charptr_emchar (str, ch);
818 return Lstream_write (stream, str, len);
/* Push character CH back onto STREAM: CH is encoded into a local buffer
   of MAX_EMCHAR_LEN bytes with set_charptr_emchar, and the resulting
   LEN bytes are passed to Lstream_unread.  */
822 Lstream_funget_emchar (Lstream *stream, Emchar ch)
824 Bufbyte str[MAX_EMCHAR_LEN];
825 Bytecount len = set_charptr_emchar (str, ch);
826 Lstream_unread (stream, str, len);
830 /************************************************************************/
832 /************************************************************************/
/* Mark method for charset objects: marks the short name, long name,
   doc string, registry, CCL program and encoding table of OBJ.
   NOTE(review): the decoding table is not marked (the call below is
   commented out); presumably this relies on decoding tables being
   created with make_older_vector -- confirm.  The return value is not
   visible here.  */
835 mark_charset (Lisp_Object obj)
837 Lisp_Charset *cs = XCHARSET (obj);
839 mark_object (cs->short_name);
840 mark_object (cs->long_name);
841 mark_object (cs->doc_string);
842 mark_object (cs->registry);
843 mark_object (cs->ccl_program);
845 mark_object (cs->encoding_table);
846 /* mark_object (cs->decoding_table); */
/* Print method for charset objects.  Output is written to PRINTCHARFUN
   in this order: "#<charset ", the name, the short name, the long name,
   the doc string, a formatted summary (chars^dimension, direction as
   l2r or r2l, columns, graphic, final byte), the registry, and finally
   the header uid in hex followed by ">".
   The error "printing unreadable object" is raised on a condition that
   is not visible here (presumably readable printing was requested --
   confirm).
   NOTE(review): both sprintf calls write into buf, whose declaration
   and size are not visible here; confirm it is large enough.  */
852 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
854 Lisp_Charset *cs = XCHARSET (obj);
858 error ("printing unreadable object #<charset %s 0x%x>",
859 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
862 write_c_string ("#<charset ", printcharfun);
863 print_internal (CHARSET_NAME (cs), printcharfun, 0);
864 write_c_string (" ", printcharfun);
865 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
866 write_c_string (" ", printcharfun);
867 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
868 write_c_string (" ", printcharfun);
869 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
870 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
872 CHARSET_DIMENSION (cs),
873 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
874 CHARSET_COLUMNS (cs),
875 CHARSET_GRAPHIC (cs),
877 write_c_string (buf, printcharfun);
878 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
879 sprintf (buf, " 0x%x>", cs->header.uid);
880 write_c_string (buf, printcharfun);
/* Description of the Lisp_Object members of Lisp_Charset, followed by
   the lrecord implementation for the "charset" type, which installs
   mark_charset and print_charset.  The members listed here include
   decoding_table, which mark_charset does not mark.
   NOTE(review): the terminating entry of the table and the remaining
   arguments of DEFINE_LRECORD_IMPLEMENTATION are not visible here.  */
883 static const struct lrecord_description charset_description[] = {
884 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
885 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
886 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
887 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
888 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
889 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
890 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
892 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
893 { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
898 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
899 mark_charset, print_charset, 0, 0, 0,
903 /* Make a new charset. */
904 /* #### SJT Should generic properties be allowed? */
/* Visible behaviour: a Lisp_Charset lcrecord is allocated and its
   fields are filled from the arguments; the CCL program, the reverse
   direction charset, the decoding table and the encoding table all
   start as Qnil.  CHARSET_REP_BYTES is 1 for LEADING_BYTE_ASCII and
   otherwise dimension + 1 or dimension + 2.  The new object is then
   registered in chlook->charset_by_attributes (indexed by an ISO 2022
   type computed from DIMENSION and CHARS, and by FINAL), in
   chlook->charset_by_leading_byte at ID - MIN_LEADING_BYTE, and in
   Vcharset_hash_table under NAME.  The asserts require each lookup slot
   to be empty beforehand.
   NOTE(review): the DECODING_TABLE parameter is not used in the visible
   lines (the field is set to Qnil), and the `reg' parameter line is not
   visible.  Two forms of charset_by_attributes indexing appear, with
   and without DIRECTION; presumably they are preprocessor alternatives
   -- confirm.  */
906 make_charset (Charset_ID id, Lisp_Object name,
907 unsigned short chars, unsigned char dimension,
908 unsigned char columns, unsigned char graphic,
909 Bufbyte final, unsigned char direction, Lisp_Object short_name,
910 Lisp_Object long_name, Lisp_Object doc,
912 Lisp_Object decoding_table,
913 Emchar ucs_min, Emchar ucs_max,
914 Emchar code_offset, unsigned char byte_offset)
917 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
921 XSETCHARSET (obj, cs);
923 CHARSET_ID (cs) = id;
924 CHARSET_NAME (cs) = name;
925 CHARSET_SHORT_NAME (cs) = short_name;
926 CHARSET_LONG_NAME (cs) = long_name;
927 CHARSET_CHARS (cs) = chars;
928 CHARSET_DIMENSION (cs) = dimension;
929 CHARSET_DIRECTION (cs) = direction;
930 CHARSET_COLUMNS (cs) = columns;
931 CHARSET_GRAPHIC (cs) = graphic;
932 CHARSET_FINAL (cs) = final;
933 CHARSET_DOC_STRING (cs) = doc;
934 CHARSET_REGISTRY (cs) = reg;
935 CHARSET_CCL_PROGRAM (cs) = Qnil;
936 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
938 CHARSET_DECODING_TABLE(cs) = Qnil;
939 CHARSET_ENCODING_TABLE(cs) = Qnil;
940 CHARSET_UCS_MIN(cs) = ucs_min;
941 CHARSET_UCS_MAX(cs) = ucs_max;
942 CHARSET_CODE_OFFSET(cs) = code_offset;
943 CHARSET_BYTE_OFFSET(cs) = byte_offset;
947 if (id == LEADING_BYTE_ASCII)
948 CHARSET_REP_BYTES (cs) = 1;
950 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
952 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
957 /* some charsets do not have final characters. This includes
958 ASCII, Control-1, Composite, and the two faux private
960 unsigned char iso2022_type
961 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
963 if (code_offset == 0)
965 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
966 chlook->charset_by_attributes[iso2022_type][final] = obj;
970 (chlook->charset_by_attributes[iso2022_type][final][direction]));
971 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
975 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
976 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
978 /* Some charsets are "faux" and don't have names or really exist at
979 all except in the leading-byte table. */
981 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused private leading byte for a charset of the
   given DIMENSION.  Three counters in chlook are visible: a single
   next_allocated_leading_byte bounded by MAX_LEADING_BYTE_PRIVATE, and
   separate 1-byte and 2-byte counters bounded by
   MAX_LEADING_BYTE_PRIVATE_1 and MAX_LEADING_BYTE_PRIVATE_2.  The chosen
   counter is post-incremented to produce lb.  The error
   "No more character sets free for this dimension" is signalled with
   DIMENSION as its argument.
   NOTE(review): presumably the single-counter form and the
   per-dimension form are preprocessor alternatives, and the error is
   raised when the selected counter is past its limit -- the connecting
   lines are not visible here; confirm.  */
986 get_unallocated_leading_byte (int dimension)
991 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
994 lb = chlook->next_allocated_leading_byte++;
998 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1001 lb = chlook->next_allocated_1_byte_leading_byte++;
1005 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1008 lb = chlook->next_allocated_2_byte_leading_byte++;
1014 ("No more character sets free for this dimension",
1015 make_int (dimension));
1021 /* Number of Big5 characters which have the same code in 1st byte. */
/* That is 0x5E second bytes in 0xA1 - 0xFE plus 0x3F in 0x40 - 0x7E,
   157 in total.  */
1023 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* decode_builtin_char: compute the character for CODE_POINT in CHARSET
   arithmetically, from the charset's properties.
   Visible cases:
   - Vcharset_chinese_big5: a code whose first byte is in 0xA1 - 0xFE
     and whose second byte is in 0x40 - 0x7E or 0xA1 - 0xFE is turned
     into a linear index I, reassigned to Vcharset_chinese_big5_1 or
     Vcharset_chinese_big5_2 (with I rebased by the rows before 0xC9 for
     the latter), and re-expressed as a 94x94 code point.
   - Final byte >= '0': the result is the base of the matching block
     (MIN_CHAR_94x94 and MIN_CHAR_96x96 are visible for dimension 2)
     plus (final - '0') times the set size plus the position of the
     7-bit code within the set (bias 33 for 94-sets, 32 for 96-sets).
   - XCHARSET_UCS_MAX non-zero: the code point is converted to an offset
     using BYTE_OFFSET and CHARS, shifted by UCS_MIN - CODE_OFFSET, and
     range-checked against [UCS_MIN, UCS_MAX].
   NOTE(review): the big5_1 / big5_2 split condition, the default
   switch arms and the value returned on failure are not visible here.  */
1026 decode_builtin_char (Lisp_Object charset, int code_point)
1030 if (EQ (charset, Vcharset_chinese_big5))
1032 int c1 = code_point >> 8;
1033 int c2 = code_point & 0xFF;
1036 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
1038 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
1039 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
1041 I = (c1 - 0xA1) * BIG5_SAME_ROW
1042 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
1046 charset = Vcharset_chinese_big5_1;
1050 charset = Vcharset_chinese_big5_2;
1051 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1053 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
1056 if ((final = XCHARSET_FINAL (charset)) >= '0')
1058 if (XCHARSET_DIMENSION (charset) == 1)
1060 switch (XCHARSET_CHARS (charset))
1064 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1067 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1075 switch (XCHARSET_CHARS (charset))
1078 return MIN_CHAR_94x94
1079 + (final - '0') * 94 * 94
1080 + (((code_point >> 8) & 0x7F) - 33) * 94
1081 + ((code_point & 0x7F) - 33);
1083 return MIN_CHAR_96x96
1084 + (final - '0') * 96 * 96
1085 + (((code_point >> 8) & 0x7F) - 32) * 96
1086 + ((code_point & 0x7F) - 32);
1093 else if (XCHARSET_UCS_MAX (charset))
1096 = (XCHARSET_DIMENSION (charset) == 1
1098 code_point - XCHARSET_BYTE_OFFSET (charset)
1100 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1101 * XCHARSET_CHARS (charset)
1102 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1103 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
1104 if ((cid < XCHARSET_UCS_MIN (charset))
1105 || (XCHARSET_UCS_MAX (charset) < cid))
/* Compute the code point of character CH in CHARSET from the charset's
   numeric ranges rather than from its tables.
   Visible cases:
   - CH lies in [XCHARSET_UCS_MIN, XCHARSET_UCS_MAX]: the offset
     d = CH - UCS_MIN + CODE_OFFSET is split into one digit per
     dimension in base XCHARSET_CHARS, each digit is biased by
     XCHARSET_BYTE_OFFSET, and the digits are packed one per byte, most
     significant first (dimensions 1 to 4).  Charsets with 256 chars
     are tested first and handled separately.
   - CODE_OFFSET is 0: CH is compared with the block reserved for the
     charset's final byte, starting at MIN_CHAR_94, MIN_CHAR_96,
     MIN_CHAR_94x94 or MIN_CHAR_96x96 plus (final - '0') times the set
     size; two-dimensional results are packed as (row << 8) | column
     with a bias of 33 (94-sets) or 32 (96-sets).
   - Vcharset_mojikyo_2022_1: characters strictly between
     MIN_CHAR_MOJIKYO and MIN_CHAR_MOJIKYO + 94 * 60 * 94 are split into
     three bytes.
   NOTE(review): the upper-bound halves of the block tests, the value
   returned for characters outside every range, and any adjustment of
   byte2 in the mojikyo case are not visible here.  */
1114 range_charset_code_point (Lisp_Object charset, Emchar ch)
1118 if ((XCHARSET_UCS_MIN (charset) <= ch)
1119 && (ch <= XCHARSET_UCS_MAX (charset)))
1121 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1123 if (XCHARSET_CHARS (charset) == 256)
1125 else if (XCHARSET_DIMENSION (charset) == 1)
1126 return d + XCHARSET_BYTE_OFFSET (charset);
1127 else if (XCHARSET_DIMENSION (charset) == 2)
1129 ((d / XCHARSET_CHARS (charset)
1130 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1131 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1132 else if (XCHARSET_DIMENSION (charset) == 3)
1134 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1135 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1136 | ((d / XCHARSET_CHARS (charset)
1137 % XCHARSET_CHARS (charset)
1138 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1139 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1140 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1142 ((d / (XCHARSET_CHARS (charset)
1143 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1144 + XCHARSET_BYTE_OFFSET (charset)) << 24)
1145 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1146 % XCHARSET_CHARS (charset)
1147 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1148 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
1149 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1150 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1152 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1154 if (XCHARSET_DIMENSION (charset) == 1)
1156 if (XCHARSET_CHARS (charset) == 94)
1158 if (((d = ch - (MIN_CHAR_94
1159 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1163 else if (XCHARSET_CHARS (charset) == 96)
1165 if (((d = ch - (MIN_CHAR_96
1166 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1173 else if (XCHARSET_DIMENSION (charset) == 2)
1175 if (XCHARSET_CHARS (charset) == 94)
1177 if (((d = ch - (MIN_CHAR_94x94
1178 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1181 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1183 else if (XCHARSET_CHARS (charset) == 96)
1185 if (((d = ch - (MIN_CHAR_96x96
1186 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1189 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1195 if (EQ (charset, Vcharset_mojikyo_2022_1)
1196 && (MIN_CHAR_MOJIKYO < ch) && (ch < MIN_CHAR_MOJIKYO + 94 * 60 * 94))
1198 int m = ch - MIN_CHAR_MOJIKYO - 1;
1199 int byte1 = m / (94 * 60) + 33;
1200 int byte2 = (m % (94 * 60)) / 94;
1201 int byte3 = m % 94 + 33;
1207 return (byte1 << 16) | (byte2 << 8) | byte3;
/* Find the built-in charset that character C belongs to by its numeric
   range, store that charset through CHARSET and return the code point
   of C within it.
   The visible chain tests ranges in ascending order: basic Latin
   (Vcharset_ascii), control-1, Latin-1, Hebrew, Thai, halfwidth
   katakana, the BMP (Vcharset_ucs_bmp), Daikanwa, Mojikyo, the blocks
   reserved for 94, 96, 94x94 and 96x96 sets, Mojikyo again, China3-JEF
   and CBETA.  Gaps between ranges, and characters past the last range,
   are assigned to Vcharset_ucs.
   For the 94 / 96 / 94x94 / 96x96 blocks the charset is looked up with
   CHARSET_BY_ATTRIBUTES using the final byte derived from the position
   in the block; when no such charset is registered, Vcharset_ucs is
   used instead.
   NOTE(review): the halfwidth-katakana branch returns a list2 value and
   does not store through CHARSET, unlike every other branch; presumably
   it (and possibly the Hebrew branch) is disabled in the full file --
   confirm.  The return values of the Vcharset_ucs fallbacks are not
   visible here.  */
1213 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1215 if (c <= MAX_CHAR_BASIC_LATIN)
1217 *charset = Vcharset_ascii;
1222 *charset = Vcharset_control_1;
1227 *charset = Vcharset_latin_iso8859_1;
1231 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1233 *charset = Vcharset_hebrew_iso8859_8;
1234 return c - MIN_CHAR_HEBREW + 0x20;
1237 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1239 *charset = Vcharset_thai_tis620;
1240 return c - MIN_CHAR_THAI + 0x20;
1243 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1244 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1246 return list2 (Vcharset_katakana_jisx0201,
1247 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1250 else if (c <= MAX_CHAR_BMP)
1252 *charset = Vcharset_ucs_bmp;
1255 else if (c < MIN_CHAR_DAIKANWA)
1257 *charset = Vcharset_ucs;
1260 else if (c <= MAX_CHAR_DAIKANWA)
1262 *charset = Vcharset_ideograph_daikanwa;
1263 return c - MIN_CHAR_DAIKANWA;
1266 else if (c <= MAX_CHAR_MOJIKYO_0)
1268 *charset = Vcharset_mojikyo;
1269 return c - MIN_CHAR_MOJIKYO_0;
1272 else if (c < MIN_CHAR_94)
1274 *charset = Vcharset_ucs;
1277 else if (c <= MAX_CHAR_94)
1279 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1280 ((c - MIN_CHAR_94) / 94) + '0',
1281 CHARSET_LEFT_TO_RIGHT);
1282 if (!NILP (*charset))
1283 return ((c - MIN_CHAR_94) % 94) + 33;
1286 *charset = Vcharset_ucs;
1290 else if (c <= MAX_CHAR_96)
1292 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1293 ((c - MIN_CHAR_96) / 96) + '0',
1294 CHARSET_LEFT_TO_RIGHT);
1295 if (!NILP (*charset))
1296 return ((c - MIN_CHAR_96) % 96) + 32;
1299 *charset = Vcharset_ucs;
1303 else if (c <= MAX_CHAR_94x94)
1306 = CHARSET_BY_ATTRIBUTES (94, 2,
1307 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1308 CHARSET_LEFT_TO_RIGHT);
1309 if (!NILP (*charset))
1310 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1311 | (((c - MIN_CHAR_94x94) % 94) + 33);
1314 *charset = Vcharset_ucs;
1318 else if (c <= MAX_CHAR_96x96)
1321 = CHARSET_BY_ATTRIBUTES (96, 2,
1322 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1323 CHARSET_LEFT_TO_RIGHT);
1324 if (!NILP (*charset))
1325 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1326 | (((c - MIN_CHAR_96x96) % 96) + 32);
1329 *charset = Vcharset_ucs;
1334 else if (c < MIN_CHAR_MOJIKYO)
1336 *charset = Vcharset_ucs;
1339 else if (c <= MAX_CHAR_MOJIKYO)
1341 *charset = Vcharset_mojikyo;
1342 return c - MIN_CHAR_MOJIKYO;
1344 else if (c < MIN_CHAR_CHINA3_JEF)
1346 *charset = Vcharset_ucs;
1349 else if (c <= MAX_CHAR_CHINA3_JEF)
1351 *charset = Vcharset_china3_jef;
1352 return c - MIN_CHAR_CHINA3_JEF;
1354 else if (c <= MAX_CHAR_CBETA)
1356 *charset = Vcharset_ideograph_cbeta;
1357 return c - MIN_CHAR_CBETA;
1362 *charset = Vcharset_ucs;
1367 Lisp_Object Vdefault_coded_charset_priority_list;
1371 /************************************************************************/
1372 /* Basic charset Lisp functions */
1373 /************************************************************************/
/* Lisp predicate `charsetp': takes exactly one argument and yields Qt
   when CHARSETP holds for it, Qnil otherwise.  */
1375 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1376 Return non-nil if OBJECT is a charset.
1380 return CHARSETP (object) ? Qt : Qnil;
/* Lisp function `find-charset'.  Implementation: a charset object is
   returned unchanged; any other argument must pass CHECK_SYMBOL and is
   then looked up in Vcharset_hash_table with Qnil as the default.  */
1383 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1384 Retrieve the charset of the given name.
1385 If CHARSET-OR-NAME is a charset object, it is simply returned.
1386 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1387 nil is returned. Otherwise the associated charset object is returned.
1391 if (CHARSETP (charset_or_name))
1392 return charset_or_name;
1394 CHECK_SYMBOL (charset_or_name);
1395 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Lisp function `get-charset'.  Implementation: delegates to
   Ffind_charset and signals "No such charset" with NAME as the error
   datum.
   NOTE(review): the test guarding the error and the return statement
   are not visible here.  */
1398 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1399 Retrieve the charset of the given name.
1400 Same as `find-charset' except an error is signalled if there is no such
1401 charset instead of returning nil.
1405 Lisp_Object charset = Ffind_charset (name);
1408 signal_simple_error ("No such charset", name);
1412 /* We store the charsets in hash tables with the names as the key and the
1413 actual charset object as the value. Occasionally we need to use them
1414 in a list format. These routines provide us with that. */
/* Closure handed to the hash-table mapper: it carries a pointer to the
   list being built.  */
1415 struct charset_list_closure
1417 Lisp_Object *charset_list;
/* Mapper for the charset hash table: conses KEY (the charset name, not
   VALUE) onto the front of the list referenced by the closure.  */
1421 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1422 void *charset_list_closure)
1424 /* This function can GC */
1425 struct charset_list_closure *chcl =
1426 (struct charset_list_closure*) charset_list_closure;
1427 Lisp_Object *charset_list = chcl->charset_list;
1429 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
/* Lisp function `charset-list'.  Implementation: starts from an empty
   list protected with GCPRO1, walks Vcharset_hash_table with
   elisp_maphash using add_charset_to_list_mapper to collect the keys,
   and returns the resulting list.  */
1433 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1434 Return a list of the names of all defined charsets.
1438 Lisp_Object charset_list = Qnil;
1439 struct gcpro gcpro1;
1440 struct charset_list_closure charset_list_closure;
1442 GCPRO1 (charset_list);
1443 charset_list_closure.charset_list = &charset_list;
1444 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1445 &charset_list_closure);
1448 return charset_list;
1451 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1452 Return the name of charset CHARSET.
1456 return XCHARSET_NAME (Fget_charset (charset));
1459 /* #### SJT Should generic properties be allowed? */
1460 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1461 Define a new character set.
1462 This function is for use with Mule support.
1463 NAME is a symbol, the name by which the character set is normally referred.
1464 DOC-STRING is a string describing the character set.
1465 PROPS is a property list, describing the specific nature of the
1466 character set. Recognized properties are:
1468 'short-name Short version of the charset name (ex: Latin-1)
1469 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1470 'registry A regular expression matching the font registry field for
1472 'dimension Number of octets used to index a character in this charset.
1473 Either 1 or 2. Defaults to 1.
1474 'columns Number of columns used to display a character in this charset.
1475 Only used in TTY mode. (Under X, the actual width of a
1476 character can be derived from the font used to display the
1477 characters.) If unspecified, defaults to the dimension
1478 (this is almost always the correct value).
1479 'chars Number of characters in each dimension (94 or 96).
1480 Defaults to 94. Note that if the dimension is 2, the
1481 character set thus described is 94x94 or 96x96.
1482 'final Final byte of ISO 2022 escape sequence. Must be
1483 supplied. Each combination of (DIMENSION, CHARS) defines a
1484 separate namespace for final bytes. Note that ISO
1485 2022 restricts the final byte to the range
1486 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1487 dimension == 2. Note also that final bytes in the range
1488 0x30 - 0x3F are reserved for user-defined (not official)
1490 'graphic 0 (use left half of font on output) or 1 (use right half
1491 of font on output). Defaults to 0. For example, for
1492 a font whose registry is ISO8859-1, the left half
1493 (octets 0x20 - 0x7F) is the `ascii' character set, while
1494 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1495 character set. With 'graphic set to 0, the octets
1496 will have their high bit cleared; with it set to 1,
1497 the octets will have their high bit set.
1498 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1500 'ccl-program A compiled CCL program used to convert a character in
1501 this charset into an index into the font. This is in
1502 addition to the 'graphic property. The CCL program
1503 is passed the octets of the character, with the high
1504 bit cleared and set depending upon whether the value
1505 of the 'graphic property is 0 or 1.
1507 (name, doc_string, props))
1509 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1510 int direction = CHARSET_LEFT_TO_RIGHT;
1511 Lisp_Object registry = Qnil;
1512 Lisp_Object charset;
1513 Lisp_Object ccl_program = Qnil;
1514 Lisp_Object short_name = Qnil, long_name = Qnil;
1515 int byte_offset = -1;
1517 CHECK_SYMBOL (name);
1518 if (!NILP (doc_string))
1519 CHECK_STRING (doc_string);
1521 charset = Ffind_charset (name);
1522 if (!NILP (charset))
1523 signal_simple_error ("Cannot redefine existing charset", name);
1526 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1528 if (EQ (keyword, Qshort_name))
1530 CHECK_STRING (value);
1534 if (EQ (keyword, Qlong_name))
1536 CHECK_STRING (value);
1540 else if (EQ (keyword, Qdimension))
1543 dimension = XINT (value);
1544 if (dimension < 1 || dimension > 2)
1545 signal_simple_error ("Invalid value for 'dimension", value);
1548 else if (EQ (keyword, Qchars))
1551 chars = XINT (value);
1552 if (chars != 94 && chars != 96)
1553 signal_simple_error ("Invalid value for 'chars", value);
1556 else if (EQ (keyword, Qcolumns))
1559 columns = XINT (value);
1560 if (columns != 1 && columns != 2)
1561 signal_simple_error ("Invalid value for 'columns", value);
1564 else if (EQ (keyword, Qgraphic))
1567 graphic = XINT (value);
1569 if (graphic < 0 || graphic > 2)
1571 if (graphic < 0 || graphic > 1)
1573 signal_simple_error ("Invalid value for 'graphic", value);
1576 else if (EQ (keyword, Qregistry))
1578 CHECK_STRING (value);
1582 else if (EQ (keyword, Qdirection))
1584 if (EQ (value, Ql2r))
1585 direction = CHARSET_LEFT_TO_RIGHT;
1586 else if (EQ (value, Qr2l))
1587 direction = CHARSET_RIGHT_TO_LEFT;
1589 signal_simple_error ("Invalid value for 'direction", value);
1592 else if (EQ (keyword, Qfinal))
1594 CHECK_CHAR_COERCE_INT (value);
1595 final = XCHAR (value);
1596 if (final < '0' || final > '~')
1597 signal_simple_error ("Invalid value for 'final", value);
1600 else if (EQ (keyword, Qccl_program))
1602 struct ccl_program test_ccl;
1604 if (setup_ccl_program (&test_ccl, value) < 0)
1605 signal_simple_error ("Invalid value for 'ccl-program", value);
1606 ccl_program = value;
1610 signal_simple_error ("Unrecognized property", keyword);
1615 error ("'final must be specified");
1616 if (dimension == 2 && final > 0x5F)
1618 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1621 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1622 CHARSET_LEFT_TO_RIGHT)) ||
1623 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1624 CHARSET_RIGHT_TO_LEFT)))
1626 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1628 id = get_unallocated_leading_byte (dimension);
1630 if (NILP (doc_string))
1631 doc_string = build_string ("");
1633 if (NILP (registry))
1634 registry = build_string ("");
1636 if (NILP (short_name))
1637 XSETSTRING (short_name, XSYMBOL (name)->name);
1639 if (NILP (long_name))
1640 long_name = doc_string;
1643 columns = dimension;
1645 if (byte_offset < 0)
1649 else if (chars == 96)
1655 charset = make_charset (id, name, chars, dimension, columns, graphic,
1656 final, direction, short_name, long_name,
1657 doc_string, registry,
1658 Qnil, 0, 0, 0, byte_offset);
1659 if (!NILP (ccl_program))
1660 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp primitive `make-reverse-direction-charset'.
   Resolves CHARSET with Fget_charset and signals an error when it
   already has a reverse-direction partner or when NEW-NAME already names
   a charset.  It then calls make_charset with CHARSET's chars,
   dimension, columns, graphic, final byte, names, doc string and
   registry, the opposite direction, and a leading byte obtained from
   get_unallocated_leading_byte.  Finally the two charsets are linked to
   each other through their reverse-direction-charset slots.
   NOTE(review): some declaration and preprocessor lines of this function
   are not visible in this listing.  */
1664 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1666 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1667 NEW-NAME is the name of the new charset. Return the new charset.
1669 (charset, new_name))
1671 Lisp_Object new_charset = Qnil;
1672 int id, chars, dimension, columns, graphic, final;
1674 Lisp_Object registry, doc_string, short_name, long_name;
1677 charset = Fget_charset (charset);
1678 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1679 signal_simple_error ("Charset already has reverse-direction charset",
1682 CHECK_SYMBOL (new_name);
1683 if (!NILP (Ffind_charset (new_name)))
1684 signal_simple_error ("Cannot redefine existing charset", new_name);
1686 cs = XCHARSET (charset);
1688 chars = CHARSET_CHARS (cs);
1689 dimension = CHARSET_DIMENSION (cs);
1690 columns = CHARSET_COLUMNS (cs);
1691 id = get_unallocated_leading_byte (dimension);
1693 graphic = CHARSET_GRAPHIC (cs);
1694 final = CHARSET_FINAL (cs);
1695 direction = CHARSET_RIGHT_TO_LEFT;
1696 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1697 direction = CHARSET_LEFT_TO_RIGHT;
1698 doc_string = CHARSET_DOC_STRING (cs);
1699 short_name = CHARSET_SHORT_NAME (cs);
1700 long_name = CHARSET_LONG_NAME (cs);
1701 registry = CHARSET_REGISTRY (cs);
1703 new_charset = make_charset (id, new_name, chars, dimension, columns,
1704 graphic, final, direction, short_name, long_name,
1705 doc_string, registry,
1707 CHARSET_DECODING_TABLE(cs),
1708 CHARSET_UCS_MIN(cs),
1709 CHARSET_UCS_MAX(cs),
1710 CHARSET_CODE_OFFSET(cs),
1711 CHARSET_BYTE_OFFSET(cs)
1717 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1718 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1723 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1724 Define symbol ALIAS as an alias for CHARSET.
1728 CHECK_SYMBOL (alias);
1729 charset = Fget_charset (charset);
1730 return Fputhash (alias, charset, Vcharset_hash_table);
1733 /* #### Reverse direction charsets not yet implemented. */
/* Accessor for a charset's reverse-direction partner: resolves CHARSET
   with Fget_charset and returns its reverse-direction-charset slot.
   The corresponding DEFSUBR in syms_of_mule_charset is commented out, so
   this primitive is presumably compiled out as well -- TODO confirm the
   preprocessor guard, which is not visible in this listing.  */
1735 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1737 Return the reverse-direction charset parallel to CHARSET, if any.
1738 This is the charset with the same properties (in particular, the same
1739 dimension, number of characters per dimension, and final byte) as
1740 CHARSET but whose characters are displayed in the opposite direction.
1744 charset = Fget_charset (charset);
1745 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1749 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1750 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1751 If DIRECTION is omitted, both directions will be checked (left-to-right
1752 will be returned if character sets exist for both directions).
1754 (dimension, chars, final, direction))
1756 int dm, ch, fi, di = -1;
1757 Lisp_Object obj = Qnil;
1759 CHECK_INT (dimension);
1760 dm = XINT (dimension);
1761 if (dm < 1 || dm > 2)
1762 signal_simple_error ("Invalid value for DIMENSION", dimension);
1766 if (ch != 94 && ch != 96)
1767 signal_simple_error ("Invalid value for CHARS", chars);
1769 CHECK_CHAR_COERCE_INT (final);
1771 if (fi < '0' || fi > '~')
1772 signal_simple_error ("Invalid value for FINAL", final);
1774 if (EQ (direction, Ql2r))
1775 di = CHARSET_LEFT_TO_RIGHT;
1776 else if (EQ (direction, Qr2l))
1777 di = CHARSET_RIGHT_TO_LEFT;
1778 else if (!NILP (direction))
1779 signal_simple_error ("Invalid value for DIRECTION", direction);
1781 if (dm == 2 && fi > 0x5F)
1783 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1787 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1789 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1792 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1795 return XCHARSET_NAME (obj);
1799 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1800 Return short name of CHARSET.
1804 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1807 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1808 Return long name of CHARSET.
1812 return XCHARSET_LONG_NAME (Fget_charset (charset));
1815 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1816 Return description of CHARSET.
1820 return XCHARSET_DOC_STRING (Fget_charset (charset));
1823 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1824 Return dimension of CHARSET.
1828 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1831 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1832 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1833 Recognized properties are those listed in `make-charset', as well as
1834 'name and 'doc-string.
1840 charset = Fget_charset (charset);
1841 cs = XCHARSET (charset);
1843 CHECK_SYMBOL (prop);
1844 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1845 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1846 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1847 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1848 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1849 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1850 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1851 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1852 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1853 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1854 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1855 if (EQ (prop, Qdirection))
1856 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1857 if (EQ (prop, Qreverse_direction_charset))
1859 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1860 /* #### Is this translation OK? If so, error checking sufficient? */
1861 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1863 signal_simple_error ("Unrecognized charset property name", prop);
1864 return Qnil; /* not reached */
1867 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1868 Return charset identification number of CHARSET.
1872 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1875 /* #### We need to figure out which properties we really want to
1878 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1879 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1881 (charset, ccl_program))
1883 struct ccl_program test_ccl;
1885 charset = Fget_charset (charset);
1886 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1887 signal_simple_error ("Invalid ccl-program", ccl_program);
1888 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1893 invalidate_charset_font_caches (Lisp_Object charset)
1895 /* Invalidate font cache entries for charset on all devices. */
1896 Lisp_Object devcons, concons, hash_table;
1897 DEVICE_LOOP_NO_BREAK (devcons, concons)
1899 struct device *d = XDEVICE (XCAR (devcons));
1900 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1901 if (!UNBOUNDP (hash_table))
1902 Fclrhash (hash_table);
1906 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1907 Set the 'registry property of CHARSET to REGISTRY.
1909 (charset, registry))
1911 charset = Fget_charset (charset);
1912 CHECK_STRING (registry);
1913 XCHARSET_REGISTRY (charset) = registry;
1914 invalidate_charset_font_caches (charset);
1915 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1920 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1921 Return mapping-table of CHARSET.
1925 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Lisp primitive `set-charset-mapping-table'.
   Visible behaviour: CHARSET is resolved with Fget_charset.  One branch
   resets the charset's decoding table to Qnil (calling make_vector_newer
   on an existing vector table first).  A vector TABLE is validated with
   decoding_table_check_elements, whose failure results are reported as
   "Too big table", "Invalid element is found" or "Something wrong".  Any
   other TABLE signals wrong-type-argument with "vector-or-nil-p".
   Afterwards, depending on CHARSET_DIMENSION, the elements of TABLE (and
   of nested vectors in the second case) are passed to
   put_char_ccs_code_point with code points offset by
   CHARSET_BYTE_OFFSET.
   NOTE(review): many short lines (branch conditions, case labels,
   braces) are missing from this listing; confirm the exact control flow
   against the full file before changing anything here.  */
1928 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1929 Set mapping-table of CHARSET to TABLE.
1933 struct Lisp_Charset *cs;
1937 charset = Fget_charset (charset);
1938 cs = XCHARSET (charset);
1942 if (VECTORP (CHARSET_DECODING_TABLE(cs)))
1943 make_vector_newer (CHARSET_DECODING_TABLE(cs));
1944 CHARSET_DECODING_TABLE(cs) = Qnil;
1947 else if (VECTORP (table))
1949 int ccs_len = CHARSET_BYTE_SIZE (cs);
1950 int ret = decoding_table_check_elements (table,
1951 CHARSET_DIMENSION (cs),
1956 signal_simple_error ("Too big table", table);
1958 signal_simple_error ("Invalid element is found", table);
1960 signal_simple_error ("Something wrong", table);
1962 CHARSET_DECODING_TABLE(cs) = Qnil;
1965 signal_error (Qwrong_type_argument,
1966 list2 (build_translated_string ("vector-or-nil-p"),
1969 byte_offset = CHARSET_BYTE_OFFSET (cs);
1970 switch (CHARSET_DIMENSION (cs))
1973 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1975 Lisp_Object c = XVECTOR_DATA(table)[i];
1978 put_char_ccs_code_point (c, charset,
1979 make_int (i + byte_offset));
1983 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1985 Lisp_Object v = XVECTOR_DATA(table)[i];
1991 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1993 Lisp_Object c = XVECTOR_DATA(v)[j];
1996 put_char_ccs_code_point
1998 make_int ( ( (i + byte_offset) << 8 )
2004 put_char_ccs_code_point (v, charset,
2005 make_int (i + byte_offset));
2014 /************************************************************************/
2015 /* Lisp primitives for working with characters */
2016 /************************************************************************/
/* Lisp primitive `decode-char'.
   Resolves CHARSET with Fget_charset, decodes the code point with
   DECODE_CHAR and returns the resulting character, or nil when
   DECODE_CHAR yields a negative value.  Charsets whose graphic property
   is 1 get an extra adjustment of the code point first; that statement
   is not visible in this listing -- TODO confirm.  */
2019 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
2020 Make a character from CHARSET and code-point CODE.
2026 charset = Fget_charset (charset);
2029 if (XCHARSET_GRAPHIC (charset) == 1)
2031 c = DECODE_CHAR (charset, c);
2032 return c >= 0 ? make_char (c) : Qnil;
/* Lisp primitive `decode-builtin-char'.
   Resolves CHARSET with Fget_charset.  For Vcharset_latin_viscii the
   character is first decoded with Fdecode_char and its attributes for
   the viscii-lower / viscii-upper charsets are consulted through
   Fget_char_attribute; CHARSET is switched to whichever of those is
   used.  The code point is then decoded with decode_builtin_char, and a
   negative result falls back to Fdecode_char.
   NOTE(review): the conditions around the Fget_char_attribute calls and
   the graphic == 1 adjustment are not visible in this listing.  */
2035 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2036 Make a builtin character from CHARSET and code-point CODE.
2042 charset = Fget_charset (charset);
2044 if (EQ (charset, Vcharset_latin_viscii))
2046 Lisp_Object chr = Fdecode_char (charset, code);
2052 (ret = Fget_char_attribute (chr,
2053 Vcharset_latin_viscii_lower,
2056 charset = Vcharset_latin_viscii_lower;
2060 (ret = Fget_char_attribute (chr,
2061 Vcharset_latin_viscii_upper,
2064 charset = Vcharset_latin_viscii_upper;
2071 if (XCHARSET_GRAPHIC (charset) == 1)
2074 c = decode_builtin_char (charset, c);
2075 return c >= 0 ? make_char (c) : Fdecode_char (charset, code);
/* Lisp primitive `make-char'.
   Resolves CHARSET with Fget_charset and picks the legal octet range:
   0..127 for ascii, 0..31 for control-1, 0..255 for 256-character sets,
   33..126 for 94-character sets and 32..127 otherwise.  An octet outside
   that range is reported with args_out_of_range_3.  For a charset of
   dimension one a second octet is rejected ("second octet must be nil")
   and the character is built from ARG1 alone; otherwise ARG2 is masked
   with 0x7f, range-checked and combined with ARG1 by MAKE_CHAR.
   NOTE(review): the lines that extract ARG1 (and any preprocessor
   conditionals around the 8th-bit stripping) are missing from this
   listing.  */
2079 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2080 Make a character from CHARSET and octets ARG1 and ARG2.
2081 ARG2 is required only for characters from two-dimensional charsets.
2082 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2083 character s with caron.
2085 (charset, arg1, arg2))
2089 int lowlim, highlim;
2091 charset = Fget_charset (charset);
2092 cs = XCHARSET (charset);
2094 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2095 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2097 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2099 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2100 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2103 /* It is useful (and safe, according to Olivier Galibert) to strip
2104 the 8th bit off ARG1 and ARG2 because it allows programmers to
2105 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2106 Latin 2 code of the character. */
2114 if (a1 < lowlim || a1 > highlim)
2115 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2117 if (CHARSET_DIMENSION (cs) == 1)
2121 ("Charset is of dimension one; second octet must be nil", arg2);
2122 return make_char (MAKE_CHAR (charset, a1, 0));
2131 a2 = XINT (arg2) & 0x7f;
2133 if (a2 < lowlim || a2 > highlim)
2134 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2136 return make_char (MAKE_CHAR (charset, a1, a2));
2139 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2140 Return the character set of CHARACTER.
2144 CHECK_CHAR_COERCE_INT (character);
2146 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2149 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2150 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2151 N defaults to 0 if omitted.
2155 Lisp_Object charset;
2158 CHECK_CHAR_COERCE_INT (character);
2160 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2162 if (NILP (n) || EQ (n, Qzero))
2163 return make_int (octet0);
2164 else if (EQ (n, make_int (1)))
2165 return make_int (octet1);
2167 signal_simple_error ("Octet number must be 0 or 1", n);
/* Lisp primitive `split-char'.
   Two ways of building the result are visible below.  The first encodes
   the character with ENCODE_CHAR and conses the low byte of the code
   point once per dimension of the charset, then prepends the charset
   name.  The second splits the character with BREAKUP_CHAR and returns
   list3 (name, c1, c2) for a two-dimensional charset, or list2 (name,
   c1) otherwise.  Presumably a preprocessor conditional selects between
   the two -- its lines are missing from this listing, TODO confirm.
   CHARSET and RC are GC-protected while the list is built.  */
2170 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2171 Return list of charset and one or two position-codes of CHARACTER.
2175 /* This function can GC */
2176 struct gcpro gcpro1, gcpro2;
2177 Lisp_Object charset = Qnil;
2178 Lisp_Object rc = Qnil;
2186 GCPRO2 (charset, rc);
2187 CHECK_CHAR_COERCE_INT (character);
2190 code_point = ENCODE_CHAR (XCHAR (character), charset);
2191 dimension = XCHARSET_DIMENSION (charset);
2192 while (dimension > 0)
2194 rc = Fcons (make_int (code_point & 255), rc);
2198 rc = Fcons (XCHARSET_NAME (charset), rc);
2200 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2202 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2204 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2208 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2217 #ifdef ENABLE_COMPOSITE_CHARS
2218 /************************************************************************/
2219 /* composite character functions */
2220 /************************************************************************/
2223 lookup_composite_char (Bufbyte *str, int len)
2225 Lisp_Object lispstr = make_string (str, len);
2226 Lisp_Object ch = Fgethash (lispstr,
2227 Vcomposite_char_string2char_hash_table,
2233 if (composite_char_row_next >= 128)
2234 signal_simple_error ("No more composite chars available", lispstr);
2235 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2236 composite_char_col_next);
2237 Fputhash (make_char (emch), lispstr,
2238 Vcomposite_char_char2string_hash_table);
2239 Fputhash (lispstr, make_char (emch),
2240 Vcomposite_char_string2char_hash_table);
2241 composite_char_col_next++;
2242 if (composite_char_col_next >= 128)
2244 composite_char_col_next = 32;
2245 composite_char_row_next++;
2254 composite_char_string (Emchar ch)
2256 Lisp_Object str = Fgethash (make_char (ch),
2257 Vcomposite_char_char2string_hash_table,
2259 assert (!UNBOUNDP (str));
/* `make-composite-char': checks that STRING is a string and returns the
   composite character that lookup_composite_char assigns to its data
   and length.  Declared with xxDEFUN rather than DEFUN.  */
2263 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2264 Convert a string into a single composite character.
2265 The character is the result of overstriking all the characters in
2270 CHECK_STRING (string);
2271 return make_char (lookup_composite_char (XSTRING_DATA (string),
2272 XSTRING_LENGTH (string)));
/* `composite-char-string': signals "Must be composite char" unless the
   character's leading byte is LEADING_BYTE_COMPOSITE, and otherwise
   returns the string stored for it via composite_char_string.  Declared
   with xxDEFUN rather than DEFUN.  */
2275 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2276 Return a string of the characters comprising a composite character.
2284 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2285 signal_simple_error ("Must be composite char", ch);
2286 return composite_char_string (emch);
2288 #endif /* ENABLE_COMPOSITE_CHARS */
2291 /************************************************************************/
2292 /* initialization */
2293 /************************************************************************/
/* Symbol-level initialization for this file: sets up the charset lrecord
   implementation, registers the Lisp primitives defined above with
   DEFSUBR, and interns the property-name and charset-name symbols with
   defsymbol.  */
2296 syms_of_mule_charset (void)
2298 INIT_LRECORD_IMPLEMENTATION (charset);
/* Charset primitives. */
2300 DEFSUBR (Fcharsetp);
2301 DEFSUBR (Ffind_charset);
2302 DEFSUBR (Fget_charset);
2303 DEFSUBR (Fcharset_list);
2304 DEFSUBR (Fcharset_name);
2305 DEFSUBR (Fmake_charset);
2306 DEFSUBR (Fmake_reverse_direction_charset);
2307 /* DEFSUBR (Freverse_direction_charset); */
2308 DEFSUBR (Fdefine_charset_alias);
2309 DEFSUBR (Fcharset_from_attributes);
2310 DEFSUBR (Fcharset_short_name);
2311 DEFSUBR (Fcharset_long_name);
2312 DEFSUBR (Fcharset_description);
2313 DEFSUBR (Fcharset_dimension);
2314 DEFSUBR (Fcharset_property);
2315 DEFSUBR (Fcharset_id);
2316 DEFSUBR (Fset_charset_ccl_program);
2317 DEFSUBR (Fset_charset_registry);
2319 DEFSUBR (Fcharset_mapping_table);
2320 DEFSUBR (Fset_charset_mapping_table);
/* Character primitives. */
2324 DEFSUBR (Fdecode_char);
2325 DEFSUBR (Fdecode_builtin_char);
2327 DEFSUBR (Fmake_char);
2328 DEFSUBR (Fchar_charset);
2329 DEFSUBR (Fchar_octet);
2330 DEFSUBR (Fsplit_char);
2332 #ifdef ENABLE_COMPOSITE_CHARS
2333 DEFSUBR (Fmake_composite_char);
2334 DEFSUBR (Fcomposite_char_string);
/* Symbols used as charset property names and direction values. */
2337 defsymbol (&Qcharsetp, "charsetp");
2338 defsymbol (&Qregistry, "registry");
2339 defsymbol (&Qfinal, "final");
2340 defsymbol (&Qgraphic, "graphic");
2341 defsymbol (&Qdirection, "direction");
2342 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2343 defsymbol (&Qshort_name, "short-name");
2344 defsymbol (&Qlong_name, "long-name");
2346 defsymbol (&Ql2r, "l2r");
2347 defsymbol (&Qr2l, "r2l");
2349 /* Charsets, compatible with FSF 20.3
2350 Naming convention is Script-Charset[-Edition] */
2351 defsymbol (&Qascii, "ascii");
2352 defsymbol (&Qcontrol_1, "control-1");
2353 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2354 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2355 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2356 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2357 defsymbol (&Qthai_tis620, "thai-tis620");
2358 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2359 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2360 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2361 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2362 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2363 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2364 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2365 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2366 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2367 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2368 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2369 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2370 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2371 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2372 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2373 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2375 defsymbol (&Qucs, "ucs");
2376 defsymbol (&Qucs_bmp, "ucs-bmp");
2377 defsymbol (&Qucs_cns, "ucs-cns");
2378 defsymbol (&Qucs_jis, "ucs-jis");
2379 defsymbol (&Qucs_big5, "ucs-big5");
2380 defsymbol (&Qlatin_viscii, "latin-viscii");
2381 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2382 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2383 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2384 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2385 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2386 defsymbol (&Qideograph_gt, "ideograph-gt");
2387 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2388 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2389 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2390 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2391 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2392 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2393 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2394 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2395 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2396 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2397 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2398 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2399 defsymbol (&Qchinese_big5, "chinese-big5");
2400 defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp");
2401 defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
2402 defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
2403 defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
2404 defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
2405 defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
2406 defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
2407 defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
2408 defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
2409 defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
2410 defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
2411 defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
2412 defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
2413 defsymbol (&Qchina3_jef, "china3-jef");
2414 defsymbol (&Qideograph_cbeta, "ideograph-cbeta");
2415 defsymbol (&Qmojikyo, "mojikyo");
2416 defsymbol (&Qmojikyo_2022_1, "mojikyo-2022-1");
2417 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
2418 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
2419 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
2420 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
2421 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
2422 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
2423 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
2424 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
2425 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
2426 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
2427 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
2428 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
2429 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
2430 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
2431 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
2432 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
2433 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
2434 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
2435 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
2436 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
2437 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
2438 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2440 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2441 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2443 defsymbol (&Qcomposite, "composite");
/* Variable-level initialization for this file: allocates the zeroed
   charset_lookup structure and registers it with
   dump_add_root_struct_ptr, fills the charset lookup tables with Qnil,
   seeds the next-allocated leading-byte counters, and defines the Lisp
   variables `leading-code-private-11' and
   `default-coded-charset-priority-list'.
   NOTE(review): the two charset_by_attributes loops below (two- and
   three-level) and the two kinds of leading-byte counters look like
   alternatives selected by preprocessor conditionals that are not
   visible in this listing -- TODO confirm.  */
2447 vars_of_mule_charset (void)
2454 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2455 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2457 /* Table of charsets indexed by leading byte. */
2458 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2459 chlook->charset_by_leading_byte[i] = Qnil;
2462 /* Table of charsets indexed by type/final-byte. */
2463 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2464 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2465 chlook->charset_by_attributes[i][j] = Qnil;
2467 /* Table of charsets indexed by type/final-byte/direction. */
2468 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2469 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2470 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2471 chlook->charset_by_attributes[i][j][k] = Qnil;
/* Leading bytes for new charsets are handed out starting at the first
   private value(s). */
2475 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2477 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2478 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2482 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2483 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2484 Leading-code of private TYPE9N charset of column-width 1.
2486 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2490 Vdefault_coded_charset_priority_list = Qnil;
2491 DEFVAR_LISP ("default-coded-charset-priority-list",
2492 &Vdefault_coded_charset_priority_list /*
2493 Default order of preferred coded-character-sets.
2499 complex_vars_of_mule_charset (void)
2501 staticpro (&Vcharset_hash_table);
2502 Vcharset_hash_table =
2503 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2505 /* Predefined character sets. We store them into variables for
2509 staticpro (&Vcharset_ucs);
2511 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2512 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2513 build_string ("UCS"),
2514 build_string ("UCS"),
2515 build_string ("ISO/IEC 10646"),
2517 Qnil, 0, 0xFFFFFFF, 0, 0);
2518 staticpro (&Vcharset_ucs_bmp);
2520 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2521 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2522 build_string ("BMP"),
2523 build_string ("BMP"),
2524 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2525 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2526 Qnil, 0, 0xFFFF, 0, 0);
2527 staticpro (&Vcharset_ucs_cns);
2529 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2530 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2531 build_string ("UCS for CNS"),
2532 build_string ("UCS for CNS 11643"),
2533 build_string ("ISO/IEC 10646 for CNS 11643"),
2536 staticpro (&Vcharset_ucs_jis);
2538 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2539 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2540 build_string ("UCS for JIS"),
2541 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2542 build_string ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2545 staticpro (&Vcharset_ucs_big5);
2547 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2548 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2549 build_string ("UCS for Big5"),
2550 build_string ("UCS for Big5"),
2551 build_string ("ISO/IEC 10646 for Big5"),
2555 # define MIN_CHAR_THAI 0
2556 # define MAX_CHAR_THAI 0
2557 /* # define MIN_CHAR_HEBREW 0 */
2558 /* # define MAX_CHAR_HEBREW 0 */
2559 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2560 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2562 staticpro (&Vcharset_ascii);
2564 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2565 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2566 build_string ("ASCII"),
2567 build_string ("ASCII)"),
2568 build_string ("ASCII (ISO646 IRV)"),
2569 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2570 Qnil, 0, 0x7F, 0, 0);
2571 staticpro (&Vcharset_control_1);
2572 Vcharset_control_1 =
2573 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2574 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2575 build_string ("C1"),
2576 build_string ("Control characters"),
2577 build_string ("Control characters 128-191"),
2579 Qnil, 0x80, 0x9F, 0, 0);
2580 staticpro (&Vcharset_latin_iso8859_1);
2581 Vcharset_latin_iso8859_1 =
2582 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2583 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2584 build_string ("Latin-1"),
2585 build_string ("ISO8859-1 (Latin-1)"),
2586 build_string ("ISO8859-1 (Latin-1)"),
2587 build_string ("iso8859-1"),
2588 Qnil, 0xA0, 0xFF, 0, 32);
2589 staticpro (&Vcharset_latin_iso8859_2);
2590 Vcharset_latin_iso8859_2 =
2591 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2592 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2593 build_string ("Latin-2"),
2594 build_string ("ISO8859-2 (Latin-2)"),
2595 build_string ("ISO8859-2 (Latin-2)"),
2596 build_string ("iso8859-2"),
2598 staticpro (&Vcharset_latin_iso8859_3);
2599 Vcharset_latin_iso8859_3 =
2600 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2601 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2602 build_string ("Latin-3"),
2603 build_string ("ISO8859-3 (Latin-3)"),
2604 build_string ("ISO8859-3 (Latin-3)"),
2605 build_string ("iso8859-3"),
2607 staticpro (&Vcharset_latin_iso8859_4);
2608 Vcharset_latin_iso8859_4 =
2609 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2610 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2611 build_string ("Latin-4"),
2612 build_string ("ISO8859-4 (Latin-4)"),
2613 build_string ("ISO8859-4 (Latin-4)"),
2614 build_string ("iso8859-4"),
2616 staticpro (&Vcharset_thai_tis620);
2617 Vcharset_thai_tis620 =
2618 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2619 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2620 build_string ("TIS620"),
2621 build_string ("TIS620 (Thai)"),
2622 build_string ("TIS620.2529 (Thai)"),
2623 build_string ("tis620"),
2624 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2625 staticpro (&Vcharset_greek_iso8859_7);
2626 Vcharset_greek_iso8859_7 =
2627 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2628 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2629 build_string ("ISO8859-7"),
2630 build_string ("ISO8859-7 (Greek)"),
2631 build_string ("ISO8859-7 (Greek)"),
2632 build_string ("iso8859-7"),
2634 staticpro (&Vcharset_arabic_iso8859_6);
2635 Vcharset_arabic_iso8859_6 =
2636 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2637 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2638 build_string ("ISO8859-6"),
2639 build_string ("ISO8859-6 (Arabic)"),
2640 build_string ("ISO8859-6 (Arabic)"),
2641 build_string ("iso8859-6"),
2643 staticpro (&Vcharset_hebrew_iso8859_8);
2644 Vcharset_hebrew_iso8859_8 =
2645 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2646 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2647 build_string ("ISO8859-8"),
2648 build_string ("ISO8859-8 (Hebrew)"),
2649 build_string ("ISO8859-8 (Hebrew)"),
2650 build_string ("iso8859-8"),
2652 0 /* MIN_CHAR_HEBREW */,
2653 0 /* MAX_CHAR_HEBREW */, 0, 32);
2654 staticpro (&Vcharset_katakana_jisx0201);
2655 Vcharset_katakana_jisx0201 =
2656 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2657 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2658 build_string ("JISX0201 Kana"),
2659 build_string ("JISX0201.1976 (Japanese Kana)"),
2660 build_string ("JISX0201.1976 Japanese Kana"),
2661 build_string ("jisx0201\\.1976"),
2663 staticpro (&Vcharset_latin_jisx0201);
2664 Vcharset_latin_jisx0201 =
2665 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2666 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2667 build_string ("JISX0201 Roman"),
2668 build_string ("JISX0201.1976 (Japanese Roman)"),
2669 build_string ("JISX0201.1976 Japanese Roman"),
2670 build_string ("jisx0201\\.1976"),
2672 staticpro (&Vcharset_cyrillic_iso8859_5);
2673 Vcharset_cyrillic_iso8859_5 =
2674 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2675 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2676 build_string ("ISO8859-5"),
2677 build_string ("ISO8859-5 (Cyrillic)"),
2678 build_string ("ISO8859-5 (Cyrillic)"),
2679 build_string ("iso8859-5"),
2681 staticpro (&Vcharset_latin_iso8859_9);
2682 Vcharset_latin_iso8859_9 =
2683 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2684 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2685 build_string ("Latin-5"),
2686 build_string ("ISO8859-9 (Latin-5)"),
2687 build_string ("ISO8859-9 (Latin-5)"),
2688 build_string ("iso8859-9"),
2690 staticpro (&Vcharset_japanese_jisx0208_1978);
2691 Vcharset_japanese_jisx0208_1978 =
2692 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2693 Qjapanese_jisx0208_1978, 94, 2,
2694 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2695 build_string ("JIS X0208:1978"),
2696 build_string ("JIS X0208:1978 (Japanese)"),
2698 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2699 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
/* GB 2312 (simplified Chinese): created with 94 and 2 as the size
   arguments and 'A' as the final character.  The three descriptive
   strings are apparently short name, long name and doc string,
   followed by the registry regexp (NOTE(review): confirm the order
   against make_charset).  */
2701 staticpro (&Vcharset_chinese_gb2312);
2702 Vcharset_chinese_gb2312 =
2703 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2704 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2705 build_string ("GB2312"),
2706 build_string ("GB2312"),
2707 build_string ("GB2312 Chinese simplified"),
2708 build_string ("gb2312"),
/* GB 12345-1990: created with 94 and 2 as the size arguments and no
   final character (0).  The three descriptive strings are apparently
   short name, long name and doc string, followed by the registry
   regexp, which accepts an optional ".1990" suffix before "-0"
   (NOTE(review): confirm the string order against make_charset).  */
2710 staticpro (&Vcharset_chinese_gb12345);
2711 Vcharset_chinese_gb12345 =
2712 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2713 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2714 build_string ("G1"),
2715 build_string ("GB 12345"),
2716 build_string ("GB 12345-1990"),
2717 build_string ("GB12345\\(\\.1990\\)?-0"),
2719 staticpro (&Vcharset_japanese_jisx0208);
2720 Vcharset_japanese_jisx0208 =
2721 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2722 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2723 build_string ("JISX0208"),
2724 build_string ("JIS X0208:1983 (Japanese)"),
2725 build_string ("JIS X0208:1983 Japanese Kanji"),
2726 build_string ("jisx0208\\.1983"),
2729 staticpro (&Vcharset_japanese_jisx0208_1990);
2730 Vcharset_japanese_jisx0208_1990 =
2731 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2732 Qjapanese_jisx0208_1990, 94, 2,
2733 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2734 build_string ("JISX0208-1990"),
2735 build_string ("JIS X0208:1990 (Japanese)"),
2736 build_string ("JIS X0208:1990 Japanese Kanji"),
2737 build_string ("jisx0208\\.1990"),
2739 MIN_CHAR_JIS_X0208_1990,
2740 MAX_CHAR_JIS_X0208_1990, 0, 33);
/* KS C 5601 (Korean Hangul and Hanja): created with 94 and 2 as the
   size arguments and 'C' as the final character.  The three
   descriptive strings are apparently short name, long name and doc
   string, followed by the registry regexp (NOTE(review): confirm the
   order against make_charset).  The long name follows the
   "NAME (Language)" form used by the neighbouring charsets.  */
2742 staticpro (&Vcharset_korean_ksc5601);
2743 Vcharset_korean_ksc5601 =
2744 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2745 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2746 build_string ("KSC5601"),
2747 build_string ("KSC5601 (Korean)"),
2748 build_string ("KSC5601 Korean Hangul and Hanja"),
2749 build_string ("ksc5601"),
2751 staticpro (&Vcharset_japanese_jisx0212);
2752 Vcharset_japanese_jisx0212 =
2753 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2754 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2755 build_string ("JISX0212"),
2756 build_string ("JISX0212 (Japanese)"),
2757 build_string ("JISX0212 Japanese Supplement"),
2758 build_string ("jisx0212"),
/* Regexp source for one CNS 11643 plane: "cns11643", then '.' or '-',
   then an optional run of arbitrary characters ending in '.' or '-',
   then N anchored at the end of the string.  N is joined in by
   adjacent string-literal concatenation, so it must itself be a string
   literal such as "1".  */
2761 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2762 staticpro (&Vcharset_chinese_cns11643_1);
2763 Vcharset_chinese_cns11643_1 =
2764 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2765 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2766 build_string ("CNS11643-1"),
2767 build_string ("CNS11643-1 (Chinese traditional)"),
2769 ("CNS 11643 Plane 1 Chinese traditional"),
2770 build_string (CHINESE_CNS_PLANE_RE("1")),
2772 staticpro (&Vcharset_chinese_cns11643_2);
2773 Vcharset_chinese_cns11643_2 =
2774 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2775 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2776 build_string ("CNS11643-2"),
2777 build_string ("CNS11643-2 (Chinese traditional)"),
2779 ("CNS 11643 Plane 2 Chinese traditional"),
2780 build_string (CHINESE_CNS_PLANE_RE("2")),
/* Vietnamese TCVN 5712 (VSCII-2): created with 96 and 1 as the size
   arguments and 'Z' as the final character.  The three descriptive
   strings are apparently short name, long name and doc string,
   followed by the registry regexp (NOTE(review): confirm the order
   against make_charset).  The year in the doc string agrees with the
   optional ".1993" accepted by the registry regexp.  */
2783 staticpro (&Vcharset_latin_tcvn5712);
2784 Vcharset_latin_tcvn5712 =
2785 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2786 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2787 build_string ("TCVN 5712"),
2788 build_string ("TCVN 5712 (VSCII-2)"),
2789 build_string ("Vietnamese TCVN 5712:1993 (VSCII-2)"),
2790 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2792 staticpro (&Vcharset_latin_viscii_lower);
2793 Vcharset_latin_viscii_lower =
2794 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2795 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2796 build_string ("VISCII lower"),
2797 build_string ("VISCII lower (Vietnamese)"),
2798 build_string ("VISCII lower (Vietnamese)"),
2799 build_string ("MULEVISCII-LOWER"),
2801 staticpro (&Vcharset_latin_viscii_upper);
2802 Vcharset_latin_viscii_upper =
2803 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2804 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2805 build_string ("VISCII upper"),
2806 build_string ("VISCII upper (Vietnamese)"),
2807 build_string ("VISCII upper (Vietnamese)"),
2808 build_string ("MULEVISCII-UPPER"),
2810 staticpro (&Vcharset_latin_viscii);
2811 Vcharset_latin_viscii =
2812 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2813 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2814 build_string ("VISCII"),
2815 build_string ("VISCII 1.1 (Vietnamese)"),
2816 build_string ("VISCII 1.1 (Vietnamese)"),
2817 build_string ("VISCII1\\.1"),
2819 staticpro (&Vcharset_chinese_big5);
2820 Vcharset_chinese_big5 =
2821 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2822 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2823 build_string ("Big5"),
2824 build_string ("Big5"),
2825 build_string ("Big5 Chinese traditional"),
2826 build_string ("big5"),
2828 /* 0 */ MIN_CHAR_BIG5_CDP,
2829 /* 0 */ MAX_CHAR_BIG5_CDP, 0, 0);
2830 staticpro (&Vcharset_chinese_big5_cdp);
2831 Vcharset_chinese_big5_cdp =
2832 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
2833 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2834 build_string ("Big5-CDP"),
2835 build_string ("Big5 + CDP extension"),
2836 build_string ("Big5 with CDP extension"),
2837 build_string ("big5\\.cdp-0"),
2838 Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP, 0, 0);
/* DEF_HANZIKU(n): emit the staticpro call and the make_charset
   assignment for Vcharset_ideograph_hanziku_<n>.  N is token-pasted
   into the variable, LEADING_BYTE_HANZIKU_, Qideograph_hanziku_ and
   MIN_/MAX_CHAR_HANZIKU_ names, and stringized into the descriptive
   strings and the "hanziku-<n>$" pattern.  The expansion is two plain
   statements (not wrapped in do { } while (0)) and already ends in
   ';', so use it only as a statement of its own, never as the
   unbraced body of an if/else or loop.  */
2839 #define DEF_HANZIKU(n) \
2840 staticpro (&Vcharset_ideograph_hanziku_##n); \
2841 Vcharset_ideograph_hanziku_##n = \
2842 make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
2843 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
2844 build_string ("HZK-"#n), \
2845 build_string ("HANZIKU-"#n), \
2846 build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
2848 ("hanziku-"#n"$"), \
2849 Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, 0, 0);
2862 staticpro (&Vcharset_china3_jef);
2863 Vcharset_china3_jef =
2864 make_charset (LEADING_BYTE_CHINA3_JEF, Qchina3_jef, 256, 2,
2865 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2866 build_string ("JC3"),
2867 build_string ("JEF + CHINA3"),
2868 build_string ("JEF + CHINA3 private characters"),
2869 build_string ("china3jef-0"),
2870 Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF, 0, 0);
2871 staticpro (&Vcharset_ideograph_cbeta);
2872 Vcharset_ideograph_cbeta =
2873 make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,
2874 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2875 build_string ("CB"),
2876 build_string ("CBETA"),
2877 build_string ("CBETA private characters"),
2878 build_string ("cbeta-0"),
2879 Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA, 0, 0);
2880 staticpro (&Vcharset_ideograph_gt);
2881 Vcharset_ideograph_gt =
2882 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
2883 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2884 build_string ("GT"),
2885 build_string ("GT"),
2886 build_string ("GT"),
2888 Qnil, MIN_CHAR_GT, MAX_CHAR_GT, 0, 0);
2889 #define DEF_GT_PJ(n) \
2890 staticpro (&Vcharset_ideograph_gt_pj_##n); \
2891 Vcharset_ideograph_gt_pj_##n = \
2892 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
2893 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
2894 build_string ("GT-PJ-"#n), \
2895 build_string ("GT (pseudo JIS encoding) part "#n), \
2896 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
2898 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
2912 staticpro (&Vcharset_ideograph_daikanwa);
2913 Vcharset_ideograph_daikanwa =
2914 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
2915 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2916 build_string ("Daikanwa"),
2917 build_string ("Morohashi's Daikanwa"),
2918 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
2919 build_string ("Daikanwa"),
2920 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
2921 staticpro (&Vcharset_mojikyo);
2923 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
2924 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2925 build_string ("Mojikyo"),
2926 build_string ("Mojikyo"),
2927 build_string ("Konjaku-Mojikyo"),
2929 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
2930 staticpro (&Vcharset_mojikyo_2022_1);
2931 Vcharset_mojikyo_2022_1 =
2932 make_charset (LEADING_BYTE_MOJIKYO_2022_1, Qmojikyo_2022_1, 94, 3,
2933 2, 2, ':', CHARSET_LEFT_TO_RIGHT,
2934 build_string ("Mojikyo-2022-1"),
2935 build_string ("Mojikyo ISO-2022 Part 1"),
2936 build_string ("Konjaku-Mojikyo for ISO/IEC 2022 Part 1"),
2940 #define DEF_MOJIKYO_PJ(n) \
2941 staticpro (&Vcharset_mojikyo_pj_##n); \
2942 Vcharset_mojikyo_pj_##n = \
2943 make_charset (LEADING_BYTE_MOJIKYO_PJ_##n, Qmojikyo_pj_##n, 94, 2, \
2944 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
2945 build_string ("Mojikyo-PJ-"#n), \
2946 build_string ("Mojikyo (pseudo JIS encoding) part "#n), \
2948 ("Konjaku-Mojikyo (pseudo JIS encoding) part "#n), \
2950 ("\\(MojikyoPJ-"#n "\\|jisx0208\\.Mojikyo-"#n "\\)$"), \
2962 DEF_MOJIKYO_PJ (10);
2963 DEF_MOJIKYO_PJ (11);
2964 DEF_MOJIKYO_PJ (12);
2965 DEF_MOJIKYO_PJ (13);
2966 DEF_MOJIKYO_PJ (14);
2967 DEF_MOJIKYO_PJ (15);
2968 DEF_MOJIKYO_PJ (16);
2969 DEF_MOJIKYO_PJ (17);
2970 DEF_MOJIKYO_PJ (18);
2971 DEF_MOJIKYO_PJ (19);
2972 DEF_MOJIKYO_PJ (20);
2973 DEF_MOJIKYO_PJ (21);
/* Ethiopic ("Ethiopic of UCS"): created with 256 and 2 as the size
   arguments and no final character (0).  The trailing numbers give
   0x1200 and 0x137F as a pair, then 0x1200 again.
   NOTE(review): presumably the pair is the covered UCS range and the
   repeated 0x1200 is a code offset, making charset code points
   relative to the start of that range -- confirm against
   make_charset's parameter list.  */
2975 staticpro (&Vcharset_ethiopic_ucs);
2976 Vcharset_ethiopic_ucs =
2977 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
2978 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2979 build_string ("Ethiopic (UCS)"),
2980 build_string ("Ethiopic (UCS)"),
2981 build_string ("Ethiopic of UCS"),
2982 build_string ("Ethiopic-Unicode"),
2983 Qnil, 0x1200, 0x137F, 0x1200, 0);
2985 staticpro (&Vcharset_chinese_big5_1);
2986 Vcharset_chinese_big5_1 =
2987 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
2988 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
2989 build_string ("Big5"),
2990 build_string ("Big5 (Level-1)"),
2992 ("Big5 Level-1 Chinese traditional"),
2993 build_string ("big5"),
2995 staticpro (&Vcharset_chinese_big5_2);
2996 Vcharset_chinese_big5_2 =
2997 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
2998 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
2999 build_string ("Big5"),
3000 build_string ("Big5 (Level-2)"),
3002 ("Big5 Level-2 Chinese traditional"),
3003 build_string ("big5"),
/* Composite-character support, compiled only when
   ENABLE_COMPOSITE_CHARS is defined: creates the `composite' charset,
   resets the next-position counters and builds the two lookup tables
   between composite strings and their characters.  */
3006 #ifdef ENABLE_COMPOSITE_CHARS
3007 /* #### For simplicity, we put composite chars into a 96x96 charset.
3008 This is going to lead to problems because you can run out of
3009 room, esp. as we don't yet recycle numbers. */
3010 staticpro (&Vcharset_composite);
3011 Vcharset_composite =
3012 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3013 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3014 build_string ("Composite"),
3015 build_string ("Composite characters"),
3016 build_string ("Composite characters"),
3019 /* #### not dumped properly */
/* Both counters start at 32.  NOTE(review): presumably the first usable
   row/column of the 96x96 charset mentioned above -- confirm where the
   counters are consumed.  */
3020 composite_char_row_next = 32;
3021 composite_char_col_next = 32;
/* Two non-weak tables, each created with 500 as the first argument:
   string -> char uses HASH_TABLE_EQUAL, char -> string uses
   HASH_TABLE_EQ.  Both are then registered with staticpro, like the
   charset variables in this function.  */
3023 Vcomposite_char_string2char_hash_table =
3024 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3025 Vcomposite_char_char2string_hash_table =
3026 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3027 staticpro (&Vcomposite_char_string2char_hash_table);
3028 staticpro (&Vcomposite_char_char2string_hash_table);
3029 #endif /* ENABLE_COMPOSITE_CHARS */