1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_cns;
70 Lisp_Object Vcharset_ucs_jis;
71 Lisp_Object Vcharset_ucs_big5;
72 Lisp_Object Vcharset_latin_viscii;
73 Lisp_Object Vcharset_latin_tcvn5712;
74 Lisp_Object Vcharset_latin_viscii_lower;
75 Lisp_Object Vcharset_latin_viscii_upper;
76 Lisp_Object Vcharset_chinese_big5;
77 Lisp_Object Vcharset_chinese_big5_cdp;
78 Lisp_Object Vcharset_ideograph_hanziku_1;
79 Lisp_Object Vcharset_ideograph_hanziku_2;
80 Lisp_Object Vcharset_ideograph_hanziku_3;
81 Lisp_Object Vcharset_ideograph_hanziku_4;
82 Lisp_Object Vcharset_ideograph_hanziku_5;
83 Lisp_Object Vcharset_ideograph_hanziku_6;
84 Lisp_Object Vcharset_ideograph_hanziku_7;
85 Lisp_Object Vcharset_ideograph_hanziku_8;
86 Lisp_Object Vcharset_ideograph_hanziku_9;
87 Lisp_Object Vcharset_ideograph_hanziku_10;
88 Lisp_Object Vcharset_ideograph_hanziku_11;
89 Lisp_Object Vcharset_ideograph_hanziku_12;
90 Lisp_Object Vcharset_china3_jef;
91 Lisp_Object Vcharset_ideograph_cbeta;
92 Lisp_Object Vcharset_ideograph_gt;
93 Lisp_Object Vcharset_ideograph_gt_pj_1;
94 Lisp_Object Vcharset_ideograph_gt_pj_2;
95 Lisp_Object Vcharset_ideograph_gt_pj_3;
96 Lisp_Object Vcharset_ideograph_gt_pj_4;
97 Lisp_Object Vcharset_ideograph_gt_pj_5;
98 Lisp_Object Vcharset_ideograph_gt_pj_6;
99 Lisp_Object Vcharset_ideograph_gt_pj_7;
100 Lisp_Object Vcharset_ideograph_gt_pj_8;
101 Lisp_Object Vcharset_ideograph_gt_pj_9;
102 Lisp_Object Vcharset_ideograph_gt_pj_10;
103 Lisp_Object Vcharset_ideograph_gt_pj_11;
104 Lisp_Object Vcharset_ideograph_daikanwa;
105 Lisp_Object Vcharset_mojikyo;
106 Lisp_Object Vcharset_mojikyo_2022_1;
107 Lisp_Object Vcharset_mojikyo_pj_1;
108 Lisp_Object Vcharset_mojikyo_pj_2;
109 Lisp_Object Vcharset_mojikyo_pj_3;
110 Lisp_Object Vcharset_mojikyo_pj_4;
111 Lisp_Object Vcharset_mojikyo_pj_5;
112 Lisp_Object Vcharset_mojikyo_pj_6;
113 Lisp_Object Vcharset_mojikyo_pj_7;
114 Lisp_Object Vcharset_mojikyo_pj_8;
115 Lisp_Object Vcharset_mojikyo_pj_9;
116 Lisp_Object Vcharset_mojikyo_pj_10;
117 Lisp_Object Vcharset_mojikyo_pj_11;
118 Lisp_Object Vcharset_mojikyo_pj_12;
119 Lisp_Object Vcharset_mojikyo_pj_13;
120 Lisp_Object Vcharset_mojikyo_pj_14;
121 Lisp_Object Vcharset_mojikyo_pj_15;
122 Lisp_Object Vcharset_mojikyo_pj_16;
123 Lisp_Object Vcharset_mojikyo_pj_17;
124 Lisp_Object Vcharset_mojikyo_pj_18;
125 Lisp_Object Vcharset_mojikyo_pj_19;
126 Lisp_Object Vcharset_mojikyo_pj_20;
127 Lisp_Object Vcharset_mojikyo_pj_21;
128 Lisp_Object Vcharset_ethiopic_ucs;
130 Lisp_Object Vcharset_chinese_big5_1;
131 Lisp_Object Vcharset_chinese_big5_2;
133 #ifdef ENABLE_COMPOSITE_CHARS
134 Lisp_Object Vcharset_composite;
136 /* Hash tables for composite chars. One maps string representing
137 composed chars to their equivalent chars; one goes the
139 Lisp_Object Vcomposite_char_char2string_hash_table;
140 Lisp_Object Vcomposite_char_string2char_hash_table;
142 static int composite_char_row_next;
143 static int composite_char_col_next;
145 #endif /* ENABLE_COMPOSITE_CHARS */
147 struct charset_lookup *chlook;
149 static const struct lrecord_description charset_lookup_description_1[] = {
150 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
159 static const struct struct_description charset_lookup_description = {
160 sizeof (struct charset_lookup),
161 charset_lookup_description_1
165 /* Table of number of bytes in the string representation of a character
166 indexed by the first byte of that representation.
168 rep_bytes_by_first_byte(c) is more efficient than the equivalent
169 canonical computation:
171 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
173 const Bytecount rep_bytes_by_first_byte[0xA0] =
174 { /* 0x00 - 0x7f are for straight ASCII */
175 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
176 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
177 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
178 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
179 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
180 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
181 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
182 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
183 /* 0x80 - 0x8f are for Dimension-1 official charsets */
185 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
187 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
189 /* 0x90 - 0x9d are for Dimension-2 official charsets */
190 /* 0x9e is for Dimension-1 private charsets */
191 /* 0x9f is for Dimension-2 private charsets */
192 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
198 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
200 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
202 /* ad-hoc method for `ascii' */
203 if ((CHARSET_CHARS (cs) == 94) &&
204 (CHARSET_BYTE_OFFSET (cs) != 33))
205 return 128 - CHARSET_BYTE_OFFSET (cs);
207 return CHARSET_CHARS (cs);
210 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
212 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
214 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
218 if (XVECTOR_LENGTH (v) > ccs_len)
221 for (i = 0; i < XVECTOR_LENGTH (v); i++)
223 Lisp_Object c = XVECTOR_DATA(v)[i];
225 if (!NILP (c) && !CHARP (c))
229 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
241 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
244 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
254 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
255 nv = XVECTOR_DATA(v)[i];
261 XVECTOR_DATA(v)[i] = Qnil;
265 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
266 int code_point, Lisp_Object character);
268 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
269 int code_point, Lisp_Object character)
273 int ccs_len = XVECTOR_LENGTH (v);
278 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
279 nv = XVECTOR_DATA(v)[i];
283 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
289 XVECTOR_DATA(v)[i] = character;
293 put_char_ccs_code_point (Lisp_Object character,
294 Lisp_Object ccs, Lisp_Object value)
296 Lisp_Object encoding_table;
298 if (!EQ (XCHARSET_NAME (ccs), Qucs)
299 || (XCHAR (character) != XINT (value)))
301 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
302 int dim = XCHARSET_DIMENSION (ccs);
303 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
304 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
308 { /* obsolete representation: value must be a list of bytes */
309 Lisp_Object ret = Fcar (value);
313 signal_simple_error ("Invalid value for coded-charset", value);
314 code_point = XINT (ret);
315 if (XCHARSET_GRAPHIC (ccs) == 1)
323 signal_simple_error ("Invalid value for coded-charset",
327 signal_simple_error ("Invalid value for coded-charset",
330 if (XCHARSET_GRAPHIC (ccs) == 1)
332 code_point = (code_point << 8) | j;
335 value = make_int (code_point);
337 else if (INTP (value))
339 code_point = XINT (value);
340 if (XCHARSET_GRAPHIC (ccs) == 1)
342 code_point &= 0x7F7F7F7F;
343 value = make_int (code_point);
347 signal_simple_error ("Invalid value for coded-charset", value);
351 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
354 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
359 XCHARSET_DECODING_TABLE (ccs)
360 = v = make_older_vector (ccs_len, Qnil);
363 decoding_table_put_char (v, dim, byte_offset, code_point, character);
365 if (NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
367 XCHARSET_ENCODING_TABLE (ccs)
368 = encoding_table = make_char_id_table (Qnil);
370 put_char_id_table (XCHAR_TABLE(encoding_table), character, value);
375 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
377 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
378 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
380 if (VECTORP (decoding_table))
382 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
386 decoding_table_remove_char (decoding_table,
387 XCHARSET_DIMENSION (ccs),
388 XCHARSET_BYTE_OFFSET (ccs),
392 if (CHAR_TABLEP (encoding_table))
394 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qnil);
402 int leading_code_private_11;
405 Lisp_Object Qcharsetp;
407 /* Qdoc_string, Qdimension, Qchars defined in general.c */
408 Lisp_Object Qregistry, Qfinal, Qgraphic;
409 Lisp_Object Qdirection;
410 Lisp_Object Qreverse_direction_charset;
411 Lisp_Object Qleading_byte;
412 Lisp_Object Qshort_name, Qlong_name;
428 Qjapanese_jisx0208_1978,
432 Qjapanese_jisx0208_1990,
447 Qvietnamese_viscii_lower,
448 Qvietnamese_viscii_upper,
451 Qideograph_hanziku_1,
452 Qideograph_hanziku_2,
453 Qideograph_hanziku_3,
454 Qideograph_hanziku_4,
455 Qideograph_hanziku_5,
456 Qideograph_hanziku_6,
457 Qideograph_hanziku_7,
458 Qideograph_hanziku_8,
459 Qideograph_hanziku_9,
460 Qideograph_hanziku_10,
461 Qideograph_hanziku_11,
462 Qideograph_hanziku_12,
507 Lisp_Object Ql2r, Qr2l;
509 Lisp_Object Vcharset_hash_table;
511 /* Composite characters are characters constructed by overstriking two
512 or more regular characters.
514 1) The old Mule implementation involves storing composite characters
515 in a buffer as a tag followed by all of the actual characters
516 used to make up the composite character. I think this is a bad
517 idea; it greatly complicates code that wants to handle strings
518 one character at a time because it has to deal with the possibility
519 of great big ungainly characters. It's much more reasonable to
520 simply store an index into a table of composite characters.
522 2) The current implementation only allows for 16,384 separate
523 composite characters over the lifetime of the XEmacs process.
524 This could become a potential problem if the user
525 edited lots of different files that use composite characters.
526 Due to FSF bogosity, increasing the number of allowable
527 composite characters under Mule would decrease the number
528 of possible faces that can exist. Mule already has shrunk
529 this to 2048, and further shrinkage would become uncomfortable.
530 No such problems exist in XEmacs.
532 Composite characters could be represented as 0x80 C1 C2 C3,
533 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
534 for slightly under 2^20 (one million) composite characters
535 over the XEmacs process lifetime, and you only need to
536 increase the size of a Mule character from 19 to 21 bits.
537 Or you could use 0x80 C1 C2 C3 C4, allowing for about
538 85 million (slightly over 2^26) composite characters. */
541 /************************************************************************/
542 /* Basic Emchar functions */
543 /************************************************************************/
545 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
546 string in STR. Returns the number of bytes stored.
547 Do not call this directly. Use the macro set_charptr_emchar() instead.
551 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
566 else if ( c <= 0x7ff )
568 *p++ = (c >> 6) | 0xc0;
569 *p++ = (c & 0x3f) | 0x80;
571 else if ( c <= 0xffff )
573 *p++ = (c >> 12) | 0xe0;
574 *p++ = ((c >> 6) & 0x3f) | 0x80;
575 *p++ = (c & 0x3f) | 0x80;
577 else if ( c <= 0x1fffff )
579 *p++ = (c >> 18) | 0xf0;
580 *p++ = ((c >> 12) & 0x3f) | 0x80;
581 *p++ = ((c >> 6) & 0x3f) | 0x80;
582 *p++ = (c & 0x3f) | 0x80;
584 else if ( c <= 0x3ffffff )
586 *p++ = (c >> 24) | 0xf8;
587 *p++ = ((c >> 18) & 0x3f) | 0x80;
588 *p++ = ((c >> 12) & 0x3f) | 0x80;
589 *p++ = ((c >> 6) & 0x3f) | 0x80;
590 *p++ = (c & 0x3f) | 0x80;
594 *p++ = (c >> 30) | 0xfc;
595 *p++ = ((c >> 24) & 0x3f) | 0x80;
596 *p++ = ((c >> 18) & 0x3f) | 0x80;
597 *p++ = ((c >> 12) & 0x3f) | 0x80;
598 *p++ = ((c >> 6) & 0x3f) | 0x80;
599 *p++ = (c & 0x3f) | 0x80;
602 BREAKUP_CHAR (c, charset, c1, c2);
603 lb = CHAR_LEADING_BYTE (c);
604 if (LEADING_BYTE_PRIVATE_P (lb))
605 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
607 if (EQ (charset, Vcharset_control_1))
616 /* Return the first character from a Mule-encoded string in STR,
617 assuming it's non-ASCII. Do not call this directly.
618 Use the macro charptr_emchar() instead. */
621 non_ascii_charptr_emchar (const Bufbyte *str)
634 else if ( b >= 0xf8 )
639 else if ( b >= 0xf0 )
644 else if ( b >= 0xe0 )
649 else if ( b >= 0xc0 )
659 for( ; len > 0; len-- )
662 ch = ( ch << 6 ) | ( b & 0x3f );
666 Bufbyte i0 = *str, i1, i2 = 0;
669 if (i0 == LEADING_BYTE_CONTROL_1)
670 return (Emchar) (*++str - 0x20);
672 if (LEADING_BYTE_PREFIX_P (i0))
677 charset = CHARSET_BY_LEADING_BYTE (i0);
678 if (XCHARSET_DIMENSION (charset) == 2)
681 return MAKE_CHAR (charset, i1, i2);
685 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
686 Do not call this directly. Use the macro valid_char_p() instead. */
690 non_ascii_valid_char_p (Emchar ch)
694 /* Must have only lowest 19 bits set */
698 f1 = CHAR_FIELD1 (ch);
699 f2 = CHAR_FIELD2 (ch);
700 f3 = CHAR_FIELD3 (ch);
706 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
707 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
708 f2 > MAX_CHAR_FIELD2_PRIVATE)
713 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
714 f2 <= MAX_CHAR_FIELD2_PRIVATE))
718 NOTE: This takes advantage of the fact that
719 FIELD2_TO_OFFICIAL_LEADING_BYTE and
720 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
722 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
723 if (EQ (charset, Qnil))
725 return (XCHARSET_CHARS (charset) == 96);
731 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
732 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
733 f1 > MAX_CHAR_FIELD1_PRIVATE)
735 if (f2 < 0x20 || f3 < 0x20)
738 #ifdef ENABLE_COMPOSITE_CHARS
739 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
741 if (UNBOUNDP (Fgethash (make_int (ch),
742 Vcomposite_char_char2string_hash_table,
747 #endif /* ENABLE_COMPOSITE_CHARS */
749 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
750 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
753 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
755 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
758 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
760 if (EQ (charset, Qnil))
762 return (XCHARSET_CHARS (charset) == 96);
768 /************************************************************************/
769 /* Basic string functions */
770 /************************************************************************/
772 /* Copy the character pointed to by SRC into DST. Do not call this
773 directly. Use the macro charptr_copy_char() instead.
774 Return the number of bytes copied. */
777 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
779 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
781 for (i = bytes; i; i--, dst++, src++)
787 /************************************************************************/
788 /* streams of Emchars */
789 /************************************************************************/
791 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
792 The functions below are not meant to be called directly; use
793 the macros in insdel.h. */
796 Lstream_get_emchar_1 (Lstream *stream, int ch)
798 Bufbyte str[MAX_EMCHAR_LEN];
799 Bufbyte *strptr = str;
802 str[0] = (Bufbyte) ch;
804 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
806 int c = Lstream_getc (stream);
807 bufpos_checking_assert (c >= 0);
808 *++strptr = (Bufbyte) c;
810 return charptr_emchar (str);
814 Lstream_fput_emchar (Lstream *stream, Emchar ch)
816 Bufbyte str[MAX_EMCHAR_LEN];
817 Bytecount len = set_charptr_emchar (str, ch);
818 return Lstream_write (stream, str, len);
822 Lstream_funget_emchar (Lstream *stream, Emchar ch)
824 Bufbyte str[MAX_EMCHAR_LEN];
825 Bytecount len = set_charptr_emchar (str, ch);
826 Lstream_unread (stream, str, len);
830 /************************************************************************/
832 /************************************************************************/
835 mark_charset (Lisp_Object obj)
837 Lisp_Charset *cs = XCHARSET (obj);
839 mark_object (cs->short_name);
840 mark_object (cs->long_name);
841 mark_object (cs->doc_string);
842 mark_object (cs->registry);
843 mark_object (cs->ccl_program);
845 mark_object (cs->encoding_table);
846 /* mark_object (cs->decoding_table); */
852 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
854 Lisp_Charset *cs = XCHARSET (obj);
858 error ("printing unreadable object #<charset %s 0x%x>",
859 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
862 write_c_string ("#<charset ", printcharfun);
863 print_internal (CHARSET_NAME (cs), printcharfun, 0);
864 write_c_string (" ", printcharfun);
865 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
866 write_c_string (" ", printcharfun);
867 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
868 write_c_string (" ", printcharfun);
869 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
870 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
872 CHARSET_DIMENSION (cs),
873 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
874 CHARSET_COLUMNS (cs),
875 CHARSET_GRAPHIC (cs),
877 write_c_string (buf, printcharfun);
878 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
879 sprintf (buf, " 0x%x>", cs->header.uid);
880 write_c_string (buf, printcharfun);
883 static const struct lrecord_description charset_description[] = {
884 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
885 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
886 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
887 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
888 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
889 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
890 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
892 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
893 { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
898 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
899 mark_charset, print_charset, 0, 0, 0,
903 /* Make a new charset. */
904 /* #### SJT Should generic properties be allowed? */
906 make_charset (Charset_ID id, Lisp_Object name,
907 unsigned short chars, unsigned char dimension,
908 unsigned char columns, unsigned char graphic,
909 Bufbyte final, unsigned char direction, Lisp_Object short_name,
910 Lisp_Object long_name, Lisp_Object doc,
912 Lisp_Object decoding_table,
913 Emchar ucs_min, Emchar ucs_max,
914 Emchar code_offset, unsigned char byte_offset)
917 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
921 XSETCHARSET (obj, cs);
923 CHARSET_ID (cs) = id;
924 CHARSET_NAME (cs) = name;
925 CHARSET_SHORT_NAME (cs) = short_name;
926 CHARSET_LONG_NAME (cs) = long_name;
927 CHARSET_CHARS (cs) = chars;
928 CHARSET_DIMENSION (cs) = dimension;
929 CHARSET_DIRECTION (cs) = direction;
930 CHARSET_COLUMNS (cs) = columns;
931 CHARSET_GRAPHIC (cs) = graphic;
932 CHARSET_FINAL (cs) = final;
933 CHARSET_DOC_STRING (cs) = doc;
934 CHARSET_REGISTRY (cs) = reg;
935 CHARSET_CCL_PROGRAM (cs) = Qnil;
936 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
938 CHARSET_DECODING_TABLE(cs) = Qnil;
939 CHARSET_ENCODING_TABLE(cs) = Qnil;
940 CHARSET_UCS_MIN(cs) = ucs_min;
941 CHARSET_UCS_MAX(cs) = ucs_max;
942 CHARSET_CODE_OFFSET(cs) = code_offset;
943 CHARSET_BYTE_OFFSET(cs) = byte_offset;
947 if (id == LEADING_BYTE_ASCII)
948 CHARSET_REP_BYTES (cs) = 1;
950 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
952 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
957 /* some charsets do not have final characters. This includes
958 ASCII, Control-1, Composite, and the two faux private
960 unsigned char iso2022_type
961 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
963 if (code_offset == 0)
965 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
966 chlook->charset_by_attributes[iso2022_type][final] = obj;
970 (chlook->charset_by_attributes[iso2022_type][final][direction]));
971 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
975 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
976 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
978 /* Some charsets are "faux" and don't have names or really exist at
979 all except in the leading-byte table. */
981 Fputhash (name, obj, Vcharset_hash_table);
986 get_unallocated_leading_byte (int dimension)
991 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
994 lb = chlook->next_allocated_leading_byte++;
998 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1001 lb = chlook->next_allocated_1_byte_leading_byte++;
1005 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1008 lb = chlook->next_allocated_2_byte_leading_byte++;
1014 ("No more character sets free for this dimension",
1015 make_int (dimension));
1021 /* Number of Big5 characters which have the same code in 1st byte. */
1023 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
1026 decode_builtin_char (Lisp_Object charset, int code_point)
1030 if (EQ (charset, Vcharset_chinese_big5))
1032 int c1 = code_point >> 8;
1033 int c2 = code_point & 0xFF;
1036 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
1038 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
1039 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
1041 I = (c1 - 0xA1) * BIG5_SAME_ROW
1042 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
1046 charset = Vcharset_chinese_big5_1;
1050 charset = Vcharset_chinese_big5_2;
1051 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
1053 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
1056 if ((final = XCHARSET_FINAL (charset)) >= '0')
1058 if (XCHARSET_DIMENSION (charset) == 1)
1060 switch (XCHARSET_CHARS (charset))
1064 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
1067 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
1075 switch (XCHARSET_CHARS (charset))
1078 return MIN_CHAR_94x94
1079 + (final - '0') * 94 * 94
1080 + (((code_point >> 8) & 0x7F) - 33) * 94
1081 + ((code_point & 0x7F) - 33);
1083 return MIN_CHAR_96x96
1084 + (final - '0') * 96 * 96
1085 + (((code_point >> 8) & 0x7F) - 32) * 96
1086 + ((code_point & 0x7F) - 32);
1093 else if (XCHARSET_UCS_MAX (charset))
1096 = (XCHARSET_DIMENSION (charset) == 1
1098 code_point - XCHARSET_BYTE_OFFSET (charset)
1100 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
1101 * XCHARSET_CHARS (charset)
1102 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
1103 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
1104 if ((cid < XCHARSET_UCS_MIN (charset))
1105 || (XCHARSET_UCS_MAX (charset) < cid))
1114 range_charset_code_point (Lisp_Object charset, Emchar ch)
1118 if ((XCHARSET_UCS_MIN (charset) <= ch)
1119 && (ch <= XCHARSET_UCS_MAX (charset)))
1121 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1123 if (XCHARSET_CHARS (charset) == 256)
1125 else if (XCHARSET_DIMENSION (charset) == 1)
1126 return d + XCHARSET_BYTE_OFFSET (charset);
1127 else if (XCHARSET_DIMENSION (charset) == 2)
1129 ((d / XCHARSET_CHARS (charset)
1130 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1131 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1132 else if (XCHARSET_DIMENSION (charset) == 3)
1134 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1135 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1136 | ((d / XCHARSET_CHARS (charset)
1137 % XCHARSET_CHARS (charset)
1138 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1139 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1140 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1142 ((d / (XCHARSET_CHARS (charset)
1143 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1144 + XCHARSET_BYTE_OFFSET (charset)) << 24)
1145 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
1146 % XCHARSET_CHARS (charset)
1147 + XCHARSET_BYTE_OFFSET (charset)) << 16)
1148 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
1149 + XCHARSET_BYTE_OFFSET (charset)) << 8)
1150 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
1152 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1154 if (XCHARSET_DIMENSION (charset) == 1)
1156 if (XCHARSET_CHARS (charset) == 94)
1158 if (((d = ch - (MIN_CHAR_94
1159 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1163 else if (XCHARSET_CHARS (charset) == 96)
1165 if (((d = ch - (MIN_CHAR_96
1166 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1173 else if (XCHARSET_DIMENSION (charset) == 2)
1175 if (XCHARSET_CHARS (charset) == 94)
1177 if (((d = ch - (MIN_CHAR_94x94
1178 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1181 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1183 else if (XCHARSET_CHARS (charset) == 96)
1185 if (((d = ch - (MIN_CHAR_96x96
1186 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1189 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1195 if (EQ (charset, Vcharset_mojikyo_2022_1)
1196 && (MIN_CHAR_MOJIKYO < ch) && (ch < MIN_CHAR_MOJIKYO + 94 * 60 * 94))
1198 int m = ch - MIN_CHAR_MOJIKYO - 1;
1199 int byte1 = m / (94 * 60) + 33;
1200 int byte2 = (m % (94 * 60)) / 94;
1201 int byte3 = m % 94 + 33;
1207 return (byte1 << 16) | (byte2 << 8) | byte3;
1213 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1215 if (c <= MAX_CHAR_BASIC_LATIN)
1217 *charset = Vcharset_ascii;
1222 *charset = Vcharset_control_1;
1227 *charset = Vcharset_latin_iso8859_1;
1231 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1233 *charset = Vcharset_hebrew_iso8859_8;
1234 return c - MIN_CHAR_HEBREW + 0x20;
1237 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1239 *charset = Vcharset_thai_tis620;
1240 return c - MIN_CHAR_THAI + 0x20;
1243 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1244 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1246 return list2 (Vcharset_katakana_jisx0201,
1247 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1250 else if (c <= MAX_CHAR_BMP)
1252 *charset = Vcharset_ucs_bmp;
1255 else if (c < MIN_CHAR_DAIKANWA)
1257 *charset = Vcharset_ucs;
1260 else if (c <= MAX_CHAR_DAIKANWA)
1262 *charset = Vcharset_ideograph_daikanwa;
1263 return c - MIN_CHAR_DAIKANWA;
1266 else if (c <= MAX_CHAR_MOJIKYO_0)
1268 *charset = Vcharset_mojikyo;
1269 return c - MIN_CHAR_MOJIKYO_0;
1272 else if (c < MIN_CHAR_94)
1274 *charset = Vcharset_ucs;
1277 else if (c <= MAX_CHAR_94)
1279 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1280 ((c - MIN_CHAR_94) / 94) + '0',
1281 CHARSET_LEFT_TO_RIGHT);
1282 if (!NILP (*charset))
1283 return ((c - MIN_CHAR_94) % 94) + 33;
1286 *charset = Vcharset_ucs;
1290 else if (c <= MAX_CHAR_96)
1292 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1293 ((c - MIN_CHAR_96) / 96) + '0',
1294 CHARSET_LEFT_TO_RIGHT);
1295 if (!NILP (*charset))
1296 return ((c - MIN_CHAR_96) % 96) + 32;
1299 *charset = Vcharset_ucs;
1303 else if (c <= MAX_CHAR_94x94)
1306 = CHARSET_BY_ATTRIBUTES (94, 2,
1307 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1308 CHARSET_LEFT_TO_RIGHT);
1309 if (!NILP (*charset))
1310 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1311 | (((c - MIN_CHAR_94x94) % 94) + 33);
1314 *charset = Vcharset_ucs;
1318 else if (c <= MAX_CHAR_96x96)
1321 = CHARSET_BY_ATTRIBUTES (96, 2,
1322 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1323 CHARSET_LEFT_TO_RIGHT);
1324 if (!NILP (*charset))
1325 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1326 | (((c - MIN_CHAR_96x96) % 96) + 32);
1329 *charset = Vcharset_ucs;
1334 else if (c < MIN_CHAR_MOJIKYO)
1336 *charset = Vcharset_ucs;
1339 else if (c <= MAX_CHAR_MOJIKYO)
1341 *charset = Vcharset_mojikyo;
1342 return c - MIN_CHAR_MOJIKYO;
1344 else if (c < MIN_CHAR_CHINA3_JEF)
1346 *charset = Vcharset_ucs;
1349 else if (c <= MAX_CHAR_CHINA3_JEF)
1351 *charset = Vcharset_china3_jef;
1352 return c - MIN_CHAR_CHINA3_JEF;
1354 else if (c <= MAX_CHAR_CBETA)
1356 *charset = Vcharset_ideograph_cbeta;
1357 return c - MIN_CHAR_CBETA;
1362 *charset = Vcharset_ucs;
1367 Lisp_Object Vdefault_coded_charset_priority_list;
1371 /************************************************************************/
1372 /* Basic charset Lisp functions */
1373 /************************************************************************/
1375 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1376 Return non-nil if OBJECT is a charset.
1380 return CHARSETP (object) ? Qt : Qnil;
1383 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1384 Retrieve the charset of the given name.
1385 If CHARSET-OR-NAME is a charset object, it is simply returned.
1386 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1387 nil is returned. Otherwise the associated charset object is returned.
1391 if (CHARSETP (charset_or_name))
1392 return charset_or_name;
1394 CHECK_SYMBOL (charset_or_name);
1395 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1398 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1399 Retrieve the charset of the given name.
1400 Same as `find-charset' except an error is signalled if there is no such
1401 charset instead of returning nil.
1405 Lisp_Object charset = Ffind_charset (name);
1408 signal_simple_error ("No such charset", name);
1412 /* We store the charsets in hash tables with the names as the key and the
1413 actual charset object as the value. Occasionally we need to use them
1414 in a list format. These routines provide us with that. */
1415 struct charset_list_closure
1417 Lisp_Object *charset_list;
1421 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1422 void *charset_list_closure)
1424 /* This function can GC */
1425 struct charset_list_closure *chcl =
1426 (struct charset_list_closure*) charset_list_closure;
1427 Lisp_Object *charset_list = chcl->charset_list;
1429 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1433 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1434 Return a list of the names of all defined charsets.
1438 Lisp_Object charset_list = Qnil;
1439 struct gcpro gcpro1;
1440 struct charset_list_closure charset_list_closure;
1442 GCPRO1 (charset_list);
1443 charset_list_closure.charset_list = &charset_list;
1444 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1445 &charset_list_closure);
1448 return charset_list;
1451 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1452 Return the name of charset CHARSET.
1456 return XCHARSET_NAME (Fget_charset (charset));
1459 /* #### SJT Should generic properties be allowed? */
1460 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1461 Define a new character set.
1462 This function is for use with Mule support.
1463 NAME is a symbol, the name by which the character set is normally referred.
1464 DOC-STRING is a string describing the character set.
1465 PROPS is a property list, describing the specific nature of the
1466 character set. Recognized properties are:
1468 'short-name Short version of the charset name (ex: Latin-1)
1469 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1470 'registry A regular expression matching the font registry field for
1472 'dimension Number of octets used to index a character in this charset.
1473 Either 1 or 2. Defaults to 1.
1474 'columns Number of columns used to display a character in this charset.
1475 Only used in TTY mode. (Under X, the actual width of a
1476 character can be derived from the font used to display the
1477 characters.) If unspecified, defaults to the dimension
1478 (this is almost always the correct value).
1479 'chars Number of characters in each dimension (94 or 96).
1480 Defaults to 94. Note that if the dimension is 2, the
1481 character set thus described is 94x94 or 96x96.
1482 'final Final byte of ISO 2022 escape sequence. Must be
1483 supplied. Each combination of (DIMENSION, CHARS) defines a
1484 separate namespace for final bytes. Note that ISO
1485 2022 restricts the final byte to the range
1486 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1487 dimension == 2. Note also that final bytes in the range
1488 0x30 - 0x3F are reserved for user-defined (not official)
1490 'graphic 0 (use left half of font on output) or 1 (use right half
1491 of font on output). Defaults to 0. For example, for
1492 a font whose registry is ISO8859-1, the left half
1493 (octets 0x20 - 0x7F) is the `ascii' character set, while
1494 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1495 character set. With 'graphic set to 0, the octets
1496 will have their high bit cleared; with it set to 1,
1497 the octets will have their high bit set.
1498 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1500 'ccl-program A compiled CCL program used to convert a character in
1501 this charset into an index into the font. This is in
1502 addition to the 'graphic property. The CCL program
1503 is passed the octets of the character, with the high
1504 bit cleared and set depending upon whether the value
1505 of the 'graphic property is 0 or 1.
1507 (name, doc_string, props))
1509 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1510 int direction = CHARSET_LEFT_TO_RIGHT;
1511 Lisp_Object registry = Qnil;
1512 Lisp_Object charset;
1513 Lisp_Object ccl_program = Qnil;
1514 Lisp_Object short_name = Qnil, long_name = Qnil;
1515 int byte_offset = -1;
1517 CHECK_SYMBOL (name);
1518 if (!NILP (doc_string))
1519 CHECK_STRING (doc_string);
1521 charset = Ffind_charset (name);
1522 if (!NILP (charset))
1523 signal_simple_error ("Cannot redefine existing charset", name);
1526 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1528 if (EQ (keyword, Qshort_name))
1530 CHECK_STRING (value);
1534 if (EQ (keyword, Qlong_name))
1536 CHECK_STRING (value);
1540 else if (EQ (keyword, Qdimension))
1543 dimension = XINT (value);
1544 if (dimension < 1 || dimension > 2)
1545 signal_simple_error ("Invalid value for 'dimension", value);
1548 else if (EQ (keyword, Qchars))
1551 chars = XINT (value);
1552 if (chars != 94 && chars != 96)
1553 signal_simple_error ("Invalid value for 'chars", value);
1556 else if (EQ (keyword, Qcolumns))
1559 columns = XINT (value);
1560 if (columns != 1 && columns != 2)
1561 signal_simple_error ("Invalid value for 'columns", value);
1564 else if (EQ (keyword, Qgraphic))
1567 graphic = XINT (value);
1569 if (graphic < 0 || graphic > 2)
1571 if (graphic < 0 || graphic > 1)
1573 signal_simple_error ("Invalid value for 'graphic", value);
1576 else if (EQ (keyword, Qregistry))
1578 CHECK_STRING (value);
1582 else if (EQ (keyword, Qdirection))
1584 if (EQ (value, Ql2r))
1585 direction = CHARSET_LEFT_TO_RIGHT;
1586 else if (EQ (value, Qr2l))
1587 direction = CHARSET_RIGHT_TO_LEFT;
1589 signal_simple_error ("Invalid value for 'direction", value);
1592 else if (EQ (keyword, Qfinal))
1594 CHECK_CHAR_COERCE_INT (value);
1595 final = XCHAR (value);
1596 if (final < '0' || final > '~')
1597 signal_simple_error ("Invalid value for 'final", value);
1600 else if (EQ (keyword, Qccl_program))
1602 struct ccl_program test_ccl;
1604 if (setup_ccl_program (&test_ccl, value) < 0)
1605 signal_simple_error ("Invalid value for 'ccl-program", value);
1606 ccl_program = value;
1610 signal_simple_error ("Unrecognized property", keyword);
1615 error ("'final must be specified");
1616 if (dimension == 2 && final > 0x5F)
1618 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1621 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1622 CHARSET_LEFT_TO_RIGHT)) ||
1623 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1624 CHARSET_RIGHT_TO_LEFT)))
1626 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1628 id = get_unallocated_leading_byte (dimension);
1630 if (NILP (doc_string))
1631 doc_string = build_string ("");
1633 if (NILP (registry))
1634 registry = build_string ("");
1636 if (NILP (short_name))
1637 XSETSTRING (short_name, XSYMBOL (name)->name);
1639 if (NILP (long_name))
1640 long_name = doc_string;
1643 columns = dimension;
1645 if (byte_offset < 0)
1649 else if (chars == 96)
1655 charset = make_charset (id, name, chars, dimension, columns, graphic,
1656 final, direction, short_name, long_name,
1657 doc_string, registry,
1658 Qnil, 0, 0, 0, byte_offset);
1659 if (!NILP (ccl_program))
1660 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1664 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1666 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1667 NEW-NAME is the name of the new charset. Return the new charset.
1669 (charset, new_name))
1671 Lisp_Object new_charset = Qnil;
1672 int id, chars, dimension, columns, graphic, final;
1674 Lisp_Object registry, doc_string, short_name, long_name;
1677 charset = Fget_charset (charset);
1678 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1679 signal_simple_error ("Charset already has reverse-direction charset",
1682 CHECK_SYMBOL (new_name);
1683 if (!NILP (Ffind_charset (new_name)))
1684 signal_simple_error ("Cannot redefine existing charset", new_name);
1686 cs = XCHARSET (charset);
1688 chars = CHARSET_CHARS (cs);
1689 dimension = CHARSET_DIMENSION (cs);
1690 columns = CHARSET_COLUMNS (cs);
1691 id = get_unallocated_leading_byte (dimension);
1693 graphic = CHARSET_GRAPHIC (cs);
1694 final = CHARSET_FINAL (cs);
1695 direction = CHARSET_RIGHT_TO_LEFT;
1696 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1697 direction = CHARSET_LEFT_TO_RIGHT;
1698 doc_string = CHARSET_DOC_STRING (cs);
1699 short_name = CHARSET_SHORT_NAME (cs);
1700 long_name = CHARSET_LONG_NAME (cs);
1701 registry = CHARSET_REGISTRY (cs);
1703 new_charset = make_charset (id, new_name, chars, dimension, columns,
1704 graphic, final, direction, short_name, long_name,
1705 doc_string, registry,
1707 CHARSET_DECODING_TABLE(cs),
1708 CHARSET_UCS_MIN(cs),
1709 CHARSET_UCS_MAX(cs),
1710 CHARSET_CODE_OFFSET(cs),
1711 CHARSET_BYTE_OFFSET(cs)
1717 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1718 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1723 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1724 Define symbol ALIAS as an alias for CHARSET.
1728 CHECK_SYMBOL (alias);
1729 charset = Fget_charset (charset);
1730 return Fputhash (alias, charset, Vcharset_hash_table);
1733 /* #### Reverse direction charsets not yet implemented. */
1735 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1737 Return the reverse-direction charset parallel to CHARSET, if any.
1738 This is the charset with the same properties (in particular, the same
1739 dimension, number of characters per dimension, and final byte) as
1740 CHARSET but whose characters are displayed in the opposite direction.
1744 charset = Fget_charset (charset);
1745 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1749 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1750 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1751 If DIRECTION is omitted, both directions will be checked (left-to-right
1752 will be returned if character sets exist for both directions).
1754 (dimension, chars, final, direction))
1756 int dm, ch, fi, di = -1;
1757 Lisp_Object obj = Qnil;
1759 CHECK_INT (dimension);
1760 dm = XINT (dimension);
1761 if (dm < 1 || dm > 2)
1762 signal_simple_error ("Invalid value for DIMENSION", dimension);
1766 if (ch != 94 && ch != 96)
1767 signal_simple_error ("Invalid value for CHARS", chars);
1769 CHECK_CHAR_COERCE_INT (final);
1771 if (fi < '0' || fi > '~')
1772 signal_simple_error ("Invalid value for FINAL", final);
1774 if (EQ (direction, Ql2r))
1775 di = CHARSET_LEFT_TO_RIGHT;
1776 else if (EQ (direction, Qr2l))
1777 di = CHARSET_RIGHT_TO_LEFT;
1778 else if (!NILP (direction))
1779 signal_simple_error ("Invalid value for DIRECTION", direction);
1781 if (dm == 2 && fi > 0x5F)
1783 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1787 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1789 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1792 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1795 return XCHARSET_NAME (obj);
1799 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1800 Return short name of CHARSET.
1804 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1807 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1808 Return long name of CHARSET.
1812 return XCHARSET_LONG_NAME (Fget_charset (charset));
1815 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1816 Return description of CHARSET.
1820 return XCHARSET_DOC_STRING (Fget_charset (charset));
1823 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1824 Return dimension of CHARSET.
1828 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1831 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1832 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1833 Recognized properties are those listed in `make-charset', as well as
1834 'name and 'doc-string.
1840 charset = Fget_charset (charset);
1841 cs = XCHARSET (charset);
1843 CHECK_SYMBOL (prop);
1844 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1845 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1846 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1847 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1848 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1849 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1850 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1851 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1852 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1853 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1854 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1855 if (EQ (prop, Qdirection))
1856 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1857 if (EQ (prop, Qreverse_direction_charset))
1859 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1860 /* #### Is this translation OK? If so, error checking sufficient? */
1861 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1863 signal_simple_error ("Unrecognized charset property name", prop);
1864 return Qnil; /* not reached */
1867 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1868 Return charset identification number of CHARSET.
1872 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1875 /* #### We need to figure out which properties we really want to
1878 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1879 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1881 (charset, ccl_program))
1883 struct ccl_program test_ccl;
1885 charset = Fget_charset (charset);
1886 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1887 signal_simple_error ("Invalid ccl-program", ccl_program);
1888 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1893 invalidate_charset_font_caches (Lisp_Object charset)
1895 /* Invalidate font cache entries for charset on all devices. */
1896 Lisp_Object devcons, concons, hash_table;
1897 DEVICE_LOOP_NO_BREAK (devcons, concons)
1899 struct device *d = XDEVICE (XCAR (devcons));
1900 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1901 if (!UNBOUNDP (hash_table))
1902 Fclrhash (hash_table);
1906 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1907 Set the 'registry property of CHARSET to REGISTRY.
1909 (charset, registry))
1911 charset = Fget_charset (charset);
1912 CHECK_STRING (registry);
1913 XCHARSET_REGISTRY (charset) = registry;
1914 invalidate_charset_font_caches (charset);
1915 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1920 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1921 Return mapping-table of CHARSET.
1925 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1928 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1929 Set mapping-table of CHARSET to TABLE.
1933 struct Lisp_Charset *cs;
1937 charset = Fget_charset (charset);
1938 cs = XCHARSET (charset);
1942 if (VECTORP (CHARSET_DECODING_TABLE(cs)))
1943 make_vector_newer (CHARSET_DECODING_TABLE(cs));
1944 CHARSET_DECODING_TABLE(cs) = Qnil;
1947 else if (VECTORP (table))
1949 int ccs_len = CHARSET_BYTE_SIZE (cs);
1950 int ret = decoding_table_check_elements (table,
1951 CHARSET_DIMENSION (cs),
1956 signal_simple_error ("Too big table", table);
1958 signal_simple_error ("Invalid element is found", table);
1960 signal_simple_error ("Something wrong", table);
1962 CHARSET_DECODING_TABLE(cs) = Qnil;
1965 signal_error (Qwrong_type_argument,
1966 list2 (build_translated_string ("vector-or-nil-p"),
1969 byte_offset = CHARSET_BYTE_OFFSET (cs);
1970 switch (CHARSET_DIMENSION (cs))
1973 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1975 Lisp_Object c = XVECTOR_DATA(table)[i];
1978 put_char_ccs_code_point (c, charset,
1979 make_int (i + byte_offset));
1983 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1985 Lisp_Object v = XVECTOR_DATA(table)[i];
1991 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1993 Lisp_Object c = XVECTOR_DATA(v)[j];
1996 put_char_ccs_code_point
1998 make_int ( ( (i + byte_offset) << 8 )
2004 put_char_ccs_code_point (v, charset,
2005 make_int (i + byte_offset));
2014 /************************************************************************/
2015 /* Lisp primitives for working with characters */
2016 /************************************************************************/
2019 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
2020 Make a character from CHARSET and code-point CODE.
2021 If DEFINED_ONLY is non-nil, builtin character is not returned.
2022 If corresponding character is not found, nil is returned.
2024 (charset, code, defined_only))
2028 charset = Fget_charset (charset);
2031 if (XCHARSET_GRAPHIC (charset) == 1)
2033 if (NILP (defined_only))
2034 c = DECODE_CHAR (charset, c);
2036 c = DECODE_DEFINED_CHAR (charset, c);
2037 return c >= 0 ? make_char (c) : Qnil;
2040 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2041 Make a builtin character from CHARSET and code-point CODE.
2047 charset = Fget_charset (charset);
2049 if (EQ (charset, Vcharset_latin_viscii))
2051 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2057 (ret = Fget_char_attribute (chr,
2058 Vcharset_latin_viscii_lower,
2061 charset = Vcharset_latin_viscii_lower;
2065 (ret = Fget_char_attribute (chr,
2066 Vcharset_latin_viscii_upper,
2069 charset = Vcharset_latin_viscii_upper;
2076 if (XCHARSET_GRAPHIC (charset) == 1)
2079 c = decode_builtin_char (charset, c);
2080 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
2084 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2085 Make a character from CHARSET and octets ARG1 and ARG2.
2086 ARG2 is required only for characters from two-dimensional charsets.
2087 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2088 character s with caron.
2090 (charset, arg1, arg2))
2094 int lowlim, highlim;
2096 charset = Fget_charset (charset);
2097 cs = XCHARSET (charset);
2099 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2100 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2102 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2104 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2105 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2108 /* It is useful (and safe, according to Olivier Galibert) to strip
2109 the 8th bit off ARG1 and ARG2 because it allows programmers to
2110 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2111 Latin 2 code of the character. */
2119 if (a1 < lowlim || a1 > highlim)
2120 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2122 if (CHARSET_DIMENSION (cs) == 1)
2126 ("Charset is of dimension one; second octet must be nil", arg2);
2127 return make_char (MAKE_CHAR (charset, a1, 0));
2136 a2 = XINT (arg2) & 0x7f;
2138 if (a2 < lowlim || a2 > highlim)
2139 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2141 return make_char (MAKE_CHAR (charset, a1, a2));
2144 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2145 Return the character set of CHARACTER.
2149 CHECK_CHAR_COERCE_INT (character);
2151 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2154 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2155 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2156 N defaults to 0 if omitted.
2160 Lisp_Object charset;
2163 CHECK_CHAR_COERCE_INT (character);
2165 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2167 if (NILP (n) || EQ (n, Qzero))
2168 return make_int (octet0);
2169 else if (EQ (n, make_int (1)))
2170 return make_int (octet1);
2172 signal_simple_error ("Octet number must be 0 or 1", n);
2175 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2176 Return list of charset and one or two position-codes of CHARACTER.
2180 /* This function can GC */
2181 struct gcpro gcpro1, gcpro2;
2182 Lisp_Object charset = Qnil;
2183 Lisp_Object rc = Qnil;
2191 GCPRO2 (charset, rc);
2192 CHECK_CHAR_COERCE_INT (character);
2195 code_point = ENCODE_CHAR (XCHAR (character), charset);
2196 dimension = XCHARSET_DIMENSION (charset);
2197 while (dimension > 0)
2199 rc = Fcons (make_int (code_point & 255), rc);
2203 rc = Fcons (XCHARSET_NAME (charset), rc);
2205 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2207 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2209 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2213 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2222 #ifdef ENABLE_COMPOSITE_CHARS
2223 /************************************************************************/
2224 /* composite character functions */
2225 /************************************************************************/
2228 lookup_composite_char (Bufbyte *str, int len)
2230 Lisp_Object lispstr = make_string (str, len);
2231 Lisp_Object ch = Fgethash (lispstr,
2232 Vcomposite_char_string2char_hash_table,
2238 if (composite_char_row_next >= 128)
2239 signal_simple_error ("No more composite chars available", lispstr);
2240 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2241 composite_char_col_next);
2242 Fputhash (make_char (emch), lispstr,
2243 Vcomposite_char_char2string_hash_table);
2244 Fputhash (lispstr, make_char (emch),
2245 Vcomposite_char_string2char_hash_table);
2246 composite_char_col_next++;
2247 if (composite_char_col_next >= 128)
2249 composite_char_col_next = 32;
2250 composite_char_row_next++;
2259 composite_char_string (Emchar ch)
2261 Lisp_Object str = Fgethash (make_char (ch),
2262 Vcomposite_char_char2string_hash_table,
2264 assert (!UNBOUNDP (str));
2268 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2269 Convert a string into a single composite character.
2270 The character is the result of overstriking all the characters in
2275 CHECK_STRING (string);
2276 return make_char (lookup_composite_char (XSTRING_DATA (string),
2277 XSTRING_LENGTH (string)));
2280 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2281 Return a string of the characters comprising a composite character.
2289 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2290 signal_simple_error ("Must be composite char", ch);
2291 return composite_char_string (emch);
2293 #endif /* ENABLE_COMPOSITE_CHARS */
2296 /************************************************************************/
2297 /* initialization */
2298 /************************************************************************/
2301 syms_of_mule_charset (void)
2303 INIT_LRECORD_IMPLEMENTATION (charset);
2305 DEFSUBR (Fcharsetp);
2306 DEFSUBR (Ffind_charset);
2307 DEFSUBR (Fget_charset);
2308 DEFSUBR (Fcharset_list);
2309 DEFSUBR (Fcharset_name);
2310 DEFSUBR (Fmake_charset);
2311 DEFSUBR (Fmake_reverse_direction_charset);
2312 /* DEFSUBR (Freverse_direction_charset); */
2313 DEFSUBR (Fdefine_charset_alias);
2314 DEFSUBR (Fcharset_from_attributes);
2315 DEFSUBR (Fcharset_short_name);
2316 DEFSUBR (Fcharset_long_name);
2317 DEFSUBR (Fcharset_description);
2318 DEFSUBR (Fcharset_dimension);
2319 DEFSUBR (Fcharset_property);
2320 DEFSUBR (Fcharset_id);
2321 DEFSUBR (Fset_charset_ccl_program);
2322 DEFSUBR (Fset_charset_registry);
2324 DEFSUBR (Fcharset_mapping_table);
2325 DEFSUBR (Fset_charset_mapping_table);
2329 DEFSUBR (Fdecode_char);
2330 DEFSUBR (Fdecode_builtin_char);
2332 DEFSUBR (Fmake_char);
2333 DEFSUBR (Fchar_charset);
2334 DEFSUBR (Fchar_octet);
2335 DEFSUBR (Fsplit_char);
2337 #ifdef ENABLE_COMPOSITE_CHARS
2338 DEFSUBR (Fmake_composite_char);
2339 DEFSUBR (Fcomposite_char_string);
2342 defsymbol (&Qcharsetp, "charsetp");
2343 defsymbol (&Qregistry, "registry");
2344 defsymbol (&Qfinal, "final");
2345 defsymbol (&Qgraphic, "graphic");
2346 defsymbol (&Qdirection, "direction");
2347 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2348 defsymbol (&Qshort_name, "short-name");
2349 defsymbol (&Qlong_name, "long-name");
2351 defsymbol (&Ql2r, "l2r");
2352 defsymbol (&Qr2l, "r2l");
2354 /* Charsets, compatible with FSF 20.3
2355 Naming convention is Script-Charset[-Edition] */
2356 defsymbol (&Qascii, "ascii");
2357 defsymbol (&Qcontrol_1, "control-1");
2358 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2359 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2360 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2361 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2362 defsymbol (&Qthai_tis620, "thai-tis620");
2363 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2364 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2365 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2366 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2367 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2368 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2369 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2370 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2371 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2372 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2373 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2374 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2375 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2376 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2377 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2378 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2380 defsymbol (&Qucs, "ucs");
2381 defsymbol (&Qucs_bmp, "ucs-bmp");
2382 defsymbol (&Qucs_cns, "ucs-cns");
2383 defsymbol (&Qucs_jis, "ucs-jis");
2384 defsymbol (&Qucs_big5, "ucs-big5");
2385 defsymbol (&Qlatin_viscii, "latin-viscii");
2386 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2387 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2388 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2389 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2390 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2391 defsymbol (&Qideograph_gt, "ideograph-gt");
2392 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
2393 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
2394 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
2395 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
2396 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
2397 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
2398 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
2399 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
2400 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
2401 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
2402 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
2403 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
2404 defsymbol (&Qchinese_big5, "chinese-big5");
2405 defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp");
2406 defsymbol (&Qideograph_hanziku_1, "ideograph-hanziku-1");
2407 defsymbol (&Qideograph_hanziku_2, "ideograph-hanziku-2");
2408 defsymbol (&Qideograph_hanziku_3, "ideograph-hanziku-3");
2409 defsymbol (&Qideograph_hanziku_4, "ideograph-hanziku-4");
2410 defsymbol (&Qideograph_hanziku_5, "ideograph-hanziku-5");
2411 defsymbol (&Qideograph_hanziku_6, "ideograph-hanziku-6");
2412 defsymbol (&Qideograph_hanziku_7, "ideograph-hanziku-7");
2413 defsymbol (&Qideograph_hanziku_8, "ideograph-hanziku-8");
2414 defsymbol (&Qideograph_hanziku_9, "ideograph-hanziku-9");
2415 defsymbol (&Qideograph_hanziku_10, "ideograph-hanziku-10");
2416 defsymbol (&Qideograph_hanziku_11, "ideograph-hanziku-11");
2417 defsymbol (&Qideograph_hanziku_12, "ideograph-hanziku-12");
2418 defsymbol (&Qchina3_jef, "china3-jef");
2419 defsymbol (&Qideograph_cbeta, "ideograph-cbeta");
2420 defsymbol (&Qmojikyo, "mojikyo");
2421 defsymbol (&Qmojikyo_2022_1, "mojikyo-2022-1");
2422 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
2423 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
2424 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
2425 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
2426 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
2427 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
2428 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
2429 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
2430 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
2431 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
2432 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
2433 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
2434 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
2435 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
2436 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
2437 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
2438 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
2439 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
2440 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
2441 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
2442 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
2443 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2445 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2446 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2448 defsymbol (&Qcomposite, "composite");
2452 vars_of_mule_charset (void)
2459 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2460 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2462 /* Table of charsets indexed by leading byte. */
2463 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2464 chlook->charset_by_leading_byte[i] = Qnil;
2467 /* Table of charsets indexed by type/final-byte. */
2468 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2469 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2470 chlook->charset_by_attributes[i][j] = Qnil;
2472 /* Table of charsets indexed by type/final-byte/direction. */
2473 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2474 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2475 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2476 chlook->charset_by_attributes[i][j][k] = Qnil;
2480 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2482 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2483 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2487 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2488 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2489 Leading-code of private TYPE9N charset of column-width 1.
2491 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2495 Vdefault_coded_charset_priority_list = Qnil;
2496 DEFVAR_LISP ("default-coded-charset-priority-list",
2497 &Vdefault_coded_charset_priority_list /*
2498 Default order of preferred coded-character-sets.
2504 complex_vars_of_mule_charset (void)
2506 staticpro (&Vcharset_hash_table);
2507 Vcharset_hash_table =
2508 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2510 /* Predefined character sets. We store them into variables for
2514 staticpro (&Vcharset_ucs);
2516 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2517 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2518 build_string ("UCS"),
2519 build_string ("UCS"),
2520 build_string ("ISO/IEC 10646"),
2522 Qnil, 0, 0xFFFFFFF, 0, 0);
2523 staticpro (&Vcharset_ucs_bmp);
2525 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2526 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2527 build_string ("BMP"),
2528 build_string ("BMP"),
2529 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2530 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2531 Qnil, 0, 0xFFFF, 0, 0);
2532 staticpro (&Vcharset_ucs_cns);
2534 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2535 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2536 build_string ("UCS for CNS"),
2537 build_string ("UCS for CNS 11643"),
2538 build_string ("ISO/IEC 10646 for CNS 11643"),
2541 staticpro (&Vcharset_ucs_jis);
2543 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2544 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2545 build_string ("UCS for JIS"),
2546 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2547 build_string ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2550 staticpro (&Vcharset_ucs_big5);
2552 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
2553 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2554 build_string ("UCS for Big5"),
2555 build_string ("UCS for Big5"),
2556 build_string ("ISO/IEC 10646 for Big5"),
2560 # define MIN_CHAR_THAI 0
2561 # define MAX_CHAR_THAI 0
2562 /* # define MIN_CHAR_HEBREW 0 */
2563 /* # define MAX_CHAR_HEBREW 0 */
2564 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2565 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2567 staticpro (&Vcharset_ascii);
2569 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2570 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2571 build_string ("ASCII"),
2572 build_string ("ASCII)"),
2573 build_string ("ASCII (ISO646 IRV)"),
2574 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2575 Qnil, 0, 0x7F, 0, 0);
2576 staticpro (&Vcharset_control_1);
2577 Vcharset_control_1 =
2578 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2579 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2580 build_string ("C1"),
2581 build_string ("Control characters"),
2582 build_string ("Control characters 128-191"),
2584 Qnil, 0x80, 0x9F, 0, 0);
2585 staticpro (&Vcharset_latin_iso8859_1);
2586 Vcharset_latin_iso8859_1 =
2587 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2588 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2589 build_string ("Latin-1"),
2590 build_string ("ISO8859-1 (Latin-1)"),
2591 build_string ("ISO8859-1 (Latin-1)"),
2592 build_string ("iso8859-1"),
2593 Qnil, 0xA0, 0xFF, 0, 32);
2594 staticpro (&Vcharset_latin_iso8859_2);
2595 Vcharset_latin_iso8859_2 =
2596 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2597 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2598 build_string ("Latin-2"),
2599 build_string ("ISO8859-2 (Latin-2)"),
2600 build_string ("ISO8859-2 (Latin-2)"),
2601 build_string ("iso8859-2"),
2603 staticpro (&Vcharset_latin_iso8859_3);
2604 Vcharset_latin_iso8859_3 =
2605 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2606 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2607 build_string ("Latin-3"),
2608 build_string ("ISO8859-3 (Latin-3)"),
2609 build_string ("ISO8859-3 (Latin-3)"),
2610 build_string ("iso8859-3"),
2612 staticpro (&Vcharset_latin_iso8859_4);
2613 Vcharset_latin_iso8859_4 =
2614 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2615 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2616 build_string ("Latin-4"),
2617 build_string ("ISO8859-4 (Latin-4)"),
2618 build_string ("ISO8859-4 (Latin-4)"),
2619 build_string ("iso8859-4"),
2621 staticpro (&Vcharset_thai_tis620);
2622 Vcharset_thai_tis620 =
2623 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2624 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2625 build_string ("TIS620"),
2626 build_string ("TIS620 (Thai)"),
2627 build_string ("TIS620.2529 (Thai)"),
2628 build_string ("tis620"),
2629 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2630 staticpro (&Vcharset_greek_iso8859_7);
2631 Vcharset_greek_iso8859_7 =
2632 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2633 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2634 build_string ("ISO8859-7"),
2635 build_string ("ISO8859-7 (Greek)"),
2636 build_string ("ISO8859-7 (Greek)"),
2637 build_string ("iso8859-7"),
2639 staticpro (&Vcharset_arabic_iso8859_6);
2640 Vcharset_arabic_iso8859_6 =
2641 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2642 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2643 build_string ("ISO8859-6"),
2644 build_string ("ISO8859-6 (Arabic)"),
2645 build_string ("ISO8859-6 (Arabic)"),
2646 build_string ("iso8859-6"),
2648 staticpro (&Vcharset_hebrew_iso8859_8);
2649 Vcharset_hebrew_iso8859_8 =
2650 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2651 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2652 build_string ("ISO8859-8"),
2653 build_string ("ISO8859-8 (Hebrew)"),
2654 build_string ("ISO8859-8 (Hebrew)"),
2655 build_string ("iso8859-8"),
2657 0 /* MIN_CHAR_HEBREW */,
2658 0 /* MAX_CHAR_HEBREW */, 0, 32);
2659 staticpro (&Vcharset_katakana_jisx0201);
2660 Vcharset_katakana_jisx0201 =
2661 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2662 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2663 build_string ("JISX0201 Kana"),
2664 build_string ("JISX0201.1976 (Japanese Kana)"),
2665 build_string ("JISX0201.1976 Japanese Kana"),
2666 build_string ("jisx0201\\.1976"),
2668 staticpro (&Vcharset_latin_jisx0201);
2669 Vcharset_latin_jisx0201 =
2670 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2671 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2672 build_string ("JISX0201 Roman"),
2673 build_string ("JISX0201.1976 (Japanese Roman)"),
2674 build_string ("JISX0201.1976 Japanese Roman"),
2675 build_string ("jisx0201\\.1976"),
2677 staticpro (&Vcharset_cyrillic_iso8859_5);
2678 Vcharset_cyrillic_iso8859_5 =
2679 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2680 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2681 build_string ("ISO8859-5"),
2682 build_string ("ISO8859-5 (Cyrillic)"),
2683 build_string ("ISO8859-5 (Cyrillic)"),
2684 build_string ("iso8859-5"),
2686 staticpro (&Vcharset_latin_iso8859_9);
2687 Vcharset_latin_iso8859_9 =
2688 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2689 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2690 build_string ("Latin-5"),
2691 build_string ("ISO8859-9 (Latin-5)"),
2692 build_string ("ISO8859-9 (Latin-5)"),
2693 build_string ("iso8859-9"),
2695 staticpro (&Vcharset_japanese_jisx0208_1978);
2696 Vcharset_japanese_jisx0208_1978 =
2697 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2698 Qjapanese_jisx0208_1978, 94, 2,
2699 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2700 build_string ("JIS X0208:1978"),
2701 build_string ("JIS X0208:1978 (Japanese)"),
2703 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2704 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2706 staticpro (&Vcharset_chinese_gb2312);
2707 Vcharset_chinese_gb2312 =
2708 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2709 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2710 build_string ("GB2312"),
2711 build_string ("GB2312)"),
2712 build_string ("GB2312 Chinese simplified"),
2713 build_string ("gb2312"),
2715 staticpro (&Vcharset_chinese_gb12345);
2716 Vcharset_chinese_gb12345 =
2717 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2718 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2719 build_string ("G1"),
2720 build_string ("GB 12345)"),
2721 build_string ("GB 12345-1990"),
2722 build_string ("GB12345\\(\\.1990\\)?-0"),
2724 staticpro (&Vcharset_japanese_jisx0208);
2725 Vcharset_japanese_jisx0208 =
2726 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2727 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2728 build_string ("JISX0208"),
2729 build_string ("JIS X0208:1983 (Japanese)"),
2730 build_string ("JIS X0208:1983 Japanese Kanji"),
2731 build_string ("jisx0208\\.1983"),
2734 staticpro (&Vcharset_japanese_jisx0208_1990);
2735 Vcharset_japanese_jisx0208_1990 =
2736 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2737 Qjapanese_jisx0208_1990, 94, 2,
2738 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2739 build_string ("JISX0208-1990"),
2740 build_string ("JIS X0208:1990 (Japanese)"),
2741 build_string ("JIS X0208:1990 Japanese Kanji"),
2742 build_string ("jisx0208\\.1990"),
2744 MIN_CHAR_JIS_X0208_1990,
2745 MAX_CHAR_JIS_X0208_1990, 0, 33);
2747 staticpro (&Vcharset_korean_ksc5601);
2748 Vcharset_korean_ksc5601 =
2749 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2750 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2751 build_string ("KSC5601"),
2752 build_string ("KSC5601 (Korean"),
2753 build_string ("KSC5601 Korean Hangul and Hanja"),
2754 build_string ("ksc5601"),
2756 staticpro (&Vcharset_japanese_jisx0212);
2757 Vcharset_japanese_jisx0212 =
2758 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2759 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2760 build_string ("JISX0212"),
2761 build_string ("JISX0212 (Japanese)"),
2762 build_string ("JISX0212 Japanese Supplement"),
2763 build_string ("jisx0212"),
2766 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2767 staticpro (&Vcharset_chinese_cns11643_1);
2768 Vcharset_chinese_cns11643_1 =
2769 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2770 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2771 build_string ("CNS11643-1"),
2772 build_string ("CNS11643-1 (Chinese traditional)"),
2774 ("CNS 11643 Plane 1 Chinese traditional"),
2775 build_string (CHINESE_CNS_PLANE_RE("1")),
2777 staticpro (&Vcharset_chinese_cns11643_2);
2778 Vcharset_chinese_cns11643_2 =
2779 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2780 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2781 build_string ("CNS11643-2"),
2782 build_string ("CNS11643-2 (Chinese traditional)"),
2784 ("CNS 11643 Plane 2 Chinese traditional"),
2785 build_string (CHINESE_CNS_PLANE_RE("2")),
2788 staticpro (&Vcharset_latin_tcvn5712);
2789 Vcharset_latin_tcvn5712 =
2790 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2791 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2792 build_string ("TCVN 5712"),
2793 build_string ("TCVN 5712 (VSCII-2)"),
2794 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
2795 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2797 staticpro (&Vcharset_latin_viscii_lower);
2798 Vcharset_latin_viscii_lower =
2799 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2800 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2801 build_string ("VISCII lower"),
2802 build_string ("VISCII lower (Vietnamese)"),
2803 build_string ("VISCII lower (Vietnamese)"),
2804 build_string ("MULEVISCII-LOWER"),
2806 staticpro (&Vcharset_latin_viscii_upper);
2807 Vcharset_latin_viscii_upper =
2808 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2809 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2810 build_string ("VISCII upper"),
2811 build_string ("VISCII upper (Vietnamese)"),
2812 build_string ("VISCII upper (Vietnamese)"),
2813 build_string ("MULEVISCII-UPPER"),
2815 staticpro (&Vcharset_latin_viscii);
2816 Vcharset_latin_viscii =
2817 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2818 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2819 build_string ("VISCII"),
2820 build_string ("VISCII 1.1 (Vietnamese)"),
2821 build_string ("VISCII 1.1 (Vietnamese)"),
2822 build_string ("VISCII1\\.1"),
2824 staticpro (&Vcharset_chinese_big5);
2825 Vcharset_chinese_big5 =
2826 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2827 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2828 build_string ("Big5"),
2829 build_string ("Big5"),
2830 build_string ("Big5 Chinese traditional"),
2831 build_string ("big5"),
2833 /* 0 */ MIN_CHAR_BIG5_CDP,
2834 /* 0 */ MAX_CHAR_BIG5_CDP, 0, 0);
2835 staticpro (&Vcharset_chinese_big5_cdp);
2836 Vcharset_chinese_big5_cdp =
2837 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
2838 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2839 build_string ("Big5-CDP"),
2840 build_string ("Big5 + CDP extension"),
2841 build_string ("Big5 with CDP extension"),
2842 build_string ("big5\\.cdp-0"),
2843 Qnil, MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP, 0, 0);
2844 #define DEF_HANZIKU(n) \
2845 staticpro (&Vcharset_ideograph_hanziku_##n); \
2846 Vcharset_ideograph_hanziku_##n = \
2847 make_charset (LEADING_BYTE_HANZIKU_##n, Qideograph_hanziku_##n, 256, 2, \
2848 2, 2, 0, CHARSET_LEFT_TO_RIGHT, \
2849 build_string ("HZK-"#n), \
2850 build_string ("HANZIKU-"#n), \
2851 build_string ("HANZIKU (pseudo BIG5 encoding) part "#n), \
2853 ("hanziku-"#n"$"), \
2854 Qnil, MIN_CHAR_HANZIKU_##n, MAX_CHAR_HANZIKU_##n, 0, 0);
2867 staticpro (&Vcharset_china3_jef);
2868 Vcharset_china3_jef =
2869 make_charset (LEADING_BYTE_CHINA3_JEF, Qchina3_jef, 256, 2,
2870 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2871 build_string ("JC3"),
2872 build_string ("JEF + CHINA3"),
2873 build_string ("JEF + CHINA3 private characters"),
2874 build_string ("china3jef-0"),
2875 Qnil, MIN_CHAR_CHINA3_JEF, MAX_CHAR_CHINA3_JEF, 0, 0);
2876 staticpro (&Vcharset_ideograph_cbeta);
2877 Vcharset_ideograph_cbeta =
2878 make_charset (LEADING_BYTE_CBETA, Qideograph_cbeta, 256, 2,
2879 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2880 build_string ("CB"),
2881 build_string ("CBETA"),
2882 build_string ("CBETA private characters"),
2883 build_string ("cbeta-0"),
2884 Qnil, MIN_CHAR_CBETA, MAX_CHAR_CBETA, 0, 0);
2885 staticpro (&Vcharset_ideograph_gt);
2886 Vcharset_ideograph_gt =
2887 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
2888 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2889 build_string ("GT"),
2890 build_string ("GT"),
2891 build_string ("GT"),
2893 Qnil, MIN_CHAR_GT, MAX_CHAR_GT, 0, 0);
2894 #define DEF_GT_PJ(n) \
2895 staticpro (&Vcharset_ideograph_gt_pj_##n); \
2896 Vcharset_ideograph_gt_pj_##n = \
2897 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
2898 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
2899 build_string ("GT-PJ-"#n), \
2900 build_string ("GT (pseudo JIS encoding) part "#n), \
2901 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
2903 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
2917 staticpro (&Vcharset_ideograph_daikanwa);
2918 Vcharset_ideograph_daikanwa =
2919 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
2920 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2921 build_string ("Daikanwa"),
2922 build_string ("Morohashi's Daikanwa"),
2923 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
2924 build_string ("Daikanwa"),
2925 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
2926 staticpro (&Vcharset_mojikyo);
2928 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
2929 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2930 build_string ("Mojikyo"),
2931 build_string ("Mojikyo"),
2932 build_string ("Konjaku-Mojikyo"),
2934 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
2935 staticpro (&Vcharset_mojikyo_2022_1);
2936 Vcharset_mojikyo_2022_1 =
2937 make_charset (LEADING_BYTE_MOJIKYO_2022_1, Qmojikyo_2022_1, 94, 3,
2938 2, 2, ':', CHARSET_LEFT_TO_RIGHT,
2939 build_string ("Mojikyo-2022-1"),
2940 build_string ("Mojikyo ISO-2022 Part 1"),
2941 build_string ("Konjaku-Mojikyo for ISO/IEC 2022 Part 1"),
2945 #define DEF_MOJIKYO_PJ(n) \
2946 staticpro (&Vcharset_mojikyo_pj_##n); \
2947 Vcharset_mojikyo_pj_##n = \
2948 make_charset (LEADING_BYTE_MOJIKYO_PJ_##n, Qmojikyo_pj_##n, 94, 2, \
2949 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
2950 build_string ("Mojikyo-PJ-"#n), \
2951 build_string ("Mojikyo (pseudo JIS encoding) part "#n), \
2953 ("Konjaku-Mojikyo (pseudo JIS encoding) part "#n), \
2955 ("\\(MojikyoPJ-"#n "\\|jisx0208\\.Mojikyo-"#n "\\)$"), \
2967 DEF_MOJIKYO_PJ (10);
2968 DEF_MOJIKYO_PJ (11);
2969 DEF_MOJIKYO_PJ (12);
2970 DEF_MOJIKYO_PJ (13);
2971 DEF_MOJIKYO_PJ (14);
2972 DEF_MOJIKYO_PJ (15);
2973 DEF_MOJIKYO_PJ (16);
2974 DEF_MOJIKYO_PJ (17);
2975 DEF_MOJIKYO_PJ (18);
2976 DEF_MOJIKYO_PJ (19);
2977 DEF_MOJIKYO_PJ (20);
2978 DEF_MOJIKYO_PJ (21);
2980 staticpro (&Vcharset_ethiopic_ucs);
2981 Vcharset_ethiopic_ucs =
2982 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
2983 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2984 build_string ("Ethiopic (UCS)"),
2985 build_string ("Ethiopic (UCS)"),
2986 build_string ("Ethiopic of UCS"),
2987 build_string ("Ethiopic-Unicode"),
2988 Qnil, 0x1200, 0x137F, 0x1200, 0);
2990 staticpro (&Vcharset_chinese_big5_1);
2991 Vcharset_chinese_big5_1 =
2992 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
2993 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
2994 build_string ("Big5"),
2995 build_string ("Big5 (Level-1)"),
2997 ("Big5 Level-1 Chinese traditional"),
2998 build_string ("big5"),
3000 staticpro (&Vcharset_chinese_big5_2);
3001 Vcharset_chinese_big5_2 =
3002 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
3003 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
3004 build_string ("Big5"),
3005 build_string ("Big5 (Level-2)"),
3007 ("Big5 Level-2 Chinese traditional"),
3008 build_string ("big5"),
3011 #ifdef ENABLE_COMPOSITE_CHARS
3012 /* #### For simplicity, we put composite chars into a 96x96 charset.
3013 This is going to lead to problems because you can run out of
3014 room, esp. as we don't yet recycle numbers. */
3015 staticpro (&Vcharset_composite);
3016 Vcharset_composite =
3017 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
3018 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3019 build_string ("Composite"),
3020 build_string ("Composite characters"),
3021 build_string ("Composite characters"),
3024 /* #### not dumped properly */
3025 composite_char_row_next = 32;
3026 composite_char_col_next = 32;
3028 Vcomposite_char_string2char_hash_table =
3029 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
3030 Vcomposite_char_char2string_hash_table =
3031 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3032 staticpro (&Vcomposite_char_string2char_hash_table);
3033 staticpro (&Vcomposite_char_char2string_hash_table);
3034 #endif /* ENABLE_COMPOSITE_CHARS */