1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002,2003 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_gb;
72 Lisp_Object Vcharset_ucs_cns;
73 Lisp_Object Vcharset_latin_viscii;
74 Lisp_Object Vcharset_latin_tcvn5712;
75 Lisp_Object Vcharset_latin_viscii_lower;
76 Lisp_Object Vcharset_latin_viscii_upper;
77 Lisp_Object Vcharset_jis_x0208;
78 Lisp_Object Vcharset_chinese_big5;
79 Lisp_Object Vcharset_ethiopic_ucs;
81 Lisp_Object Vcharset_chinese_big5_1;
82 Lisp_Object Vcharset_chinese_big5_2;
84 #ifdef ENABLE_COMPOSITE_CHARS
85 Lisp_Object Vcharset_composite;
87 /* Hash tables for composite chars. One maps string representing
88 composed chars to their equivalent chars; one goes the
90 Lisp_Object Vcomposite_char_char2string_hash_table;
91 Lisp_Object Vcomposite_char_string2char_hash_table;
93 static int composite_char_row_next;
94 static int composite_char_col_next;
96 #endif /* ENABLE_COMPOSITE_CHARS */
98 struct charset_lookup *chlook;
100 static const struct lrecord_description charset_lookup_description_1[] = {
101 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
103 NUM_LEADING_BYTES+4*128
110 static const struct struct_description charset_lookup_description = {
111 sizeof (struct charset_lookup),
112 charset_lookup_description_1
116 /* Table of number of bytes in the string representation of a character
117 indexed by the first byte of that representation.
119 rep_bytes_by_first_byte(c) is more efficient than the equivalent
120 canonical computation:
122 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
124 const Bytecount rep_bytes_by_first_byte[0xA0] =
125 { /* 0x00 - 0x7f are for straight ASCII */
126 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
127 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
128 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
134 /* 0x80 - 0x8f are for Dimension-1 official charsets */
136 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
138 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
140 /* 0x90 - 0x9d are for Dimension-2 official charsets */
141 /* 0x9e is for Dimension-1 private charsets */
142 /* 0x9f is for Dimension-2 private charsets */
143 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
149 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
151 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
155 if (XVECTOR_LENGTH (v) > ccs_len)
158 for (i = 0; i < XVECTOR_LENGTH (v); i++)
160 Lisp_Object c = XVECTOR_DATA(v)[i];
162 if (!NILP (c) && !CHARP (c))
166 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
178 put_char_ccs_code_point (Lisp_Object character,
179 Lisp_Object ccs, Lisp_Object value)
181 if (!EQ (XCHARSET_NAME (ccs), Qucs)
183 || (XCHAR (character) != XINT (value)))
185 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
189 { /* obsolete representation: value must be a list of bytes */
190 Lisp_Object ret = Fcar (value);
194 signal_simple_error ("Invalid value for coded-charset", value);
195 code_point = XINT (ret);
196 if (XCHARSET_GRAPHIC (ccs) == 1)
204 signal_simple_error ("Invalid value for coded-charset",
208 signal_simple_error ("Invalid value for coded-charset",
211 if (XCHARSET_GRAPHIC (ccs) == 1)
213 code_point = (code_point << 8) | j;
216 value = make_int (code_point);
218 else if (INTP (value))
220 code_point = XINT (value);
221 if (XCHARSET_GRAPHIC (ccs) == 1)
223 code_point &= 0x7F7F7F7F;
224 value = make_int (code_point);
228 signal_simple_error ("Invalid value for coded-charset", value);
232 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
235 decoding_table_remove_char (ccs, XINT (cpos));
238 decoding_table_put_char (ccs, code_point, character);
244 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
246 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
247 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
249 if (VECTORP (decoding_table))
251 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
255 decoding_table_remove_char (ccs, XINT (cpos));
258 if (CHAR_TABLEP (encoding_table))
260 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
268 int leading_code_private_11;
271 Lisp_Object Qcharsetp;
273 /* Qdoc_string, Qdimension, Qchars defined in general.c */
274 Lisp_Object Qregistry, Qfinal, Qgraphic;
275 Lisp_Object Qdirection;
276 Lisp_Object Qreverse_direction_charset;
277 Lisp_Object Qleading_byte;
278 Lisp_Object Qshort_name, Qlong_name;
280 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
281 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60;
298 Qjapanese_jisx0208_1978,
302 Qjapanese_jisx0208_1990,
318 Qvietnamese_viscii_lower,
319 Qvietnamese_viscii_upper,
328 Lisp_Object Ql2r, Qr2l;
330 Lisp_Object Vcharset_hash_table;
332 /* Composite characters are characters constructed by overstriking two
333 or more regular characters.
335 1) The old Mule implementation involves storing composite characters
336 in a buffer as a tag followed by all of the actual characters
337 used to make up the composite character. I think this is a bad
338 idea; it greatly complicates code that wants to handle strings
339 one character at a time because it has to deal with the possibility
340 of great big ungainly characters. It's much more reasonable to
341 simply store an index into a table of composite characters.
343 2) The current implementation only allows for 16,384 separate
344 composite characters over the lifetime of the XEmacs process.
345 This could become a potential problem if the user
346 edited lots of different files that use composite characters.
347 Due to FSF bogosity, increasing the number of allowable
348 composite characters under Mule would decrease the number
349 of possible faces that can exist. Mule already has shrunk
350 this to 2048, and further shrinkage would become uncomfortable.
351 No such problems exist in XEmacs.
353 Composite characters could be represented as 0x80 C1 C2 C3,
354 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
355 for slightly under 2^20 (one million) composite characters
356 over the XEmacs process lifetime, and you only need to
357 increase the size of a Mule character from 19 to 21 bits.
358 Or you could use 0x80 C1 C2 C3 C4, allowing for about
359 85 million (slightly over 2^26) composite characters. */
362 /************************************************************************/
363 /* Basic Emchar functions */
364 /************************************************************************/
366 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
367 string in STR. Returns the number of bytes stored.
368 Do not call this directly. Use the macro set_charptr_emchar() instead.
372 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
387 else if ( c <= 0x7ff )
389 *p++ = (c >> 6) | 0xc0;
390 *p++ = (c & 0x3f) | 0x80;
392 else if ( c <= 0xffff )
394 *p++ = (c >> 12) | 0xe0;
395 *p++ = ((c >> 6) & 0x3f) | 0x80;
396 *p++ = (c & 0x3f) | 0x80;
398 else if ( c <= 0x1fffff )
400 *p++ = (c >> 18) | 0xf0;
401 *p++ = ((c >> 12) & 0x3f) | 0x80;
402 *p++ = ((c >> 6) & 0x3f) | 0x80;
403 *p++ = (c & 0x3f) | 0x80;
405 else if ( c <= 0x3ffffff )
407 *p++ = (c >> 24) | 0xf8;
408 *p++ = ((c >> 18) & 0x3f) | 0x80;
409 *p++ = ((c >> 12) & 0x3f) | 0x80;
410 *p++ = ((c >> 6) & 0x3f) | 0x80;
411 *p++ = (c & 0x3f) | 0x80;
415 *p++ = (c >> 30) | 0xfc;
416 *p++ = ((c >> 24) & 0x3f) | 0x80;
417 *p++ = ((c >> 18) & 0x3f) | 0x80;
418 *p++ = ((c >> 12) & 0x3f) | 0x80;
419 *p++ = ((c >> 6) & 0x3f) | 0x80;
420 *p++ = (c & 0x3f) | 0x80;
423 BREAKUP_CHAR (c, charset, c1, c2);
424 lb = CHAR_LEADING_BYTE (c);
425 if (LEADING_BYTE_PRIVATE_P (lb))
426 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
428 if (EQ (charset, Vcharset_control_1))
437 /* Return the first character from a Mule-encoded string in STR,
438 assuming it's non-ASCII. Do not call this directly.
439 Use the macro charptr_emchar() instead. */
442 non_ascii_charptr_emchar (const Bufbyte *str)
455 else if ( b >= 0xf8 )
460 else if ( b >= 0xf0 )
465 else if ( b >= 0xe0 )
470 else if ( b >= 0xc0 )
480 for( ; len > 0; len-- )
483 ch = ( ch << 6 ) | ( b & 0x3f );
487 Bufbyte i0 = *str, i1, i2 = 0;
490 if (i0 == LEADING_BYTE_CONTROL_1)
491 return (Emchar) (*++str - 0x20);
493 if (LEADING_BYTE_PREFIX_P (i0))
498 charset = CHARSET_BY_LEADING_BYTE (i0);
499 if (XCHARSET_DIMENSION (charset) == 2)
502 return MAKE_CHAR (charset, i1, i2);
506 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
507 Do not call this directly. Use the macro valid_char_p() instead. */
511 non_ascii_valid_char_p (Emchar ch)
515 /* Must have only lowest 19 bits set */
519 f1 = CHAR_FIELD1 (ch);
520 f2 = CHAR_FIELD2 (ch);
521 f3 = CHAR_FIELD3 (ch);
527 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
528 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
529 f2 > MAX_CHAR_FIELD2_PRIVATE)
534 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
535 f2 <= MAX_CHAR_FIELD2_PRIVATE))
539 NOTE: This takes advantage of the fact that
540 FIELD2_TO_OFFICIAL_LEADING_BYTE and
541 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
543 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
544 if (EQ (charset, Qnil))
546 return (XCHARSET_CHARS (charset) == 96);
552 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
553 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
554 f1 > MAX_CHAR_FIELD1_PRIVATE)
556 if (f2 < 0x20 || f3 < 0x20)
559 #ifdef ENABLE_COMPOSITE_CHARS
560 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
562 if (UNBOUNDP (Fgethash (make_int (ch),
563 Vcomposite_char_char2string_hash_table,
568 #endif /* ENABLE_COMPOSITE_CHARS */
570 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
571 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
574 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
576 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
579 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
581 if (EQ (charset, Qnil))
583 return (XCHARSET_CHARS (charset) == 96);
589 /************************************************************************/
590 /* Basic string functions */
591 /************************************************************************/
593 /* Copy the character pointed to by SRC into DST. Do not call this
594 directly. Use the macro charptr_copy_char() instead.
595 Return the number of bytes copied. */
598 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
600 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
602 for (i = bytes; i; i--, dst++, src++)
608 /************************************************************************/
609 /* streams of Emchars */
610 /************************************************************************/
612 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
613 The functions below are not meant to be called directly; use
614 the macros in insdel.h. */
617 Lstream_get_emchar_1 (Lstream *stream, int ch)
619 Bufbyte str[MAX_EMCHAR_LEN];
620 Bufbyte *strptr = str;
623 str[0] = (Bufbyte) ch;
625 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
627 int c = Lstream_getc (stream);
628 bufpos_checking_assert (c >= 0);
629 *++strptr = (Bufbyte) c;
631 return charptr_emchar (str);
635 Lstream_fput_emchar (Lstream *stream, Emchar ch)
637 Bufbyte str[MAX_EMCHAR_LEN];
638 Bytecount len = set_charptr_emchar (str, ch);
639 return Lstream_write (stream, str, len);
643 Lstream_funget_emchar (Lstream *stream, Emchar ch)
645 Bufbyte str[MAX_EMCHAR_LEN];
646 Bytecount len = set_charptr_emchar (str, ch);
647 Lstream_unread (stream, str, len);
651 /************************************************************************/
653 /************************************************************************/
656 mark_charset (Lisp_Object obj)
658 Lisp_Charset *cs = XCHARSET (obj);
660 mark_object (cs->short_name);
661 mark_object (cs->long_name);
662 mark_object (cs->doc_string);
663 mark_object (cs->registry);
664 mark_object (cs->ccl_program);
666 mark_object (cs->decoding_table);
667 mark_object (cs->mother);
673 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
675 Lisp_Charset *cs = XCHARSET (obj);
679 error ("printing unreadable object #<charset %s 0x%x>",
680 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
683 write_c_string ("#<charset ", printcharfun);
684 print_internal (CHARSET_NAME (cs), printcharfun, 0);
685 write_c_string (" ", printcharfun);
686 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
687 write_c_string (" ", printcharfun);
688 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
689 write_c_string (" ", printcharfun);
690 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
691 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
693 CHARSET_DIMENSION (cs),
694 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
695 CHARSET_COLUMNS (cs),
696 CHARSET_GRAPHIC (cs),
698 write_c_string (buf, printcharfun);
699 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
700 sprintf (buf, " 0x%x>", cs->header.uid);
701 write_c_string (buf, printcharfun);
704 static const struct lrecord_description charset_description[] = {
705 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
706 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
707 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
708 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
709 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
710 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
711 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
713 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
714 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
719 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
720 mark_charset, print_charset, 0, 0, 0,
724 /* Make a new charset. */
725 /* #### SJT Should generic properties be allowed? */
727 make_charset (Charset_ID id, Lisp_Object name,
728 unsigned short chars, unsigned char dimension,
729 unsigned char columns, unsigned char graphic,
730 Bufbyte final, unsigned char direction, Lisp_Object short_name,
731 Lisp_Object long_name, Lisp_Object doc,
733 Lisp_Object decoding_table,
734 Emchar min_code, Emchar max_code,
735 Emchar code_offset, unsigned char byte_offset,
736 Lisp_Object mother, unsigned char conversion)
739 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
743 XSETCHARSET (obj, cs);
745 CHARSET_ID (cs) = id;
746 CHARSET_NAME (cs) = name;
747 CHARSET_SHORT_NAME (cs) = short_name;
748 CHARSET_LONG_NAME (cs) = long_name;
749 CHARSET_CHARS (cs) = chars;
750 CHARSET_DIMENSION (cs) = dimension;
751 CHARSET_DIRECTION (cs) = direction;
752 CHARSET_COLUMNS (cs) = columns;
753 CHARSET_GRAPHIC (cs) = graphic;
754 CHARSET_FINAL (cs) = final;
755 CHARSET_DOC_STRING (cs) = doc;
756 CHARSET_REGISTRY (cs) = reg;
757 CHARSET_CCL_PROGRAM (cs) = Qnil;
758 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
760 CHARSET_DECODING_TABLE(cs) = Qunbound;
761 CHARSET_MIN_CODE (cs) = min_code;
762 CHARSET_MAX_CODE (cs) = max_code;
763 CHARSET_CODE_OFFSET (cs) = code_offset;
764 CHARSET_BYTE_OFFSET (cs) = byte_offset;
765 CHARSET_MOTHER (cs) = mother;
766 CHARSET_CONVERSION (cs) = conversion;
770 if (id == LEADING_BYTE_ASCII)
771 CHARSET_REP_BYTES (cs) = 1;
773 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
775 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
780 /* some charsets do not have final characters. This includes
781 ASCII, Control-1, Composite, and the two faux private
783 unsigned char iso2022_type
784 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
786 if (code_offset == 0)
788 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
789 chlook->charset_by_attributes[iso2022_type][final] = obj;
793 (chlook->charset_by_attributes[iso2022_type][final][direction]));
794 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
798 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
799 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
801 /* Some charsets are "faux" and don't have names or really exist at
802 all except in the leading-byte table. */
804 Fputhash (name, obj, Vcharset_hash_table);
809 get_unallocated_leading_byte (int dimension)
814 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
817 lb = chlook->next_allocated_leading_byte++;
821 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
824 lb = chlook->next_allocated_1_byte_leading_byte++;
828 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
831 lb = chlook->next_allocated_2_byte_leading_byte++;
837 ("No more character sets free for this dimension",
838 make_int (dimension));
844 /* Number of Big5 characters which have the same code in 1st byte. */
846 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
849 decode_defined_char (Lisp_Object ccs, int code_point)
851 int dim = XCHARSET_DIMENSION (ccs);
852 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
860 = get_ccs_octet_table (decoding_table, ccs,
861 (code_point >> (dim * 8)) & 255);
863 if (CHARP (decoding_table))
864 return XCHAR (decoding_table);
867 else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
869 if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
871 if ( EQ (mother, Vcharset_ucs) )
872 return DECODE_CHAR (mother, code_point);
874 return decode_defined_char (mother, code_point);
881 decode_builtin_char (Lisp_Object charset, int code_point)
883 Lisp_Object mother = XCHARSET_MOTHER (charset);
886 if ( XCHARSET_MAX_CODE (charset) > 0 )
888 if ( CHARSETP (mother) )
890 int code = code_point;
892 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
894 int row = code_point >> 8;
895 int cell = code_point & 255;
899 else if (row < 16 + 32 + 30)
900 code = (row - (16 + 32)) * 94 + cell - 33;
901 else if (row < 18 + 32 + 30)
903 else if (row < 18 + 32 + 60)
904 code = (row - (18 + 32)) * 94 + cell - 33;
906 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
908 int plane = code_point >> 16;
909 int row = (code_point >> 8) & 255;
910 int cell = code_point & 255;
914 else if (row < 16 + 32 + 30)
916 = (plane - 33) * 94 * 60
917 + (row - (16 + 32)) * 94
919 else if (row < 18 + 32 + 30)
921 else if (row < 18 + 32 + 60)
923 = (plane - 33) * 94 * 60
924 + (row - (18 + 32)) * 94
928 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
933 = (XCHARSET_DIMENSION (charset) == 1
935 code_point - XCHARSET_BYTE_OFFSET (charset)
937 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
938 * XCHARSET_CHARS (charset)
939 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
940 + XCHARSET_CODE_OFFSET (charset);
941 if ((cid < XCHARSET_MIN_CODE (charset))
942 || (XCHARSET_MAX_CODE (charset) < cid))
947 else if ((final = XCHARSET_FINAL (charset)) >= '0')
949 if (XCHARSET_DIMENSION (charset) == 1)
951 switch (XCHARSET_CHARS (charset))
955 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
958 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
966 switch (XCHARSET_CHARS (charset))
969 return MIN_CHAR_94x94
970 + (final - '0') * 94 * 94
971 + (((code_point >> 8) & 0x7F) - 33) * 94
972 + ((code_point & 0x7F) - 33);
974 return MIN_CHAR_96x96
975 + (final - '0') * 96 * 96
976 + (((code_point >> 8) & 0x7F) - 32) * 96
977 + ((code_point & 0x7F) - 32);
989 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
991 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
994 if ( CHAR_TABLEP (encoding_table)
995 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1000 Lisp_Object mother = XCHARSET_MOTHER (charset);
1001 int min = XCHARSET_MIN_CODE (charset);
1002 int max = XCHARSET_MAX_CODE (charset);
1005 if ( CHARSETP (mother) )
1007 if (XCHARSET_FINAL (charset) >= '0')
1008 code = charset_code_point (mother, ch, 1);
1010 code = charset_code_point (mother, ch, defined_only);
1012 else if (defined_only)
1014 else if ( ((max == 0) && CHARSETP (mother)
1015 && (XCHARSET_FINAL (charset) == 0))
1016 || ((min <= ch) && (ch <= max)) )
1018 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1019 || ((min <= code) && (code <= max)) )
1021 int d = code - XCHARSET_CODE_OFFSET (charset);
1023 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1025 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1027 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1029 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1032 int cell = d % 94 + 33;
1038 return (row << 8) | cell;
1040 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1041 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1042 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1043 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1044 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1046 int plane = d / (94 * 60) + 33;
1047 int row = (d % (94 * 60)) / 94;
1048 int cell = d % 94 + 33;
1054 return (plane << 16) | (row << 8) | cell;
1056 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1058 ( (d / (94 * 94) + 33) << 16)
1059 | ((d / 94 % 94 + 33) << 8)
1061 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1063 ( (d / (96 * 96) + 32) << 16)
1064 | ((d / 96 % 96 + 32) << 8)
1066 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1068 ( (d / (94 * 94 * 94) + 33) << 24)
1069 | ((d / (94 * 94) % 94 + 33) << 16)
1070 | ((d / 94 % 94 + 33) << 8)
1072 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1074 ( (d / (96 * 96 * 96) + 32) << 24)
1075 | ((d / (96 * 96) % 96 + 32) << 16)
1076 | ((d / 96 % 96 + 32) << 8)
1080 printf ("Unknown CCS-conversion %d is specified!",
1081 XCHARSET_CONVERSION (charset));
1085 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1086 ( XCHARSET_MIN_CODE (charset) == 0 )
1088 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1089 (XCHARSET_CODE_OFFSET (charset)
1090 == XCHARSET_MIN_CODE (charset))
1095 if (XCHARSET_DIMENSION (charset) == 1)
1097 if (XCHARSET_CHARS (charset) == 94)
1099 if (((d = ch - (MIN_CHAR_94
1100 + (XCHARSET_FINAL (charset) - '0') * 94))
1105 else if (XCHARSET_CHARS (charset) == 96)
1107 if (((d = ch - (MIN_CHAR_96
1108 + (XCHARSET_FINAL (charset) - '0') * 96))
1116 else if (XCHARSET_DIMENSION (charset) == 2)
1118 if (XCHARSET_CHARS (charset) == 94)
1120 if (((d = ch - (MIN_CHAR_94x94
1122 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1125 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1127 else if (XCHARSET_CHARS (charset) == 96)
1129 if (((d = ch - (MIN_CHAR_96x96
1131 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1134 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1145 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1147 if (c <= MAX_CHAR_BASIC_LATIN)
1149 *charset = Vcharset_ascii;
1154 *charset = Vcharset_control_1;
1159 *charset = Vcharset_latin_iso8859_1;
1163 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1165 *charset = Vcharset_hebrew_iso8859_8;
1166 return c - MIN_CHAR_HEBREW + 0x20;
1169 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1171 *charset = Vcharset_thai_tis620;
1172 return c - MIN_CHAR_THAI + 0x20;
1175 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1176 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1178 return list2 (Vcharset_katakana_jisx0201,
1179 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1182 else if (c <= MAX_CHAR_BMP)
1184 *charset = Vcharset_ucs_bmp;
1187 else if (c <= MAX_CHAR_SMP)
1189 *charset = Vcharset_ucs_smp;
1190 return c - MIN_CHAR_SMP;
1192 else if (c <= MAX_CHAR_SIP)
1194 *charset = Vcharset_ucs_sip;
1195 return c - MIN_CHAR_SIP;
1197 else if (c < MIN_CHAR_94)
1199 *charset = Vcharset_ucs;
1202 else if (c <= MAX_CHAR_94)
1204 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1205 ((c - MIN_CHAR_94) / 94) + '0',
1206 CHARSET_LEFT_TO_RIGHT);
1207 if (!NILP (*charset))
1208 return ((c - MIN_CHAR_94) % 94) + 33;
1211 *charset = Vcharset_ucs;
1215 else if (c <= MAX_CHAR_96)
1217 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1218 ((c - MIN_CHAR_96) / 96) + '0',
1219 CHARSET_LEFT_TO_RIGHT);
1220 if (!NILP (*charset))
1221 return ((c - MIN_CHAR_96) % 96) + 32;
1224 *charset = Vcharset_ucs;
1228 else if (c <= MAX_CHAR_94x94)
1231 = CHARSET_BY_ATTRIBUTES (94, 2,
1232 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1233 CHARSET_LEFT_TO_RIGHT);
1234 if (!NILP (*charset))
1235 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1236 | (((c - MIN_CHAR_94x94) % 94) + 33);
1239 *charset = Vcharset_ucs;
1243 else if (c <= MAX_CHAR_96x96)
1246 = CHARSET_BY_ATTRIBUTES (96, 2,
1247 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1248 CHARSET_LEFT_TO_RIGHT);
1249 if (!NILP (*charset))
1250 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1251 | (((c - MIN_CHAR_96x96) % 96) + 32);
1254 *charset = Vcharset_ucs;
1260 *charset = Vcharset_ucs;
1265 Lisp_Object Vdefault_coded_charset_priority_list;
1269 /************************************************************************/
1270 /* Basic charset Lisp functions */
1271 /************************************************************************/
1273 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1274 Return non-nil if OBJECT is a charset.
1278 return CHARSETP (object) ? Qt : Qnil;
1281 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1282 Retrieve the charset of the given name.
1283 If CHARSET-OR-NAME is a charset object, it is simply returned.
1284 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1285 nil is returned. Otherwise the associated charset object is returned.
1289 if (CHARSETP (charset_or_name))
1290 return charset_or_name;
1292 CHECK_SYMBOL (charset_or_name);
1293 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1296 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1297 Retrieve the charset of the given name.
1298 Same as `find-charset' except an error is signalled if there is no such
1299 charset instead of returning nil.
1303 Lisp_Object charset = Ffind_charset (name);
1306 signal_simple_error ("No such charset", name);
1310 /* We store the charsets in hash tables with the names as the key and the
1311 actual charset object as the value. Occasionally we need to use them
1312 in a list format. These routines provide us with that. */
1313 struct charset_list_closure
1315 Lisp_Object *charset_list;
1319 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1320 void *charset_list_closure)
1322 /* This function can GC */
1323 struct charset_list_closure *chcl =
1324 (struct charset_list_closure*) charset_list_closure;
1325 Lisp_Object *charset_list = chcl->charset_list;
1327 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1331 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1332 Return a list of the names of all defined charsets.
1336 Lisp_Object charset_list = Qnil;
1337 struct gcpro gcpro1;
1338 struct charset_list_closure charset_list_closure;
1340 GCPRO1 (charset_list);
1341 charset_list_closure.charset_list = &charset_list;
1342 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1343 &charset_list_closure);
1346 return charset_list;
1349 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1350 Return the name of charset CHARSET.
1354 return XCHARSET_NAME (Fget_charset (charset));
1357 /* #### SJT Should generic properties be allowed? */
1358 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1359 Define a new character set.
1360 This function is for use with Mule support.
1361 NAME is a symbol, the name by which the character set is normally referred.
1362 DOC-STRING is a string describing the character set.
1363 PROPS is a property list, describing the specific nature of the
1364 character set. Recognized properties are:
1366 'short-name Short version of the charset name (ex: Latin-1)
1367 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1368 'registry A regular expression matching the font registry field for
1370 'dimension Number of octets used to index a character in this charset.
1371 Either 1 or 2. Defaults to 1.
1372 If UTF-2000 feature is enabled, 3 or 4 are also available.
1373 'columns Number of columns used to display a character in this charset.
1374 Only used in TTY mode. (Under X, the actual width of a
1375 character can be derived from the font used to display the
1376 characters.) If unspecified, defaults to the dimension
1377 (this is almost always the correct value).
1378 'chars Number of characters in each dimension (94 or 96).
1379 Defaults to 94. Note that if the dimension is 2, the
1380 character set thus described is 94x94 or 96x96.
1381 If UTF-2000 feature is enabled, 128 or 256 are also available.
1382 'final Final byte of ISO 2022 escape sequence. Must be
1383 supplied. Each combination of (DIMENSION, CHARS) defines a
1384 separate namespace for final bytes. Note that ISO
1385 2022 restricts the final byte to the range
1386 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1387 dimension == 2. Note also that final bytes in the range
1388 0x30 - 0x3F are reserved for user-defined (not official)
1390 'graphic 0 (use left half of font on output) or 1 (use right half
1391 of font on output). Defaults to 0. For example, for
1392 a font whose registry is ISO8859-1, the left half
1393 (octets 0x20 - 0x7F) is the `ascii' character set, while
1394 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1395 character set. With 'graphic set to 0, the octets
1396 will have their high bit cleared; with it set to 1,
1397 the octets will have their high bit set.
1398 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1400 'ccl-program A compiled CCL program used to convert a character in
1401 this charset into an index into the font. This is in
1402 addition to the 'graphic property. The CCL program
1403 is passed the octets of the character, with the high
1404 bit cleared and set depending upon whether the value
1405 of the 'graphic property is 0 or 1.
1406 'mother [UTF-2000 only] Base coded-charset.
1407 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1408 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1409 'code-offset [UTF-2000 only] Offset for a code-point of a base
1411 'conversion [UTF-2000 only] Conversion for a code-point of a base
1412 coded-charset (94x60 or 94x94x60).
1414 (name, doc_string, props))
1416 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1417 int direction = CHARSET_LEFT_TO_RIGHT;
1418 Lisp_Object registry = Qnil;
1419 Lisp_Object charset;
1420 Lisp_Object ccl_program = Qnil;
1421 Lisp_Object short_name = Qnil, long_name = Qnil;
1422 Lisp_Object mother = Qnil;
1423 int min_code = 0, max_code = 0, code_offset = 0;
1424 int byte_offset = -1;
1427 CHECK_SYMBOL (name);
1428 if (!NILP (doc_string))
1429 CHECK_STRING (doc_string);
1431 charset = Ffind_charset (name);
1432 if (!NILP (charset))
1433 signal_simple_error ("Cannot redefine existing charset", name);
1436 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1438 if (EQ (keyword, Qshort_name))
1440 CHECK_STRING (value);
1444 if (EQ (keyword, Qlong_name))
1446 CHECK_STRING (value);
1450 else if (EQ (keyword, Qdimension))
1453 dimension = XINT (value);
1454 if (dimension < 1 ||
1461 signal_simple_error ("Invalid value for 'dimension", value);
1464 else if (EQ (keyword, Qchars))
1467 chars = XINT (value);
1468 if (chars != 94 && chars != 96
1470 && chars != 128 && chars != 256
1473 signal_simple_error ("Invalid value for 'chars", value);
1476 else if (EQ (keyword, Qcolumns))
1479 columns = XINT (value);
1480 if (columns != 1 && columns != 2)
1481 signal_simple_error ("Invalid value for 'columns", value);
1484 else if (EQ (keyword, Qgraphic))
1487 graphic = XINT (value);
1495 signal_simple_error ("Invalid value for 'graphic", value);
1498 else if (EQ (keyword, Qregistry))
1500 CHECK_STRING (value);
1504 else if (EQ (keyword, Qdirection))
1506 if (EQ (value, Ql2r))
1507 direction = CHARSET_LEFT_TO_RIGHT;
1508 else if (EQ (value, Qr2l))
1509 direction = CHARSET_RIGHT_TO_LEFT;
1511 signal_simple_error ("Invalid value for 'direction", value);
1514 else if (EQ (keyword, Qfinal))
1516 CHECK_CHAR_COERCE_INT (value);
1517 final = XCHAR (value);
1518 if (final < '0' || final > '~')
1519 signal_simple_error ("Invalid value for 'final", value);
1523 else if (EQ (keyword, Qmother))
1525 mother = Fget_charset (value);
1528 else if (EQ (keyword, Qmin_code))
1531 min_code = XUINT (value);
1534 else if (EQ (keyword, Qmax_code))
1537 max_code = XUINT (value);
1540 else if (EQ (keyword, Qcode_offset))
1543 code_offset = XUINT (value);
1546 else if (EQ (keyword, Qconversion))
1548 if (EQ (value, Q94x60))
1549 conversion = CONVERSION_94x60;
1550 else if (EQ (value, Q94x94x60))
1551 conversion = CONVERSION_94x94x60;
1553 signal_simple_error ("Unrecognized conversion", value);
1557 else if (EQ (keyword, Qccl_program))
1559 struct ccl_program test_ccl;
1561 if (setup_ccl_program (&test_ccl, value) < 0)
1562 signal_simple_error ("Invalid value for 'ccl-program", value);
1563 ccl_program = value;
1567 signal_simple_error ("Unrecognized property", keyword);
1573 error ("'final must be specified");
1575 if (dimension == 2 && final > 0x5F)
1577 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1580 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1581 CHARSET_LEFT_TO_RIGHT)) ||
1582 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1583 CHARSET_RIGHT_TO_LEFT)))
1585 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1587 id = get_unallocated_leading_byte (dimension);
1589 if (NILP (doc_string))
1590 doc_string = build_string ("");
1592 if (NILP (registry))
1593 registry = build_string ("");
1595 if (NILP (short_name))
1596 XSETSTRING (short_name, XSYMBOL (name)->name);
1598 if (NILP (long_name))
1599 long_name = doc_string;
1602 columns = dimension;
1604 if (byte_offset < 0)
1608 else if (chars == 96)
1614 charset = make_charset (id, name, chars, dimension, columns, graphic,
1615 final, direction, short_name, long_name,
1616 doc_string, registry,
1617 Qnil, min_code, max_code, code_offset, byte_offset,
1618 mother, conversion);
1619 if (!NILP (ccl_program))
1620 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1624 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1626 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1627 NEW-NAME is the name of the new charset. Return the new charset.
1629 (charset, new_name))
1631 Lisp_Object new_charset = Qnil;
1632 int id, chars, dimension, columns, graphic, final;
1634 Lisp_Object registry, doc_string, short_name, long_name;
1637 charset = Fget_charset (charset);
1638 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1639 signal_simple_error ("Charset already has reverse-direction charset",
1642 CHECK_SYMBOL (new_name);
1643 if (!NILP (Ffind_charset (new_name)))
1644 signal_simple_error ("Cannot redefine existing charset", new_name);
1646 cs = XCHARSET (charset);
1648 chars = CHARSET_CHARS (cs);
1649 dimension = CHARSET_DIMENSION (cs);
1650 columns = CHARSET_COLUMNS (cs);
1651 id = get_unallocated_leading_byte (dimension);
1653 graphic = CHARSET_GRAPHIC (cs);
1654 final = CHARSET_FINAL (cs);
1655 direction = CHARSET_RIGHT_TO_LEFT;
1656 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1657 direction = CHARSET_LEFT_TO_RIGHT;
1658 doc_string = CHARSET_DOC_STRING (cs);
1659 short_name = CHARSET_SHORT_NAME (cs);
1660 long_name = CHARSET_LONG_NAME (cs);
1661 registry = CHARSET_REGISTRY (cs);
1663 new_charset = make_charset (id, new_name, chars, dimension, columns,
1664 graphic, final, direction, short_name, long_name,
1665 doc_string, registry,
1667 CHARSET_DECODING_TABLE(cs),
1668 CHARSET_MIN_CODE(cs),
1669 CHARSET_MAX_CODE(cs),
1670 CHARSET_CODE_OFFSET(cs),
1671 CHARSET_BYTE_OFFSET(cs),
1673 CHARSET_CONVERSION (cs)
1675 Qnil, 0, 0, 0, 0, Qnil, 0
1679 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1680 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1685 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1686 Define symbol ALIAS as an alias for CHARSET.
1690 CHECK_SYMBOL (alias);
1691 charset = Fget_charset (charset);
1692 return Fputhash (alias, charset, Vcharset_hash_table);
1695 /* #### Reverse direction charsets not yet implemented. */
1697 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1699 Return the reverse-direction charset parallel to CHARSET, if any.
1700 This is the charset with the same properties (in particular, the same
1701 dimension, number of characters per dimension, and final byte) as
1702 CHARSET but whose characters are displayed in the opposite direction.
1706 charset = Fget_charset (charset);
1707 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1711 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1712 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1713 If DIRECTION is omitted, both directions will be checked (left-to-right
1714 will be returned if character sets exist for both directions).
1716 (dimension, chars, final, direction))
1718 int dm, ch, fi, di = -1;
1719 Lisp_Object obj = Qnil;
1721 CHECK_INT (dimension);
1722 dm = XINT (dimension);
1723 if (dm < 1 || dm > 2)
1724 signal_simple_error ("Invalid value for DIMENSION", dimension);
1728 if (ch != 94 && ch != 96)
1729 signal_simple_error ("Invalid value for CHARS", chars);
1731 CHECK_CHAR_COERCE_INT (final);
1733 if (fi < '0' || fi > '~')
1734 signal_simple_error ("Invalid value for FINAL", final);
1736 if (EQ (direction, Ql2r))
1737 di = CHARSET_LEFT_TO_RIGHT;
1738 else if (EQ (direction, Qr2l))
1739 di = CHARSET_RIGHT_TO_LEFT;
1740 else if (!NILP (direction))
1741 signal_simple_error ("Invalid value for DIRECTION", direction);
1743 if (dm == 2 && fi > 0x5F)
1745 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1749 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1751 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1754 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1757 return XCHARSET_NAME (obj);
1761 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1762 Return short name of CHARSET.
1766 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1769 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1770 Return long name of CHARSET.
1774 return XCHARSET_LONG_NAME (Fget_charset (charset));
1777 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1778 Return description of CHARSET.
1782 return XCHARSET_DOC_STRING (Fget_charset (charset));
1785 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1786 Return dimension of CHARSET.
1790 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1793 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1794 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1795 Recognized properties are those listed in `make-charset', as well as
1796 'name and 'doc-string.
1802 charset = Fget_charset (charset);
1803 cs = XCHARSET (charset);
1805 CHECK_SYMBOL (prop);
1806 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1807 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1808 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1809 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1810 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1811 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1812 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1813 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1814 Qnil : make_char (CHARSET_FINAL (cs));
1815 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1816 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1817 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1818 if (EQ (prop, Qdirection))
1819 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1820 if (EQ (prop, Qreverse_direction_charset))
1822 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1823 /* #### Is this translation OK? If so, error checking sufficient? */
1824 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1827 if (EQ (prop, Qmother))
1828 return CHARSET_MOTHER (cs);
1829 if (EQ (prop, Qmin_code))
1830 return make_int (CHARSET_MIN_CODE (cs));
1831 if (EQ (prop, Qmax_code))
1832 return make_int (CHARSET_MAX_CODE (cs));
1834 signal_simple_error ("Unrecognized charset property name", prop);
1835 return Qnil; /* not reached */
1838 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1839 Return charset identification number of CHARSET.
1843 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1846 /* #### We need to figure out which properties we really want to
1849 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1850 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1852 (charset, ccl_program))
1854 struct ccl_program test_ccl;
1856 charset = Fget_charset (charset);
1857 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1858 signal_simple_error ("Invalid ccl-program", ccl_program);
1859 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1864 invalidate_charset_font_caches (Lisp_Object charset)
1866 /* Invalidate font cache entries for charset on all devices. */
1867 Lisp_Object devcons, concons, hash_table;
1868 DEVICE_LOOP_NO_BREAK (devcons, concons)
1870 struct device *d = XDEVICE (XCAR (devcons));
1871 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1872 if (!UNBOUNDP (hash_table))
1873 Fclrhash (hash_table);
1877 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1878 Set the 'registry property of CHARSET to REGISTRY.
1880 (charset, registry))
1882 charset = Fget_charset (charset);
1883 CHECK_STRING (registry);
1884 XCHARSET_REGISTRY (charset) = registry;
1885 invalidate_charset_font_caches (charset);
1886 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1891 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1892 Return mapping-table of CHARSET.
1896 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1899 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1900 Set mapping-table of CHARSET to TABLE.
1904 struct Lisp_Charset *cs;
1908 charset = Fget_charset (charset);
1909 cs = XCHARSET (charset);
1913 CHARSET_DECODING_TABLE(cs) = Qnil;
1916 else if (VECTORP (table))
1918 int ccs_len = CHARSET_BYTE_SIZE (cs);
1919 int ret = decoding_table_check_elements (table,
1920 CHARSET_DIMENSION (cs),
1925 signal_simple_error ("Too big table", table);
1927 signal_simple_error ("Invalid element is found", table);
1929 signal_simple_error ("Something wrong", table);
1931 CHARSET_DECODING_TABLE(cs) = Qnil;
1934 signal_error (Qwrong_type_argument,
1935 list2 (build_translated_string ("vector-or-nil-p"),
1938 byte_offset = CHARSET_BYTE_OFFSET (cs);
1939 switch (CHARSET_DIMENSION (cs))
1942 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1944 Lisp_Object c = XVECTOR_DATA(table)[i];
1947 Fput_char_attribute (c, XCHARSET_NAME (charset),
1948 make_int (i + byte_offset));
1952 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1954 Lisp_Object v = XVECTOR_DATA(table)[i];
1960 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1962 Lisp_Object c = XVECTOR_DATA(v)[j];
1966 (c, XCHARSET_NAME (charset),
1967 make_int ( ( (i + byte_offset) << 8 )
1973 Fput_char_attribute (v, XCHARSET_NAME (charset),
1974 make_int (i + byte_offset));
1983 /************************************************************************/
1984 /* Lisp primitives for working with characters */
1985 /************************************************************************/
1988 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
1989 Make a character from CHARSET and code-point CODE.
1990 If DEFINED_ONLY is non-nil, builtin character is not returned.
1991 If corresponding character is not found, nil is returned.
1993 (charset, code, defined_only))
1997 charset = Fget_charset (charset);
2000 if (XCHARSET_GRAPHIC (charset) == 1)
2002 if (NILP (defined_only))
2003 c = DECODE_CHAR (charset, c);
2005 c = decode_defined_char (charset, c);
2006 return c >= 0 ? make_char (c) : Qnil;
2009 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2010 Make a builtin character from CHARSET and code-point CODE.
2016 charset = Fget_charset (charset);
2018 if (EQ (charset, Vcharset_latin_viscii))
2020 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2026 (ret = Fget_char_attribute (chr,
2027 Vcharset_latin_viscii_lower,
2030 charset = Vcharset_latin_viscii_lower;
2034 (ret = Fget_char_attribute (chr,
2035 Vcharset_latin_viscii_upper,
2038 charset = Vcharset_latin_viscii_upper;
2045 if (XCHARSET_GRAPHIC (charset) == 1)
2048 c = decode_builtin_char (charset, c);
2049 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
2053 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2054 Make a character from CHARSET and octets ARG1 and ARG2.
2055 ARG2 is required only for characters from two-dimensional charsets.
2056 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2057 character s with caron.
2059 (charset, arg1, arg2))
2063 int lowlim, highlim;
2065 charset = Fget_charset (charset);
2066 cs = XCHARSET (charset);
2068 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2069 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2071 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2073 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2074 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2077 /* It is useful (and safe, according to Olivier Galibert) to strip
2078 the 8th bit off ARG1 and ARG2 because it allows programmers to
2079 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2080 Latin 2 code of the character. */
2088 if (a1 < lowlim || a1 > highlim)
2089 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2091 if (CHARSET_DIMENSION (cs) == 1)
2095 ("Charset is of dimension one; second octet must be nil", arg2);
2096 return make_char (MAKE_CHAR (charset, a1, 0));
2105 a2 = XINT (arg2) & 0x7f;
2107 if (a2 < lowlim || a2 > highlim)
2108 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2110 return make_char (MAKE_CHAR (charset, a1, a2));
2113 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2114 Return the character set of CHARACTER.
2118 CHECK_CHAR_COERCE_INT (character);
2120 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2123 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2124 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2125 N defaults to 0 if omitted.
2129 Lisp_Object charset;
2132 CHECK_CHAR_COERCE_INT (character);
2134 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2136 if (NILP (n) || EQ (n, Qzero))
2137 return make_int (octet0);
2138 else if (EQ (n, make_int (1)))
2139 return make_int (octet1);
2141 signal_simple_error ("Octet number must be 0 or 1", n);
2145 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2146 Return code-point of CHARACTER in specified CHARSET.
2148 (character, charset, defined_only))
2152 CHECK_CHAR_COERCE_INT (character);
2153 charset = Fget_charset (charset);
2154 code_point = charset_code_point (charset, XCHAR (character),
2155 !NILP (defined_only));
2156 if (code_point >= 0)
2157 return make_int (code_point);
2163 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2164 Return list of charset and one or two position-codes of CHARACTER.
2168 /* This function can GC */
2169 struct gcpro gcpro1, gcpro2;
2170 Lisp_Object charset = Qnil;
2171 Lisp_Object rc = Qnil;
2179 GCPRO2 (charset, rc);
2180 CHECK_CHAR_COERCE_INT (character);
2183 code_point = ENCODE_CHAR (XCHAR (character), charset);
2184 dimension = XCHARSET_DIMENSION (charset);
2185 while (dimension > 0)
2187 rc = Fcons (make_int (code_point & 255), rc);
2191 rc = Fcons (XCHARSET_NAME (charset), rc);
2193 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2195 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2197 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2201 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2210 #ifdef ENABLE_COMPOSITE_CHARS
2211 /************************************************************************/
2212 /* composite character functions */
2213 /************************************************************************/
2216 lookup_composite_char (Bufbyte *str, int len)
2218 Lisp_Object lispstr = make_string (str, len);
2219 Lisp_Object ch = Fgethash (lispstr,
2220 Vcomposite_char_string2char_hash_table,
2226 if (composite_char_row_next >= 128)
2227 signal_simple_error ("No more composite chars available", lispstr);
2228 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2229 composite_char_col_next);
2230 Fputhash (make_char (emch), lispstr,
2231 Vcomposite_char_char2string_hash_table);
2232 Fputhash (lispstr, make_char (emch),
2233 Vcomposite_char_string2char_hash_table);
2234 composite_char_col_next++;
2235 if (composite_char_col_next >= 128)
2237 composite_char_col_next = 32;
2238 composite_char_row_next++;
2247 composite_char_string (Emchar ch)
2249 Lisp_Object str = Fgethash (make_char (ch),
2250 Vcomposite_char_char2string_hash_table,
2252 assert (!UNBOUNDP (str));
2256 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2257 Convert a string into a single composite character.
2258 The character is the result of overstriking all the characters in
2263 CHECK_STRING (string);
2264 return make_char (lookup_composite_char (XSTRING_DATA (string),
2265 XSTRING_LENGTH (string)));
2268 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2269 Return a string of the characters comprising a composite character.
2277 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2278 signal_simple_error ("Must be composite char", ch);
2279 return composite_char_string (emch);
2281 #endif /* ENABLE_COMPOSITE_CHARS */
2284 /************************************************************************/
2285 /* initialization */
2286 /************************************************************************/
2289 syms_of_mule_charset (void)
2291 INIT_LRECORD_IMPLEMENTATION (charset);
2293 DEFSUBR (Fcharsetp);
2294 DEFSUBR (Ffind_charset);
2295 DEFSUBR (Fget_charset);
2296 DEFSUBR (Fcharset_list);
2297 DEFSUBR (Fcharset_name);
2298 DEFSUBR (Fmake_charset);
2299 DEFSUBR (Fmake_reverse_direction_charset);
2300 /* DEFSUBR (Freverse_direction_charset); */
2301 DEFSUBR (Fdefine_charset_alias);
2302 DEFSUBR (Fcharset_from_attributes);
2303 DEFSUBR (Fcharset_short_name);
2304 DEFSUBR (Fcharset_long_name);
2305 DEFSUBR (Fcharset_description);
2306 DEFSUBR (Fcharset_dimension);
2307 DEFSUBR (Fcharset_property);
2308 DEFSUBR (Fcharset_id);
2309 DEFSUBR (Fset_charset_ccl_program);
2310 DEFSUBR (Fset_charset_registry);
2312 DEFSUBR (Fcharset_mapping_table);
2313 DEFSUBR (Fset_charset_mapping_table);
2317 DEFSUBR (Fdecode_char);
2318 DEFSUBR (Fdecode_builtin_char);
2319 DEFSUBR (Fencode_char);
2321 DEFSUBR (Fmake_char);
2322 DEFSUBR (Fchar_charset);
2323 DEFSUBR (Fchar_octet);
2324 DEFSUBR (Fsplit_char);
2326 #ifdef ENABLE_COMPOSITE_CHARS
2327 DEFSUBR (Fmake_composite_char);
2328 DEFSUBR (Fcomposite_char_string);
2331 defsymbol (&Qcharsetp, "charsetp");
2332 defsymbol (&Qregistry, "registry");
2333 defsymbol (&Qfinal, "final");
2334 defsymbol (&Qgraphic, "graphic");
2335 defsymbol (&Qdirection, "direction");
2336 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2337 defsymbol (&Qshort_name, "short-name");
2338 defsymbol (&Qlong_name, "long-name");
2340 defsymbol (&Qmother, "mother");
2341 defsymbol (&Qmin_code, "min-code");
2342 defsymbol (&Qmax_code, "max-code");
2343 defsymbol (&Qcode_offset, "code-offset");
2344 defsymbol (&Qconversion, "conversion");
2345 defsymbol (&Q94x60, "94x60");
2346 defsymbol (&Q94x94x60, "94x94x60");
2349 defsymbol (&Ql2r, "l2r");
2350 defsymbol (&Qr2l, "r2l");
2352 /* Charsets, compatible with FSF 20.3
2353 Naming convention is Script-Charset[-Edition] */
2354 defsymbol (&Qascii, "ascii");
2355 defsymbol (&Qcontrol_1, "control-1");
2356 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2357 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2358 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2359 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2360 defsymbol (&Qthai_tis620, "thai-tis620");
2361 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2362 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2363 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2364 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2365 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2366 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2367 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2368 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2369 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2370 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2371 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2372 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2373 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2374 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2375 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2376 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2378 defsymbol (&Qucs, "ucs");
2379 defsymbol (&Qucs_bmp, "ucs-bmp");
2380 defsymbol (&Qucs_smp, "ucs-smp");
2381 defsymbol (&Qucs_sip, "ucs-sip");
2382 defsymbol (&Qucs_gb, "ucs-gb");
2383 defsymbol (&Qucs_cns, "ucs-cns");
2384 defsymbol (&Qlatin_viscii, "latin-viscii");
2385 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2386 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2387 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2388 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2389 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2390 defsymbol (&Qjis_x0208, "=jis-x0208");
2391 defsymbol (&Qchinese_big5, "chinese-big5");
2392 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2394 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2395 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2397 defsymbol (&Qcomposite, "composite");
2401 vars_of_mule_charset (void)
2408 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2409 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2411 /* Table of charsets indexed by leading byte. */
2412 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2413 chlook->charset_by_leading_byte[i] = Qnil;
2416 /* Table of charsets indexed by type/final-byte. */
2417 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2418 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2419 chlook->charset_by_attributes[i][j] = Qnil;
2421 /* Table of charsets indexed by type/final-byte/direction. */
2422 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2423 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2424 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2425 chlook->charset_by_attributes[i][j][k] = Qnil;
2429 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2431 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2432 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2436 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2437 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2438 Leading-code of private TYPE9N charset of column-width 1.
2440 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2444 Vdefault_coded_charset_priority_list = Qnil;
2445 DEFVAR_LISP ("default-coded-charset-priority-list",
2446 &Vdefault_coded_charset_priority_list /*
2447 Default order of preferred coded-character-sets.
2453 complex_vars_of_mule_charset (void)
2455 staticpro (&Vcharset_hash_table);
2456 Vcharset_hash_table =
2457 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2459 /* Predefined character sets. We store them into variables for
2463 staticpro (&Vcharset_ucs);
2465 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2466 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2467 build_string ("UCS"),
2468 build_string ("UCS"),
2469 build_string ("ISO/IEC 10646"),
2471 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2472 staticpro (&Vcharset_ucs_bmp);
2474 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2475 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2476 build_string ("BMP"),
2477 build_string ("UCS-BMP"),
2478 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2480 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2481 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2482 staticpro (&Vcharset_ucs_smp);
2484 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2485 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2486 build_string ("SMP"),
2487 build_string ("UCS-SMP"),
2488 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2489 build_string ("UCS00-1"),
2490 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2491 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2492 staticpro (&Vcharset_ucs_sip);
2494 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2495 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2496 build_string ("SIP"),
2497 build_string ("UCS-SIP"),
2498 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2499 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2500 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2501 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2502 staticpro (&Vcharset_ucs_gb);
2504 make_charset (LEADING_BYTE_UCS_GB, Qucs_gb, 256, 3,
2505 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2506 build_string ("UCS for GB"),
2507 build_string ("UCS for GB"),
2508 build_string ("ISO/IEC 10646 for GB"),
2510 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2511 staticpro (&Vcharset_ucs_cns);
2513 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2514 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2515 build_string ("UCS for CNS"),
2516 build_string ("UCS for CNS 11643"),
2517 build_string ("ISO/IEC 10646 for CNS 11643"),
2519 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2521 # define MIN_CHAR_THAI 0
2522 # define MAX_CHAR_THAI 0
2523 /* # define MIN_CHAR_HEBREW 0 */
2524 /* # define MAX_CHAR_HEBREW 0 */
2525 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2526 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2528 staticpro (&Vcharset_ascii);
2530 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2531 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2532 build_string ("ASCII"),
2533 build_string ("ASCII)"),
2534 build_string ("ASCII (ISO646 IRV)"),
2535 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2536 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2537 staticpro (&Vcharset_control_1);
2538 Vcharset_control_1 =
2539 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2540 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2541 build_string ("C1"),
2542 build_string ("Control characters"),
2543 build_string ("Control characters 128-191"),
2545 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2546 staticpro (&Vcharset_latin_iso8859_1);
2547 Vcharset_latin_iso8859_1 =
2548 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2549 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2550 build_string ("Latin-1"),
2551 build_string ("ISO8859-1 (Latin-1)"),
2552 build_string ("ISO8859-1 (Latin-1)"),
2553 build_string ("iso8859-1"),
2554 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2555 staticpro (&Vcharset_latin_iso8859_2);
2556 Vcharset_latin_iso8859_2 =
2557 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2558 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2559 build_string ("Latin-2"),
2560 build_string ("ISO8859-2 (Latin-2)"),
2561 build_string ("ISO8859-2 (Latin-2)"),
2562 build_string ("iso8859-2"),
2563 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2564 staticpro (&Vcharset_latin_iso8859_3);
2565 Vcharset_latin_iso8859_3 =
2566 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2567 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2568 build_string ("Latin-3"),
2569 build_string ("ISO8859-3 (Latin-3)"),
2570 build_string ("ISO8859-3 (Latin-3)"),
2571 build_string ("iso8859-3"),
2572 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2573 staticpro (&Vcharset_latin_iso8859_4);
2574 Vcharset_latin_iso8859_4 =
2575 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2576 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2577 build_string ("Latin-4"),
2578 build_string ("ISO8859-4 (Latin-4)"),
2579 build_string ("ISO8859-4 (Latin-4)"),
2580 build_string ("iso8859-4"),
2581 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2582 staticpro (&Vcharset_thai_tis620);
2583 Vcharset_thai_tis620 =
2584 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2585 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2586 build_string ("TIS620"),
2587 build_string ("TIS620 (Thai)"),
2588 build_string ("TIS620.2529 (Thai)"),
2589 build_string ("tis620"),
2590 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2591 staticpro (&Vcharset_greek_iso8859_7);
2592 Vcharset_greek_iso8859_7 =
2593 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2594 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2595 build_string ("ISO8859-7"),
2596 build_string ("ISO8859-7 (Greek)"),
2597 build_string ("ISO8859-7 (Greek)"),
2598 build_string ("iso8859-7"),
2599 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2600 staticpro (&Vcharset_arabic_iso8859_6);
2601 Vcharset_arabic_iso8859_6 =
2602 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2603 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2604 build_string ("ISO8859-6"),
2605 build_string ("ISO8859-6 (Arabic)"),
2606 build_string ("ISO8859-6 (Arabic)"),
2607 build_string ("iso8859-6"),
2608 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2609 staticpro (&Vcharset_hebrew_iso8859_8);
2610 Vcharset_hebrew_iso8859_8 =
2611 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2612 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2613 build_string ("ISO8859-8"),
2614 build_string ("ISO8859-8 (Hebrew)"),
2615 build_string ("ISO8859-8 (Hebrew)"),
2616 build_string ("iso8859-8"),
2618 0 /* MIN_CHAR_HEBREW */,
2619 0 /* MAX_CHAR_HEBREW */, 0, 32,
2620 Qnil, CONVERSION_IDENTICAL);
2621 staticpro (&Vcharset_katakana_jisx0201);
2622 Vcharset_katakana_jisx0201 =
2623 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2624 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2625 build_string ("JISX0201 Kana"),
2626 build_string ("JISX0201.1976 (Japanese Kana)"),
2627 build_string ("JISX0201.1976 Japanese Kana"),
2628 build_string ("jisx0201\\.1976"),
2629 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2630 staticpro (&Vcharset_latin_jisx0201);
2631 Vcharset_latin_jisx0201 =
2632 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2633 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2634 build_string ("JISX0201 Roman"),
2635 build_string ("JISX0201.1976 (Japanese Roman)"),
2636 build_string ("JISX0201.1976 Japanese Roman"),
2637 build_string ("jisx0201\\.1976"),
2638 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2639 staticpro (&Vcharset_cyrillic_iso8859_5);
2640 Vcharset_cyrillic_iso8859_5 =
2641 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2642 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2643 build_string ("ISO8859-5"),
2644 build_string ("ISO8859-5 (Cyrillic)"),
2645 build_string ("ISO8859-5 (Cyrillic)"),
2646 build_string ("iso8859-5"),
2647 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2648 staticpro (&Vcharset_latin_iso8859_9);
2649 Vcharset_latin_iso8859_9 =
2650 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2651 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2652 build_string ("Latin-5"),
2653 build_string ("ISO8859-9 (Latin-5)"),
2654 build_string ("ISO8859-9 (Latin-5)"),
2655 build_string ("iso8859-9"),
2656 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2658 staticpro (&Vcharset_jis_x0208);
2659 Vcharset_jis_x0208 =
2660 make_charset (LEADING_BYTE_JIS_X0208,
2662 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2663 build_string ("JIS X0208"),
2664 build_string ("JIS X0208 Common"),
2665 build_string ("JIS X0208 Common part"),
2666 build_string ("jisx0208\\.1990"),
2668 MIN_CHAR_JIS_X0208_1990,
2669 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2670 Qnil, CONVERSION_94x94);
2672 staticpro (&Vcharset_japanese_jisx0208_1978);
2673 Vcharset_japanese_jisx0208_1978 =
2674 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2675 Qjapanese_jisx0208_1978, 94, 2,
2676 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2677 build_string ("JIS X0208:1978"),
2678 build_string ("JIS X0208:1978 (Japanese)"),
2680 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2681 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2688 CONVERSION_IDENTICAL);
2689 staticpro (&Vcharset_chinese_gb2312);
2690 Vcharset_chinese_gb2312 =
2691 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2692 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2693 build_string ("GB2312"),
2694 build_string ("GB2312)"),
2695 build_string ("GB2312 Chinese simplified"),
2696 build_string ("gb2312"),
2697 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2698 staticpro (&Vcharset_chinese_gb12345);
2699 Vcharset_chinese_gb12345 =
2700 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2701 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2702 build_string ("G1"),
2703 build_string ("GB 12345)"),
2704 build_string ("GB 12345-1990"),
2705 build_string ("GB12345\\(\\.1990\\)?-0"),
2706 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2707 staticpro (&Vcharset_japanese_jisx0208);
2708 Vcharset_japanese_jisx0208 =
2709 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2710 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2711 build_string ("JISX0208"),
2712 build_string ("JIS X0208:1983 (Japanese)"),
2713 build_string ("JIS X0208:1983 Japanese Kanji"),
2714 build_string ("jisx0208\\.1983"),
2721 CONVERSION_IDENTICAL);
2723 staticpro (&Vcharset_japanese_jisx0208_1990);
2724 Vcharset_japanese_jisx0208_1990 =
2725 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2726 Qjapanese_jisx0208_1990, 94, 2,
2727 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2728 build_string ("JISX0208-1990"),
2729 build_string ("JIS X0208:1990 (Japanese)"),
2730 build_string ("JIS X0208:1990 Japanese Kanji"),
2731 build_string ("jisx0208\\.1990"),
2733 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
2734 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
2735 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
2736 Vcharset_jis_x0208 /* Qnil */,
2737 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
2739 staticpro (&Vcharset_korean_ksc5601);
2740 Vcharset_korean_ksc5601 =
2741 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2742 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2743 build_string ("KSC5601"),
2744 build_string ("KSC5601 (Korean"),
2745 build_string ("KSC5601 Korean Hangul and Hanja"),
2746 build_string ("ksc5601"),
2747 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2748 staticpro (&Vcharset_japanese_jisx0212);
2749 Vcharset_japanese_jisx0212 =
2750 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2751 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2752 build_string ("JISX0212"),
2753 build_string ("JISX0212 (Japanese)"),
2754 build_string ("JISX0212 Japanese Supplement"),
2755 build_string ("jisx0212"),
2756 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2758 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2759 staticpro (&Vcharset_chinese_cns11643_1);
2760 Vcharset_chinese_cns11643_1 =
2761 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2762 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2763 build_string ("CNS11643-1"),
2764 build_string ("CNS11643-1 (Chinese traditional)"),
2766 ("CNS 11643 Plane 1 Chinese traditional"),
2767 build_string (CHINESE_CNS_PLANE_RE("1")),
2768 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2769 staticpro (&Vcharset_chinese_cns11643_2);
2770 Vcharset_chinese_cns11643_2 =
2771 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2772 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2773 build_string ("CNS11643-2"),
2774 build_string ("CNS11643-2 (Chinese traditional)"),
2776 ("CNS 11643 Plane 2 Chinese traditional"),
2777 build_string (CHINESE_CNS_PLANE_RE("2")),
2778 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2780 staticpro (&Vcharset_latin_tcvn5712);
2781 Vcharset_latin_tcvn5712 =
2782 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2783 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2784 build_string ("TCVN 5712"),
2785 build_string ("TCVN 5712 (VSCII-2)"),
2786 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
2787 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2788 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2789 staticpro (&Vcharset_latin_viscii_lower);
2790 Vcharset_latin_viscii_lower =
2791 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2792 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2793 build_string ("VISCII lower"),
2794 build_string ("VISCII lower (Vietnamese)"),
2795 build_string ("VISCII lower (Vietnamese)"),
2796 build_string ("MULEVISCII-LOWER"),
2797 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2798 staticpro (&Vcharset_latin_viscii_upper);
2799 Vcharset_latin_viscii_upper =
2800 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2801 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2802 build_string ("VISCII upper"),
2803 build_string ("VISCII upper (Vietnamese)"),
2804 build_string ("VISCII upper (Vietnamese)"),
2805 build_string ("MULEVISCII-UPPER"),
2806 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2807 staticpro (&Vcharset_latin_viscii);
2808 Vcharset_latin_viscii =
2809 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2810 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2811 build_string ("VISCII"),
2812 build_string ("VISCII 1.1 (Vietnamese)"),
2813 build_string ("VISCII 1.1 (Vietnamese)"),
2814 build_string ("VISCII1\\.1"),
2815 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2816 staticpro (&Vcharset_chinese_big5);
2817 Vcharset_chinese_big5 =
2818 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2819 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2820 build_string ("Big5"),
2821 build_string ("Big5"),
2822 build_string ("Big5 Chinese traditional"),
2823 build_string ("big5-0"),
2825 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2826 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
2828 staticpro (&Vcharset_ethiopic_ucs);
2829 Vcharset_ethiopic_ucs =
2830 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
2831 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2832 build_string ("Ethiopic (UCS)"),
2833 build_string ("Ethiopic (UCS)"),
2834 build_string ("Ethiopic of UCS"),
2835 build_string ("Ethiopic-Unicode"),
2836 Qnil, 0x1200, 0x137F, 0, 0,
2837 Qnil, CONVERSION_IDENTICAL);
2839 staticpro (&Vcharset_chinese_big5_1);
2840 Vcharset_chinese_big5_1 =
2841 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
2842 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
2843 build_string ("Big5"),
2844 build_string ("Big5 (Level-1)"),
2846 ("Big5 Level-1 Chinese traditional"),
2847 build_string ("big5"),
2848 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2849 staticpro (&Vcharset_chinese_big5_2);
2850 Vcharset_chinese_big5_2 =
2851 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
2852 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
2853 build_string ("Big5"),
2854 build_string ("Big5 (Level-2)"),
2856 ("Big5 Level-2 Chinese traditional"),
2857 build_string ("big5"),
2858 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2860 #ifdef ENABLE_COMPOSITE_CHARS
2861 /* #### For simplicity, we put composite chars into a 96x96 charset.
2862 This is going to lead to problems because you can run out of
2863 room, esp. as we don't yet recycle numbers. */
2864 staticpro (&Vcharset_composite);
2865 Vcharset_composite =
2866 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
2867 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2868 build_string ("Composite"),
2869 build_string ("Composite characters"),
2870 build_string ("Composite characters"),
2873 /* #### not dumped properly */
2874 composite_char_row_next = 32;
2875 composite_char_col_next = 32;
2877 Vcomposite_char_string2char_hash_table =
2878 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2879 Vcomposite_char_char2string_hash_table =
2880 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2881 staticpro (&Vcomposite_char_string2char_hash_table);
2882 staticpro (&Vcomposite_char_char2string_hash_table);
2883 #endif /* ENABLE_COMPOSITE_CHARS */