1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002,2003 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_gb;
72 Lisp_Object Vcharset_ucs_cns;
73 Lisp_Object Vcharset_ucs_jis;
74 Lisp_Object Vcharset_latin_viscii;
75 Lisp_Object Vcharset_latin_tcvn5712;
76 Lisp_Object Vcharset_latin_viscii_lower;
77 Lisp_Object Vcharset_latin_viscii_upper;
78 Lisp_Object Vcharset_jis_x0208;
79 Lisp_Object Vcharset_chinese_big5;
80 Lisp_Object Vcharset_ethiopic_ucs;
82 Lisp_Object Vcharset_chinese_big5_1;
83 Lisp_Object Vcharset_chinese_big5_2;
85 #ifdef ENABLE_COMPOSITE_CHARS
86 Lisp_Object Vcharset_composite;
88 /* Hash tables for composite chars. One maps string representing
89 composed chars to their equivalent chars; one goes the
91 Lisp_Object Vcomposite_char_char2string_hash_table;
92 Lisp_Object Vcomposite_char_string2char_hash_table;
94 static int composite_char_row_next;
95 static int composite_char_col_next;
97 #endif /* ENABLE_COMPOSITE_CHARS */
99 struct charset_lookup *chlook;
101 static const struct lrecord_description charset_lookup_description_1[] = {
102 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
104 NUM_LEADING_BYTES+4*128
111 static const struct struct_description charset_lookup_description = {
112 sizeof (struct charset_lookup),
113 charset_lookup_description_1
117 /* Table of number of bytes in the string representation of a character
118 indexed by the first byte of that representation.
120 rep_bytes_by_first_byte(c) is more efficient than the equivalent
121 canonical computation:
123 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
125 const Bytecount rep_bytes_by_first_byte[0xA0] =
126 { /* 0x00 - 0x7f are for straight ASCII */
127 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
128 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
134 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
135 /* 0x80 - 0x8f are for Dimension-1 official charsets */
137 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
139 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
141 /* 0x90 - 0x9d are for Dimension-2 official charsets */
142 /* 0x9e is for Dimension-1 private charsets */
143 /* 0x9f is for Dimension-2 private charsets */
144 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
150 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
152 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
156 if (XVECTOR_LENGTH (v) > ccs_len)
159 for (i = 0; i < XVECTOR_LENGTH (v); i++)
161 Lisp_Object c = XVECTOR_DATA(v)[i];
163 if (!NILP (c) && !CHARP (c))
167 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
179 put_char_ccs_code_point (Lisp_Object character,
180 Lisp_Object ccs, Lisp_Object value)
182 if (!EQ (XCHARSET_NAME (ccs), Qucs)
184 || (XCHAR (character) != XINT (value)))
186 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
190 { /* obsolete representation: value must be a list of bytes */
191 Lisp_Object ret = Fcar (value);
195 signal_simple_error ("Invalid value for coded-charset", value);
196 code_point = XINT (ret);
197 if (XCHARSET_GRAPHIC (ccs) == 1)
205 signal_simple_error ("Invalid value for coded-charset",
209 signal_simple_error ("Invalid value for coded-charset",
212 if (XCHARSET_GRAPHIC (ccs) == 1)
214 code_point = (code_point << 8) | j;
217 value = make_int (code_point);
219 else if (INTP (value))
221 code_point = XINT (value);
222 if (XCHARSET_GRAPHIC (ccs) == 1)
224 code_point &= 0x7F7F7F7F;
225 value = make_int (code_point);
229 signal_simple_error ("Invalid value for coded-charset", value);
233 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
236 decoding_table_remove_char (ccs, XINT (cpos));
239 decoding_table_put_char (ccs, code_point, character);
245 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
247 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
248 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
250 if (VECTORP (decoding_table))
252 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
256 decoding_table_remove_char (ccs, XINT (cpos));
259 if (CHAR_TABLEP (encoding_table))
261 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
269 int leading_code_private_11;
272 Lisp_Object Qcharsetp;
274 /* Qdoc_string, Qdimension, Qchars defined in general.c */
275 Lisp_Object Qregistry, Qfinal, Qgraphic;
276 Lisp_Object Qdirection;
277 Lisp_Object Qreverse_direction_charset;
278 Lisp_Object Qleading_byte;
279 Lisp_Object Qshort_name, Qlong_name;
281 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
282 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60;
299 Qjapanese_jisx0208_1978,
303 Qjapanese_jisx0208_1990,
320 Qvietnamese_viscii_lower,
321 Qvietnamese_viscii_upper,
330 Lisp_Object Ql2r, Qr2l;
332 Lisp_Object Vcharset_hash_table;
334 /* Composite characters are characters constructed by overstriking two
335 or more regular characters.
337 1) The old Mule implementation involves storing composite characters
338 in a buffer as a tag followed by all of the actual characters
339 used to make up the composite character. I think this is a bad
340 idea; it greatly complicates code that wants to handle strings
341 one character at a time because it has to deal with the possibility
342 of great big ungainly characters. It's much more reasonable to
343 simply store an index into a table of composite characters.
345 2) The current implementation only allows for 16,384 separate
346 composite characters over the lifetime of the XEmacs process.
347 This could become a potential problem if the user
348 edited lots of different files that use composite characters.
349 Due to FSF bogosity, increasing the number of allowable
350 composite characters under Mule would decrease the number
351 of possible faces that can exist. Mule already has shrunk
352 this to 2048, and further shrinkage would become uncomfortable.
353 No such problems exist in XEmacs.
355 Composite characters could be represented as 0x80 C1 C2 C3,
356 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
357 for slightly under 2^20 (one million) composite characters
358 over the XEmacs process lifetime, and you only need to
359 increase the size of a Mule character from 19 to 21 bits.
360 Or you could use 0x80 C1 C2 C3 C4, allowing for about
361 85 million (slightly over 2^26) composite characters. */
364 /************************************************************************/
365 /* Basic Emchar functions */
366 /************************************************************************/
368 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
369 string in STR. Returns the number of bytes stored.
370 Do not call this directly. Use the macro set_charptr_emchar() instead.
374 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
389 else if ( c <= 0x7ff )
391 *p++ = (c >> 6) | 0xc0;
392 *p++ = (c & 0x3f) | 0x80;
394 else if ( c <= 0xffff )
396 *p++ = (c >> 12) | 0xe0;
397 *p++ = ((c >> 6) & 0x3f) | 0x80;
398 *p++ = (c & 0x3f) | 0x80;
400 else if ( c <= 0x1fffff )
402 *p++ = (c >> 18) | 0xf0;
403 *p++ = ((c >> 12) & 0x3f) | 0x80;
404 *p++ = ((c >> 6) & 0x3f) | 0x80;
405 *p++ = (c & 0x3f) | 0x80;
407 else if ( c <= 0x3ffffff )
409 *p++ = (c >> 24) | 0xf8;
410 *p++ = ((c >> 18) & 0x3f) | 0x80;
411 *p++ = ((c >> 12) & 0x3f) | 0x80;
412 *p++ = ((c >> 6) & 0x3f) | 0x80;
413 *p++ = (c & 0x3f) | 0x80;
417 *p++ = (c >> 30) | 0xfc;
418 *p++ = ((c >> 24) & 0x3f) | 0x80;
419 *p++ = ((c >> 18) & 0x3f) | 0x80;
420 *p++ = ((c >> 12) & 0x3f) | 0x80;
421 *p++ = ((c >> 6) & 0x3f) | 0x80;
422 *p++ = (c & 0x3f) | 0x80;
425 BREAKUP_CHAR (c, charset, c1, c2);
426 lb = CHAR_LEADING_BYTE (c);
427 if (LEADING_BYTE_PRIVATE_P (lb))
428 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
430 if (EQ (charset, Vcharset_control_1))
439 /* Return the first character from a Mule-encoded string in STR,
440 assuming it's non-ASCII. Do not call this directly.
441 Use the macro charptr_emchar() instead. */
444 non_ascii_charptr_emchar (const Bufbyte *str)
457 else if ( b >= 0xf8 )
462 else if ( b >= 0xf0 )
467 else if ( b >= 0xe0 )
472 else if ( b >= 0xc0 )
482 for( ; len > 0; len-- )
485 ch = ( ch << 6 ) | ( b & 0x3f );
489 Bufbyte i0 = *str, i1, i2 = 0;
492 if (i0 == LEADING_BYTE_CONTROL_1)
493 return (Emchar) (*++str - 0x20);
495 if (LEADING_BYTE_PREFIX_P (i0))
500 charset = CHARSET_BY_LEADING_BYTE (i0);
501 if (XCHARSET_DIMENSION (charset) == 2)
504 return MAKE_CHAR (charset, i1, i2);
508 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
509 Do not call this directly. Use the macro valid_char_p() instead. */
513 non_ascii_valid_char_p (Emchar ch)
517 /* Must have only lowest 19 bits set */
521 f1 = CHAR_FIELD1 (ch);
522 f2 = CHAR_FIELD2 (ch);
523 f3 = CHAR_FIELD3 (ch);
529 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
530 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
531 f2 > MAX_CHAR_FIELD2_PRIVATE)
536 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
537 f2 <= MAX_CHAR_FIELD2_PRIVATE))
541 NOTE: This takes advantage of the fact that
542 FIELD2_TO_OFFICIAL_LEADING_BYTE and
543 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
545 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
546 if (EQ (charset, Qnil))
548 return (XCHARSET_CHARS (charset) == 96);
554 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
555 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
556 f1 > MAX_CHAR_FIELD1_PRIVATE)
558 if (f2 < 0x20 || f3 < 0x20)
561 #ifdef ENABLE_COMPOSITE_CHARS
562 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
564 if (UNBOUNDP (Fgethash (make_int (ch),
565 Vcomposite_char_char2string_hash_table,
570 #endif /* ENABLE_COMPOSITE_CHARS */
572 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
573 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
576 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
578 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
581 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
583 if (EQ (charset, Qnil))
585 return (XCHARSET_CHARS (charset) == 96);
591 /************************************************************************/
592 /* Basic string functions */
593 /************************************************************************/
595 /* Copy the character pointed to by SRC into DST. Do not call this
596 directly. Use the macro charptr_copy_char() instead.
597 Return the number of bytes copied. */
600 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
602 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
604 for (i = bytes; i; i--, dst++, src++)
610 /************************************************************************/
611 /* streams of Emchars */
612 /************************************************************************/
614 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
615 The functions below are not meant to be called directly; use
616 the macros in insdel.h. */
619 Lstream_get_emchar_1 (Lstream *stream, int ch)
621 Bufbyte str[MAX_EMCHAR_LEN];
622 Bufbyte *strptr = str;
625 str[0] = (Bufbyte) ch;
627 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
629 int c = Lstream_getc (stream);
630 bufpos_checking_assert (c >= 0);
631 *++strptr = (Bufbyte) c;
633 return charptr_emchar (str);
637 Lstream_fput_emchar (Lstream *stream, Emchar ch)
639 Bufbyte str[MAX_EMCHAR_LEN];
640 Bytecount len = set_charptr_emchar (str, ch);
641 return Lstream_write (stream, str, len);
645 Lstream_funget_emchar (Lstream *stream, Emchar ch)
647 Bufbyte str[MAX_EMCHAR_LEN];
648 Bytecount len = set_charptr_emchar (str, ch);
649 Lstream_unread (stream, str, len);
653 /************************************************************************/
655 /************************************************************************/
658 mark_charset (Lisp_Object obj)
660 Lisp_Charset *cs = XCHARSET (obj);
662 mark_object (cs->short_name);
663 mark_object (cs->long_name);
664 mark_object (cs->doc_string);
665 mark_object (cs->registry);
666 mark_object (cs->ccl_program);
668 mark_object (cs->decoding_table);
669 mark_object (cs->mother);
675 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
677 Lisp_Charset *cs = XCHARSET (obj);
681 error ("printing unreadable object #<charset %s 0x%x>",
682 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
685 write_c_string ("#<charset ", printcharfun);
686 print_internal (CHARSET_NAME (cs), printcharfun, 0);
687 write_c_string (" ", printcharfun);
688 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
689 write_c_string (" ", printcharfun);
690 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
691 write_c_string (" ", printcharfun);
692 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
693 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
695 CHARSET_DIMENSION (cs),
696 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
697 CHARSET_COLUMNS (cs),
698 CHARSET_GRAPHIC (cs),
700 write_c_string (buf, printcharfun);
701 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
702 sprintf (buf, " 0x%x>", cs->header.uid);
703 write_c_string (buf, printcharfun);
706 static const struct lrecord_description charset_description[] = {
707 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
708 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
709 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
710 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
711 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
712 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
713 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
715 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
716 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
721 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
722 mark_charset, print_charset, 0, 0, 0,
726 /* Make a new charset. */
727 /* #### SJT Should generic properties be allowed? */
729 make_charset (Charset_ID id, Lisp_Object name,
730 unsigned short chars, unsigned char dimension,
731 unsigned char columns, unsigned char graphic,
732 Bufbyte final, unsigned char direction, Lisp_Object short_name,
733 Lisp_Object long_name, Lisp_Object doc,
735 Lisp_Object decoding_table,
736 Emchar min_code, Emchar max_code,
737 Emchar code_offset, unsigned char byte_offset,
738 Lisp_Object mother, unsigned char conversion)
741 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
745 XSETCHARSET (obj, cs);
747 CHARSET_ID (cs) = id;
748 CHARSET_NAME (cs) = name;
749 CHARSET_SHORT_NAME (cs) = short_name;
750 CHARSET_LONG_NAME (cs) = long_name;
751 CHARSET_CHARS (cs) = chars;
752 CHARSET_DIMENSION (cs) = dimension;
753 CHARSET_DIRECTION (cs) = direction;
754 CHARSET_COLUMNS (cs) = columns;
755 CHARSET_GRAPHIC (cs) = graphic;
756 CHARSET_FINAL (cs) = final;
757 CHARSET_DOC_STRING (cs) = doc;
758 CHARSET_REGISTRY (cs) = reg;
759 CHARSET_CCL_PROGRAM (cs) = Qnil;
760 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
762 CHARSET_DECODING_TABLE(cs) = Qunbound;
763 CHARSET_MIN_CODE (cs) = min_code;
764 CHARSET_MAX_CODE (cs) = max_code;
765 CHARSET_CODE_OFFSET (cs) = code_offset;
766 CHARSET_BYTE_OFFSET (cs) = byte_offset;
767 CHARSET_MOTHER (cs) = mother;
768 CHARSET_CONVERSION (cs) = conversion;
772 if (id == LEADING_BYTE_ASCII)
773 CHARSET_REP_BYTES (cs) = 1;
775 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
777 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
782 /* some charsets do not have final characters. This includes
783 ASCII, Control-1, Composite, and the two faux private
785 unsigned char iso2022_type
786 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
788 if (code_offset == 0)
790 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
791 chlook->charset_by_attributes[iso2022_type][final] = obj;
795 (chlook->charset_by_attributes[iso2022_type][final][direction]));
796 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
800 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
801 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
803 /* Some charsets are "faux" and don't have names or really exist at
804 all except in the leading-byte table. */
806 Fputhash (name, obj, Vcharset_hash_table);
811 get_unallocated_leading_byte (int dimension)
816 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
819 lb = chlook->next_allocated_leading_byte++;
823 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
826 lb = chlook->next_allocated_1_byte_leading_byte++;
830 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
833 lb = chlook->next_allocated_2_byte_leading_byte++;
839 ("No more character sets free for this dimension",
840 make_int (dimension));
846 /* Number of Big5 characters which have the same code in 1st byte. */
848 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
851 decode_defined_char (Lisp_Object ccs, int code_point)
853 int dim = XCHARSET_DIMENSION (ccs);
854 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
862 = get_ccs_octet_table (decoding_table, ccs,
863 (code_point >> (dim * 8)) & 255);
865 if (CHARP (decoding_table))
866 return XCHAR (decoding_table);
869 else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
871 if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
873 if ( EQ (mother, Vcharset_ucs) )
874 return DECODE_CHAR (mother, code_point);
876 return decode_defined_char (mother, code_point);
883 decode_builtin_char (Lisp_Object charset, int code_point)
885 Lisp_Object mother = XCHARSET_MOTHER (charset);
888 if ( XCHARSET_MAX_CODE (charset) > 0 )
890 if ( CHARSETP (mother) )
892 int code = code_point;
894 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
896 int row = code_point >> 8;
897 int cell = code_point & 255;
901 else if (row < 16 + 32 + 30)
902 code = (row - (16 + 32)) * 94 + cell - 33;
903 else if (row < 18 + 32 + 30)
905 else if (row < 18 + 32 + 60)
906 code = (row - (18 + 32)) * 94 + cell - 33;
908 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
910 int plane = code_point >> 16;
911 int row = (code_point >> 8) & 255;
912 int cell = code_point & 255;
916 else if (row < 16 + 32 + 30)
918 = (plane - 33) * 94 * 60
919 + (row - (16 + 32)) * 94
921 else if (row < 18 + 32 + 30)
923 else if (row < 18 + 32 + 60)
925 = (plane - 33) * 94 * 60
926 + (row - (18 + 32)) * 94
930 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
935 = (XCHARSET_DIMENSION (charset) == 1
937 code_point - XCHARSET_BYTE_OFFSET (charset)
939 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
940 * XCHARSET_CHARS (charset)
941 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
942 + XCHARSET_CODE_OFFSET (charset);
943 if ((cid < XCHARSET_MIN_CODE (charset))
944 || (XCHARSET_MAX_CODE (charset) < cid))
949 else if ((final = XCHARSET_FINAL (charset)) >= '0')
951 if (XCHARSET_DIMENSION (charset) == 1)
953 switch (XCHARSET_CHARS (charset))
957 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
960 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
968 switch (XCHARSET_CHARS (charset))
971 return MIN_CHAR_94x94
972 + (final - '0') * 94 * 94
973 + (((code_point >> 8) & 0x7F) - 33) * 94
974 + ((code_point & 0x7F) - 33);
976 return MIN_CHAR_96x96
977 + (final - '0') * 96 * 96
978 + (((code_point >> 8) & 0x7F) - 32) * 96
979 + ((code_point & 0x7F) - 32);
991 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
993 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
996 if ( CHAR_TABLEP (encoding_table)
997 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1002 Lisp_Object mother = XCHARSET_MOTHER (charset);
1003 int min = XCHARSET_MIN_CODE (charset);
1004 int max = XCHARSET_MAX_CODE (charset);
1007 if ( CHARSETP (mother) )
1009 if (XCHARSET_FINAL (charset) >= '0')
1010 code = charset_code_point (mother, ch, 1);
1012 code = charset_code_point (mother, ch, defined_only);
1014 else if (defined_only)
1016 else if ( ((max == 0) && CHARSETP (mother)
1017 && (XCHARSET_FINAL (charset) == 0))
1018 || ((min <= ch) && (ch <= max)) )
1020 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1021 || ((min <= code) && (code <= max)) )
1023 int d = code - XCHARSET_CODE_OFFSET (charset);
1025 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1027 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1029 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1031 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1034 int cell = d % 94 + 33;
1040 return (row << 8) | cell;
1042 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1043 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1044 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1045 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1046 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1048 int plane = d / (94 * 60) + 33;
1049 int row = (d % (94 * 60)) / 94;
1050 int cell = d % 94 + 33;
1056 return (plane << 16) | (row << 8) | cell;
1058 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1060 ( (d / (94 * 94) + 33) << 16)
1061 | ((d / 94 % 94 + 33) << 8)
1063 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1065 ( (d / (96 * 96) + 32) << 16)
1066 | ((d / 96 % 96 + 32) << 8)
1068 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1070 ( (d / (94 * 94 * 94) + 33) << 24)
1071 | ((d / (94 * 94) % 94 + 33) << 16)
1072 | ((d / 94 % 94 + 33) << 8)
1074 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1076 ( (d / (96 * 96 * 96) + 32) << 24)
1077 | ((d / (96 * 96) % 96 + 32) << 16)
1078 | ((d / 96 % 96 + 32) << 8)
1082 printf ("Unknown CCS-conversion %d is specified!",
1083 XCHARSET_CONVERSION (charset));
1087 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1088 ( XCHARSET_MIN_CODE (charset) == 0 )
1090 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1091 (XCHARSET_CODE_OFFSET (charset)
1092 == XCHARSET_MIN_CODE (charset))
1097 if (XCHARSET_DIMENSION (charset) == 1)
1099 if (XCHARSET_CHARS (charset) == 94)
1101 if (((d = ch - (MIN_CHAR_94
1102 + (XCHARSET_FINAL (charset) - '0') * 94))
1107 else if (XCHARSET_CHARS (charset) == 96)
1109 if (((d = ch - (MIN_CHAR_96
1110 + (XCHARSET_FINAL (charset) - '0') * 96))
1118 else if (XCHARSET_DIMENSION (charset) == 2)
1120 if (XCHARSET_CHARS (charset) == 94)
1122 if (((d = ch - (MIN_CHAR_94x94
1124 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1127 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1129 else if (XCHARSET_CHARS (charset) == 96)
1131 if (((d = ch - (MIN_CHAR_96x96
1133 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1136 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1147 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1149 if (c <= MAX_CHAR_BASIC_LATIN)
1151 *charset = Vcharset_ascii;
1156 *charset = Vcharset_control_1;
1161 *charset = Vcharset_latin_iso8859_1;
1165 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1167 *charset = Vcharset_hebrew_iso8859_8;
1168 return c - MIN_CHAR_HEBREW + 0x20;
1171 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1173 *charset = Vcharset_thai_tis620;
1174 return c - MIN_CHAR_THAI + 0x20;
1177 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1178 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1180 return list2 (Vcharset_katakana_jisx0201,
1181 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1184 else if (c <= MAX_CHAR_BMP)
1186 *charset = Vcharset_ucs_bmp;
1189 else if (c <= MAX_CHAR_SMP)
1191 *charset = Vcharset_ucs_smp;
1192 return c - MIN_CHAR_SMP;
1194 else if (c <= MAX_CHAR_SIP)
1196 *charset = Vcharset_ucs_sip;
1197 return c - MIN_CHAR_SIP;
1199 else if (c < MIN_CHAR_94)
1201 *charset = Vcharset_ucs;
1204 else if (c <= MAX_CHAR_94)
1206 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1207 ((c - MIN_CHAR_94) / 94) + '0',
1208 CHARSET_LEFT_TO_RIGHT);
1209 if (!NILP (*charset))
1210 return ((c - MIN_CHAR_94) % 94) + 33;
1213 *charset = Vcharset_ucs;
1217 else if (c <= MAX_CHAR_96)
1219 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1220 ((c - MIN_CHAR_96) / 96) + '0',
1221 CHARSET_LEFT_TO_RIGHT);
1222 if (!NILP (*charset))
1223 return ((c - MIN_CHAR_96) % 96) + 32;
1226 *charset = Vcharset_ucs;
1230 else if (c <= MAX_CHAR_94x94)
1233 = CHARSET_BY_ATTRIBUTES (94, 2,
1234 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1235 CHARSET_LEFT_TO_RIGHT);
1236 if (!NILP (*charset))
1237 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1238 | (((c - MIN_CHAR_94x94) % 94) + 33);
1241 *charset = Vcharset_ucs;
1245 else if (c <= MAX_CHAR_96x96)
1248 = CHARSET_BY_ATTRIBUTES (96, 2,
1249 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1250 CHARSET_LEFT_TO_RIGHT);
1251 if (!NILP (*charset))
1252 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1253 | (((c - MIN_CHAR_96x96) % 96) + 32);
1256 *charset = Vcharset_ucs;
1262 *charset = Vcharset_ucs;
1267 Lisp_Object Vdefault_coded_charset_priority_list;
1271 /************************************************************************/
1272 /* Basic charset Lisp functions */
1273 /************************************************************************/
1275 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1276 Return non-nil if OBJECT is a charset.
1280 return CHARSETP (object) ? Qt : Qnil;
1283 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1284 Retrieve the charset of the given name.
1285 If CHARSET-OR-NAME is a charset object, it is simply returned.
1286 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1287 nil is returned. Otherwise the associated charset object is returned.
1291 if (CHARSETP (charset_or_name))
1292 return charset_or_name;
1294 CHECK_SYMBOL (charset_or_name);
1295 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1298 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1299 Retrieve the charset of the given name.
1300 Same as `find-charset' except an error is signalled if there is no such
1301 charset instead of returning nil.
1305 Lisp_Object charset = Ffind_charset (name);
1308 signal_simple_error ("No such charset", name);
1312 /* We store the charsets in hash tables with the names as the key and the
1313 actual charset object as the value. Occasionally we need to use them
1314 in a list format. These routines provide us with that. */
1315 struct charset_list_closure
1317 Lisp_Object *charset_list;
1321 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1322 void *charset_list_closure)
1324 /* This function can GC */
1325 struct charset_list_closure *chcl =
1326 (struct charset_list_closure*) charset_list_closure;
1327 Lisp_Object *charset_list = chcl->charset_list;
1329 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1333 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1334 Return a list of the names of all defined charsets.
1338 Lisp_Object charset_list = Qnil;
1339 struct gcpro gcpro1;
1340 struct charset_list_closure charset_list_closure;
1342 GCPRO1 (charset_list);
1343 charset_list_closure.charset_list = &charset_list;
1344 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1345 &charset_list_closure);
1348 return charset_list;
1351 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1352 Return the name of charset CHARSET.
1356 return XCHARSET_NAME (Fget_charset (charset));
1359 /* #### SJT Should generic properties be allowed? */
1360 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1361 Define a new character set.
1362 This function is for use with Mule support.
1363 NAME is a symbol, the name by which the character set is normally referred.
1364 DOC-STRING is a string describing the character set.
1365 PROPS is a property list, describing the specific nature of the
1366 character set. Recognized properties are:
1368 'short-name Short version of the charset name (ex: Latin-1)
1369 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1370 'registry A regular expression matching the font registry field for
1372 'dimension Number of octets used to index a character in this charset.
1373 Either 1 or 2. Defaults to 1.
1374 If UTF-2000 feature is enabled, 3 or 4 are also available.
1375 'columns Number of columns used to display a character in this charset.
1376 Only used in TTY mode. (Under X, the actual width of a
1377 character can be derived from the font used to display the
1378 characters.) If unspecified, defaults to the dimension
1379 (this is almost always the correct value).
1380 'chars Number of characters in each dimension (94 or 96).
1381 Defaults to 94. Note that if the dimension is 2, the
1382 character set thus described is 94x94 or 96x96.
1383 If UTF-2000 feature is enabled, 128 or 256 are also available.
1384 'final Final byte of ISO 2022 escape sequence. Must be
1385 supplied. Each combination of (DIMENSION, CHARS) defines a
1386 separate namespace for final bytes. Note that ISO
1387 2022 restricts the final byte to the range
1388 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1389 dimension == 2. Note also that final bytes in the range
1390 0x30 - 0x3F are reserved for user-defined (not official)
1392 'graphic 0 (use left half of font on output) or 1 (use right half
1393 of font on output). Defaults to 0. For example, for
1394 a font whose registry is ISO8859-1, the left half
1395 (octets 0x20 - 0x7F) is the `ascii' character set, while
1396 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1397 character set. With 'graphic set to 0, the octets
1398 will have their high bit cleared; with it set to 1,
1399 the octets will have their high bit set.
1400 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1402 'ccl-program A compiled CCL program used to convert a character in
1403 this charset into an index into the font. This is in
1404 addition to the 'graphic property. The CCL program
1405 is passed the octets of the character, with the high
1406 bit cleared and set depending upon whether the value
1407 of the 'graphic property is 0 or 1.
1408 'mother [UTF-2000 only] Base coded-charset.
1409 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1410 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1411 'code-offset [UTF-2000 only] Offset for a code-point of a base
1413 'conversion [UTF-2000 only] Conversion for a code-point of a base
1414 coded-charset (94x60 or 94x94x60).
1416 (name, doc_string, props))
1418 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1419 int direction = CHARSET_LEFT_TO_RIGHT;
1420 Lisp_Object registry = Qnil;
1421 Lisp_Object charset;
1422 Lisp_Object ccl_program = Qnil;
1423 Lisp_Object short_name = Qnil, long_name = Qnil;
1424 Lisp_Object mother = Qnil;
1425 int min_code = 0, max_code = 0, code_offset = 0;
1426 int byte_offset = -1;
1429 CHECK_SYMBOL (name);
1430 if (!NILP (doc_string))
1431 CHECK_STRING (doc_string);
1433 charset = Ffind_charset (name);
1434 if (!NILP (charset))
1435 signal_simple_error ("Cannot redefine existing charset", name);
1438 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1440 if (EQ (keyword, Qshort_name))
1442 CHECK_STRING (value);
1446 if (EQ (keyword, Qlong_name))
1448 CHECK_STRING (value);
1452 else if (EQ (keyword, Qdimension))
1455 dimension = XINT (value);
1456 if (dimension < 1 ||
1463 signal_simple_error ("Invalid value for 'dimension", value);
1466 else if (EQ (keyword, Qchars))
1469 chars = XINT (value);
1470 if (chars != 94 && chars != 96
1472 && chars != 128 && chars != 256
1475 signal_simple_error ("Invalid value for 'chars", value);
1478 else if (EQ (keyword, Qcolumns))
1481 columns = XINT (value);
1482 if (columns != 1 && columns != 2)
1483 signal_simple_error ("Invalid value for 'columns", value);
1486 else if (EQ (keyword, Qgraphic))
1489 graphic = XINT (value);
1497 signal_simple_error ("Invalid value for 'graphic", value);
1500 else if (EQ (keyword, Qregistry))
1502 CHECK_STRING (value);
1506 else if (EQ (keyword, Qdirection))
1508 if (EQ (value, Ql2r))
1509 direction = CHARSET_LEFT_TO_RIGHT;
1510 else if (EQ (value, Qr2l))
1511 direction = CHARSET_RIGHT_TO_LEFT;
1513 signal_simple_error ("Invalid value for 'direction", value);
1516 else if (EQ (keyword, Qfinal))
1518 CHECK_CHAR_COERCE_INT (value);
1519 final = XCHAR (value);
1520 if (final < '0' || final > '~')
1521 signal_simple_error ("Invalid value for 'final", value);
1525 else if (EQ (keyword, Qmother))
1527 mother = Fget_charset (value);
1530 else if (EQ (keyword, Qmin_code))
1533 min_code = XUINT (value);
1536 else if (EQ (keyword, Qmax_code))
1539 max_code = XUINT (value);
1542 else if (EQ (keyword, Qcode_offset))
1545 code_offset = XUINT (value);
1548 else if (EQ (keyword, Qconversion))
1550 if (EQ (value, Q94x60))
1551 conversion = CONVERSION_94x60;
1552 else if (EQ (value, Q94x94x60))
1553 conversion = CONVERSION_94x94x60;
1555 signal_simple_error ("Unrecognized conversion", value);
1559 else if (EQ (keyword, Qccl_program))
1561 struct ccl_program test_ccl;
1563 if (setup_ccl_program (&test_ccl, value) < 0)
1564 signal_simple_error ("Invalid value for 'ccl-program", value);
1565 ccl_program = value;
1569 signal_simple_error ("Unrecognized property", keyword);
1575 error ("'final must be specified");
1577 if (dimension == 2 && final > 0x5F)
1579 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1582 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1583 CHARSET_LEFT_TO_RIGHT)) ||
1584 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1585 CHARSET_RIGHT_TO_LEFT)))
1587 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1589 id = get_unallocated_leading_byte (dimension);
1591 if (NILP (doc_string))
1592 doc_string = build_string ("");
1594 if (NILP (registry))
1595 registry = build_string ("");
1597 if (NILP (short_name))
1598 XSETSTRING (short_name, XSYMBOL (name)->name);
1600 if (NILP (long_name))
1601 long_name = doc_string;
1604 columns = dimension;
1606 if (byte_offset < 0)
1610 else if (chars == 96)
1616 charset = make_charset (id, name, chars, dimension, columns, graphic,
1617 final, direction, short_name, long_name,
1618 doc_string, registry,
1619 Qnil, min_code, max_code, code_offset, byte_offset,
1620 mother, conversion);
1621 if (!NILP (ccl_program))
1622 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1626 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1628 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1629 NEW-NAME is the name of the new charset. Return the new charset.
1631 (charset, new_name))
1633 Lisp_Object new_charset = Qnil;
1634 int id, chars, dimension, columns, graphic, final;
1636 Lisp_Object registry, doc_string, short_name, long_name;
1639 charset = Fget_charset (charset);
1640 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1641 signal_simple_error ("Charset already has reverse-direction charset",
1644 CHECK_SYMBOL (new_name);
1645 if (!NILP (Ffind_charset (new_name)))
1646 signal_simple_error ("Cannot redefine existing charset", new_name);
1648 cs = XCHARSET (charset);
1650 chars = CHARSET_CHARS (cs);
1651 dimension = CHARSET_DIMENSION (cs);
1652 columns = CHARSET_COLUMNS (cs);
1653 id = get_unallocated_leading_byte (dimension);
1655 graphic = CHARSET_GRAPHIC (cs);
1656 final = CHARSET_FINAL (cs);
1657 direction = CHARSET_RIGHT_TO_LEFT;
1658 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1659 direction = CHARSET_LEFT_TO_RIGHT;
1660 doc_string = CHARSET_DOC_STRING (cs);
1661 short_name = CHARSET_SHORT_NAME (cs);
1662 long_name = CHARSET_LONG_NAME (cs);
1663 registry = CHARSET_REGISTRY (cs);
1665 new_charset = make_charset (id, new_name, chars, dimension, columns,
1666 graphic, final, direction, short_name, long_name,
1667 doc_string, registry,
1669 CHARSET_DECODING_TABLE(cs),
1670 CHARSET_MIN_CODE(cs),
1671 CHARSET_MAX_CODE(cs),
1672 CHARSET_CODE_OFFSET(cs),
1673 CHARSET_BYTE_OFFSET(cs),
1675 CHARSET_CONVERSION (cs)
1677 Qnil, 0, 0, 0, 0, Qnil, 0
1681 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1682 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1687 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1688 Define symbol ALIAS as an alias for CHARSET.
1692 CHECK_SYMBOL (alias);
1693 charset = Fget_charset (charset);
1694 return Fputhash (alias, charset, Vcharset_hash_table);
1697 /* #### Reverse direction charsets not yet implemented. */
1699 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1701 Return the reverse-direction charset parallel to CHARSET, if any.
1702 This is the charset with the same properties (in particular, the same
1703 dimension, number of characters per dimension, and final byte) as
1704 CHARSET but whose characters are displayed in the opposite direction.
1708 charset = Fget_charset (charset);
1709 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1713 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1714 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1715 If DIRECTION is omitted, both directions will be checked (left-to-right
1716 will be returned if character sets exist for both directions).
1718 (dimension, chars, final, direction))
1720 int dm, ch, fi, di = -1;
1721 Lisp_Object obj = Qnil;
1723 CHECK_INT (dimension);
1724 dm = XINT (dimension);
1725 if (dm < 1 || dm > 2)
1726 signal_simple_error ("Invalid value for DIMENSION", dimension);
1730 if (ch != 94 && ch != 96)
1731 signal_simple_error ("Invalid value for CHARS", chars);
1733 CHECK_CHAR_COERCE_INT (final);
1735 if (fi < '0' || fi > '~')
1736 signal_simple_error ("Invalid value for FINAL", final);
1738 if (EQ (direction, Ql2r))
1739 di = CHARSET_LEFT_TO_RIGHT;
1740 else if (EQ (direction, Qr2l))
1741 di = CHARSET_RIGHT_TO_LEFT;
1742 else if (!NILP (direction))
1743 signal_simple_error ("Invalid value for DIRECTION", direction);
1745 if (dm == 2 && fi > 0x5F)
1747 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1751 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1753 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1756 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1759 return XCHARSET_NAME (obj);
1763 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1764 Return short name of CHARSET.
1768 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1771 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1772 Return long name of CHARSET.
1776 return XCHARSET_LONG_NAME (Fget_charset (charset));
1779 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1780 Return description of CHARSET.
1784 return XCHARSET_DOC_STRING (Fget_charset (charset));
1787 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1788 Return dimension of CHARSET.
1792 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1795 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1796 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1797 Recognized properties are those listed in `make-charset', as well as
1798 'name and 'doc-string.
1804 charset = Fget_charset (charset);
1805 cs = XCHARSET (charset);
1807 CHECK_SYMBOL (prop);
1808 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1809 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1810 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1811 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1812 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1813 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1814 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1815 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1816 Qnil : make_char (CHARSET_FINAL (cs));
1817 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1818 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1819 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1820 if (EQ (prop, Qdirection))
1821 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1822 if (EQ (prop, Qreverse_direction_charset))
1824 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1825 /* #### Is this translation OK? If so, error checking sufficient? */
1826 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1829 if (EQ (prop, Qmother))
1830 return CHARSET_MOTHER (cs);
1831 if (EQ (prop, Qmin_code))
1832 return make_int (CHARSET_MIN_CODE (cs));
1833 if (EQ (prop, Qmax_code))
1834 return make_int (CHARSET_MAX_CODE (cs));
1836 signal_simple_error ("Unrecognized charset property name", prop);
1837 return Qnil; /* not reached */
1840 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1841 Return charset identification number of CHARSET.
1845 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1848 /* #### We need to figure out which properties we really want to
1851 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1852 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1854 (charset, ccl_program))
1856 struct ccl_program test_ccl;
1858 charset = Fget_charset (charset);
1859 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1860 signal_simple_error ("Invalid ccl-program", ccl_program);
1861 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1866 invalidate_charset_font_caches (Lisp_Object charset)
1868 /* Invalidate font cache entries for charset on all devices. */
1869 Lisp_Object devcons, concons, hash_table;
1870 DEVICE_LOOP_NO_BREAK (devcons, concons)
1872 struct device *d = XDEVICE (XCAR (devcons));
1873 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1874 if (!UNBOUNDP (hash_table))
1875 Fclrhash (hash_table);
1879 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1880 Set the 'registry property of CHARSET to REGISTRY.
1882 (charset, registry))
1884 charset = Fget_charset (charset);
1885 CHECK_STRING (registry);
1886 XCHARSET_REGISTRY (charset) = registry;
1887 invalidate_charset_font_caches (charset);
1888 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1893 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1894 Return mapping-table of CHARSET.
1898 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1901 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1902 Set mapping-table of CHARSET to TABLE.
1906 struct Lisp_Charset *cs;
1910 charset = Fget_charset (charset);
1911 cs = XCHARSET (charset);
1915 CHARSET_DECODING_TABLE(cs) = Qnil;
1918 else if (VECTORP (table))
1920 int ccs_len = CHARSET_BYTE_SIZE (cs);
1921 int ret = decoding_table_check_elements (table,
1922 CHARSET_DIMENSION (cs),
1927 signal_simple_error ("Too big table", table);
1929 signal_simple_error ("Invalid element is found", table);
1931 signal_simple_error ("Something wrong", table);
1933 CHARSET_DECODING_TABLE(cs) = Qnil;
1936 signal_error (Qwrong_type_argument,
1937 list2 (build_translated_string ("vector-or-nil-p"),
1940 byte_offset = CHARSET_BYTE_OFFSET (cs);
1941 switch (CHARSET_DIMENSION (cs))
1944 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1946 Lisp_Object c = XVECTOR_DATA(table)[i];
1949 Fput_char_attribute (c, XCHARSET_NAME (charset),
1950 make_int (i + byte_offset));
1954 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1956 Lisp_Object v = XVECTOR_DATA(table)[i];
1962 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1964 Lisp_Object c = XVECTOR_DATA(v)[j];
1968 (c, XCHARSET_NAME (charset),
1969 make_int ( ( (i + byte_offset) << 8 )
1975 Fput_char_attribute (v, XCHARSET_NAME (charset),
1976 make_int (i + byte_offset));
1985 /************************************************************************/
1986 /* Lisp primitives for working with characters */
1987 /************************************************************************/
1990 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
1991 Make a character from CHARSET and code-point CODE.
1992 If DEFINED_ONLY is non-nil, builtin character is not returned.
1993 If corresponding character is not found, nil is returned.
1995 (charset, code, defined_only))
1999 charset = Fget_charset (charset);
2002 if (XCHARSET_GRAPHIC (charset) == 1)
2004 if (NILP (defined_only))
2005 c = DECODE_CHAR (charset, c);
2007 c = decode_defined_char (charset, c);
2008 return c >= 0 ? make_char (c) : Qnil;
2011 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2012 Make a builtin character from CHARSET and code-point CODE.
2018 charset = Fget_charset (charset);
2020 if (EQ (charset, Vcharset_latin_viscii))
2022 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2028 (ret = Fget_char_attribute (chr,
2029 Vcharset_latin_viscii_lower,
2032 charset = Vcharset_latin_viscii_lower;
2036 (ret = Fget_char_attribute (chr,
2037 Vcharset_latin_viscii_upper,
2040 charset = Vcharset_latin_viscii_upper;
2047 if (XCHARSET_GRAPHIC (charset) == 1)
2050 c = decode_builtin_char (charset, c);
2051 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
2055 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2056 Make a character from CHARSET and octets ARG1 and ARG2.
2057 ARG2 is required only for characters from two-dimensional charsets.
2058 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2059 character s with caron.
2061 (charset, arg1, arg2))
2065 int lowlim, highlim;
2067 charset = Fget_charset (charset);
2068 cs = XCHARSET (charset);
2070 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2071 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2073 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2075 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2076 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2079 /* It is useful (and safe, according to Olivier Galibert) to strip
2080 the 8th bit off ARG1 and ARG2 because it allows programmers to
2081 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2082 Latin 2 code of the character. */
2090 if (a1 < lowlim || a1 > highlim)
2091 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2093 if (CHARSET_DIMENSION (cs) == 1)
2097 ("Charset is of dimension one; second octet must be nil", arg2);
2098 return make_char (MAKE_CHAR (charset, a1, 0));
2107 a2 = XINT (arg2) & 0x7f;
2109 if (a2 < lowlim || a2 > highlim)
2110 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2112 return make_char (MAKE_CHAR (charset, a1, a2));
2115 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2116 Return the character set of CHARACTER.
2120 CHECK_CHAR_COERCE_INT (character);
2122 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2125 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2126 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2127 N defaults to 0 if omitted.
2131 Lisp_Object charset;
2134 CHECK_CHAR_COERCE_INT (character);
2136 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2138 if (NILP (n) || EQ (n, Qzero))
2139 return make_int (octet0);
2140 else if (EQ (n, make_int (1)))
2141 return make_int (octet1);
2143 signal_simple_error ("Octet number must be 0 or 1", n);
2147 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2148 Return code-point of CHARACTER in specified CHARSET.
2150 (character, charset, defined_only))
2154 CHECK_CHAR_COERCE_INT (character);
2155 charset = Fget_charset (charset);
2156 code_point = charset_code_point (charset, XCHAR (character),
2157 !NILP (defined_only));
2158 if (code_point >= 0)
2159 return make_int (code_point);
2165 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2166 Return list of charset and one or two position-codes of CHARACTER.
2170 /* This function can GC */
2171 struct gcpro gcpro1, gcpro2;
2172 Lisp_Object charset = Qnil;
2173 Lisp_Object rc = Qnil;
2181 GCPRO2 (charset, rc);
2182 CHECK_CHAR_COERCE_INT (character);
2185 code_point = ENCODE_CHAR (XCHAR (character), charset);
2186 dimension = XCHARSET_DIMENSION (charset);
2187 while (dimension > 0)
2189 rc = Fcons (make_int (code_point & 255), rc);
2193 rc = Fcons (XCHARSET_NAME (charset), rc);
2195 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2197 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2199 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2203 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2212 #ifdef ENABLE_COMPOSITE_CHARS
2213 /************************************************************************/
2214 /* composite character functions */
2215 /************************************************************************/
2218 lookup_composite_char (Bufbyte *str, int len)
2220 Lisp_Object lispstr = make_string (str, len);
2221 Lisp_Object ch = Fgethash (lispstr,
2222 Vcomposite_char_string2char_hash_table,
2228 if (composite_char_row_next >= 128)
2229 signal_simple_error ("No more composite chars available", lispstr);
2230 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2231 composite_char_col_next);
2232 Fputhash (make_char (emch), lispstr,
2233 Vcomposite_char_char2string_hash_table);
2234 Fputhash (lispstr, make_char (emch),
2235 Vcomposite_char_string2char_hash_table);
2236 composite_char_col_next++;
2237 if (composite_char_col_next >= 128)
2239 composite_char_col_next = 32;
2240 composite_char_row_next++;
2249 composite_char_string (Emchar ch)
2251 Lisp_Object str = Fgethash (make_char (ch),
2252 Vcomposite_char_char2string_hash_table,
2254 assert (!UNBOUNDP (str));
2258 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2259 Convert a string into a single composite character.
2260 The character is the result of overstriking all the characters in
2265 CHECK_STRING (string);
2266 return make_char (lookup_composite_char (XSTRING_DATA (string),
2267 XSTRING_LENGTH (string)));
2270 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2271 Return a string of the characters comprising a composite character.
2279 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2280 signal_simple_error ("Must be composite char", ch);
2281 return composite_char_string (emch);
2283 #endif /* ENABLE_COMPOSITE_CHARS */
2286 /************************************************************************/
2287 /* initialization */
2288 /************************************************************************/
2291 syms_of_mule_charset (void)
2293 INIT_LRECORD_IMPLEMENTATION (charset);
2295 DEFSUBR (Fcharsetp);
2296 DEFSUBR (Ffind_charset);
2297 DEFSUBR (Fget_charset);
2298 DEFSUBR (Fcharset_list);
2299 DEFSUBR (Fcharset_name);
2300 DEFSUBR (Fmake_charset);
2301 DEFSUBR (Fmake_reverse_direction_charset);
2302 /* DEFSUBR (Freverse_direction_charset); */
2303 DEFSUBR (Fdefine_charset_alias);
2304 DEFSUBR (Fcharset_from_attributes);
2305 DEFSUBR (Fcharset_short_name);
2306 DEFSUBR (Fcharset_long_name);
2307 DEFSUBR (Fcharset_description);
2308 DEFSUBR (Fcharset_dimension);
2309 DEFSUBR (Fcharset_property);
2310 DEFSUBR (Fcharset_id);
2311 DEFSUBR (Fset_charset_ccl_program);
2312 DEFSUBR (Fset_charset_registry);
2314 DEFSUBR (Fcharset_mapping_table);
2315 DEFSUBR (Fset_charset_mapping_table);
2319 DEFSUBR (Fdecode_char);
2320 DEFSUBR (Fdecode_builtin_char);
2321 DEFSUBR (Fencode_char);
2323 DEFSUBR (Fmake_char);
2324 DEFSUBR (Fchar_charset);
2325 DEFSUBR (Fchar_octet);
2326 DEFSUBR (Fsplit_char);
2328 #ifdef ENABLE_COMPOSITE_CHARS
2329 DEFSUBR (Fmake_composite_char);
2330 DEFSUBR (Fcomposite_char_string);
2333 defsymbol (&Qcharsetp, "charsetp");
2334 defsymbol (&Qregistry, "registry");
2335 defsymbol (&Qfinal, "final");
2336 defsymbol (&Qgraphic, "graphic");
2337 defsymbol (&Qdirection, "direction");
2338 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2339 defsymbol (&Qshort_name, "short-name");
2340 defsymbol (&Qlong_name, "long-name");
2342 defsymbol (&Qmother, "mother");
2343 defsymbol (&Qmin_code, "min-code");
2344 defsymbol (&Qmax_code, "max-code");
2345 defsymbol (&Qcode_offset, "code-offset");
2346 defsymbol (&Qconversion, "conversion");
2347 defsymbol (&Q94x60, "94x60");
2348 defsymbol (&Q94x94x60, "94x94x60");
2351 defsymbol (&Ql2r, "l2r");
2352 defsymbol (&Qr2l, "r2l");
2354 /* Charsets, compatible with FSF 20.3
2355 Naming convention is Script-Charset[-Edition] */
2356 defsymbol (&Qascii, "ascii");
2357 defsymbol (&Qcontrol_1, "control-1");
2358 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2359 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2360 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2361 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2362 defsymbol (&Qthai_tis620, "thai-tis620");
2363 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2364 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2365 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2366 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2367 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2368 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2369 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2370 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2371 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2372 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2373 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2374 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2375 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2376 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2377 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2378 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2380 defsymbol (&Qucs, "ucs");
2381 defsymbol (&Qucs_bmp, "ucs-bmp");
2382 defsymbol (&Qucs_smp, "ucs-smp");
2383 defsymbol (&Qucs_sip, "ucs-sip");
2384 defsymbol (&Qucs_gb, "ucs-gb");
2385 defsymbol (&Qucs_cns, "ucs-cns");
2386 defsymbol (&Qucs_jis, "ucs-jis");
2387 defsymbol (&Qlatin_viscii, "latin-viscii");
2388 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2389 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2390 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2391 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2392 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2393 defsymbol (&Qjis_x0208, "=jis-x0208");
2394 defsymbol (&Qchinese_big5, "chinese-big5");
2395 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2397 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2398 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2400 defsymbol (&Qcomposite, "composite");
2404 vars_of_mule_charset (void)
2411 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2412 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2414 /* Table of charsets indexed by leading byte. */
2415 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2416 chlook->charset_by_leading_byte[i] = Qnil;
2419 /* Table of charsets indexed by type/final-byte. */
2420 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2421 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2422 chlook->charset_by_attributes[i][j] = Qnil;
2424 /* Table of charsets indexed by type/final-byte/direction. */
2425 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2426 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2427 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2428 chlook->charset_by_attributes[i][j][k] = Qnil;
2432 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2434 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2435 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2439 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2440 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2441 Leading-code of private TYPE9N charset of column-width 1.
2443 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2447 Vdefault_coded_charset_priority_list = Qnil;
2448 DEFVAR_LISP ("default-coded-charset-priority-list",
2449 &Vdefault_coded_charset_priority_list /*
2450 Default order of preferred coded-character-sets.
2456 complex_vars_of_mule_charset (void)
2458 staticpro (&Vcharset_hash_table);
2459 Vcharset_hash_table =
2460 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2462 /* Predefined character sets. We store them into variables for
2466 staticpro (&Vcharset_ucs);
2468 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2469 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2470 build_string ("UCS"),
2471 build_string ("UCS"),
2472 build_string ("ISO/IEC 10646"),
2474 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2475 staticpro (&Vcharset_ucs_bmp);
2477 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2478 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2479 build_string ("BMP"),
2480 build_string ("UCS-BMP"),
2481 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2483 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2484 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2485 staticpro (&Vcharset_ucs_smp);
2487 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2488 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2489 build_string ("SMP"),
2490 build_string ("UCS-SMP"),
2491 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2492 build_string ("UCS00-1"),
2493 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2494 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2495 staticpro (&Vcharset_ucs_sip);
2497 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2498 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2499 build_string ("SIP"),
2500 build_string ("UCS-SIP"),
2501 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2502 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2503 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2504 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2505 staticpro (&Vcharset_ucs_gb);
2507 make_charset (LEADING_BYTE_UCS_GB, Qucs_gb, 256, 3,
2508 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2509 build_string ("UCS for GB"),
2510 build_string ("UCS for GB"),
2511 build_string ("ISO/IEC 10646 for GB"),
2513 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2514 staticpro (&Vcharset_ucs_cns);
2516 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2517 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2518 build_string ("UCS for CNS"),
2519 build_string ("UCS for CNS 11643"),
2520 build_string ("ISO/IEC 10646 for CNS 11643"),
2522 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2523 staticpro (&Vcharset_ucs_jis);
2525 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2526 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2527 build_string ("UCS for JIS"),
2528 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2530 ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2532 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2534 # define MIN_CHAR_THAI 0
2535 # define MAX_CHAR_THAI 0
2536 /* # define MIN_CHAR_HEBREW 0 */
2537 /* # define MAX_CHAR_HEBREW 0 */
2538 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2539 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2541 staticpro (&Vcharset_ascii);
2543 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2544 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2545 build_string ("ASCII"),
2546 build_string ("ASCII)"),
2547 build_string ("ASCII (ISO646 IRV)"),
2548 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2549 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2550 staticpro (&Vcharset_control_1);
2551 Vcharset_control_1 =
2552 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2553 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2554 build_string ("C1"),
2555 build_string ("Control characters"),
2556 build_string ("Control characters 128-191"),
2558 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2559 staticpro (&Vcharset_latin_iso8859_1);
2560 Vcharset_latin_iso8859_1 =
2561 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2562 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2563 build_string ("Latin-1"),
2564 build_string ("ISO8859-1 (Latin-1)"),
2565 build_string ("ISO8859-1 (Latin-1)"),
2566 build_string ("iso8859-1"),
2567 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2568 staticpro (&Vcharset_latin_iso8859_2);
2569 Vcharset_latin_iso8859_2 =
2570 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2571 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2572 build_string ("Latin-2"),
2573 build_string ("ISO8859-2 (Latin-2)"),
2574 build_string ("ISO8859-2 (Latin-2)"),
2575 build_string ("iso8859-2"),
2576 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2577 staticpro (&Vcharset_latin_iso8859_3);
2578 Vcharset_latin_iso8859_3 =
2579 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2580 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2581 build_string ("Latin-3"),
2582 build_string ("ISO8859-3 (Latin-3)"),
2583 build_string ("ISO8859-3 (Latin-3)"),
2584 build_string ("iso8859-3"),
2585 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2586 staticpro (&Vcharset_latin_iso8859_4);
2587 Vcharset_latin_iso8859_4 =
2588 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2589 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2590 build_string ("Latin-4"),
2591 build_string ("ISO8859-4 (Latin-4)"),
2592 build_string ("ISO8859-4 (Latin-4)"),
2593 build_string ("iso8859-4"),
2594 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2595 staticpro (&Vcharset_thai_tis620);
2596 Vcharset_thai_tis620 =
2597 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2598 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2599 build_string ("TIS620"),
2600 build_string ("TIS620 (Thai)"),
2601 build_string ("TIS620.2529 (Thai)"),
2602 build_string ("tis620"),
2603 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2604 staticpro (&Vcharset_greek_iso8859_7);
2605 Vcharset_greek_iso8859_7 =
2606 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2607 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2608 build_string ("ISO8859-7"),
2609 build_string ("ISO8859-7 (Greek)"),
2610 build_string ("ISO8859-7 (Greek)"),
2611 build_string ("iso8859-7"),
2612 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2613 staticpro (&Vcharset_arabic_iso8859_6);
2614 Vcharset_arabic_iso8859_6 =
2615 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2616 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2617 build_string ("ISO8859-6"),
2618 build_string ("ISO8859-6 (Arabic)"),
2619 build_string ("ISO8859-6 (Arabic)"),
2620 build_string ("iso8859-6"),
2621 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2622 staticpro (&Vcharset_hebrew_iso8859_8);
2623 Vcharset_hebrew_iso8859_8 =
2624 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2625 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2626 build_string ("ISO8859-8"),
2627 build_string ("ISO8859-8 (Hebrew)"),
2628 build_string ("ISO8859-8 (Hebrew)"),
2629 build_string ("iso8859-8"),
2631 0 /* MIN_CHAR_HEBREW */,
2632 0 /* MAX_CHAR_HEBREW */, 0, 32,
2633 Qnil, CONVERSION_IDENTICAL);
2634 staticpro (&Vcharset_katakana_jisx0201);
2635 Vcharset_katakana_jisx0201 =
2636 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2637 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2638 build_string ("JISX0201 Kana"),
2639 build_string ("JISX0201.1976 (Japanese Kana)"),
2640 build_string ("JISX0201.1976 Japanese Kana"),
2641 build_string ("jisx0201\\.1976"),
2642 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2643 staticpro (&Vcharset_latin_jisx0201);
2644 Vcharset_latin_jisx0201 =
2645 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2646 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2647 build_string ("JISX0201 Roman"),
2648 build_string ("JISX0201.1976 (Japanese Roman)"),
2649 build_string ("JISX0201.1976 Japanese Roman"),
2650 build_string ("jisx0201\\.1976"),
2651 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2652 staticpro (&Vcharset_cyrillic_iso8859_5);
2653 Vcharset_cyrillic_iso8859_5 =
2654 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2655 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2656 build_string ("ISO8859-5"),
2657 build_string ("ISO8859-5 (Cyrillic)"),
2658 build_string ("ISO8859-5 (Cyrillic)"),
2659 build_string ("iso8859-5"),
2660 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2661 staticpro (&Vcharset_latin_iso8859_9);
2662 Vcharset_latin_iso8859_9 =
2663 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2664 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2665 build_string ("Latin-5"),
2666 build_string ("ISO8859-9 (Latin-5)"),
2667 build_string ("ISO8859-9 (Latin-5)"),
2668 build_string ("iso8859-9"),
2669 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2671 staticpro (&Vcharset_jis_x0208);
2672 Vcharset_jis_x0208 =
2673 make_charset (LEADING_BYTE_JIS_X0208,
2675 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2676 build_string ("JIS X0208"),
2677 build_string ("JIS X0208 Common"),
2678 build_string ("JIS X0208 Common part"),
2679 build_string ("jisx0208\\.1990"),
2681 MIN_CHAR_JIS_X0208_1990,
2682 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2683 Qnil, CONVERSION_94x94);
2685 staticpro (&Vcharset_japanese_jisx0208_1978);
2686 Vcharset_japanese_jisx0208_1978 =
2687 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2688 Qjapanese_jisx0208_1978, 94, 2,
2689 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2690 build_string ("JIS X0208:1978"),
2691 build_string ("JIS X0208:1978 (Japanese)"),
2693 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2694 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2701 CONVERSION_IDENTICAL);
2702 staticpro (&Vcharset_chinese_gb2312);
2703 Vcharset_chinese_gb2312 =
2704 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2705 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2706 build_string ("GB2312"),
2707 build_string ("GB2312)"),
2708 build_string ("GB2312 Chinese simplified"),
2709 build_string ("gb2312"),
2710 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2711 staticpro (&Vcharset_chinese_gb12345);
2712 Vcharset_chinese_gb12345 =
2713 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2714 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2715 build_string ("G1"),
2716 build_string ("GB 12345)"),
2717 build_string ("GB 12345-1990"),
2718 build_string ("GB12345\\(\\.1990\\)?-0"),
2719 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2720 staticpro (&Vcharset_japanese_jisx0208);
2721 Vcharset_japanese_jisx0208 =
2722 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2723 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2724 build_string ("JISX0208"),
2725 build_string ("JIS X0208:1983 (Japanese)"),
2726 build_string ("JIS X0208:1983 Japanese Kanji"),
2727 build_string ("jisx0208\\.1983"),
2734 CONVERSION_IDENTICAL);
2736 staticpro (&Vcharset_japanese_jisx0208_1990);
2737 Vcharset_japanese_jisx0208_1990 =
2738 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2739 Qjapanese_jisx0208_1990, 94, 2,
2740 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2741 build_string ("JISX0208-1990"),
2742 build_string ("JIS X0208:1990 (Japanese)"),
2743 build_string ("JIS X0208:1990 Japanese Kanji"),
2744 build_string ("jisx0208\\.1990"),
2746 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
2747 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
2748 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
2749 Vcharset_jis_x0208 /* Qnil */,
2750 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
2752 staticpro (&Vcharset_korean_ksc5601);
2753 Vcharset_korean_ksc5601 =
2754 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2755 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2756 build_string ("KSC5601"),
2757 build_string ("KSC5601 (Korean"),
2758 build_string ("KSC5601 Korean Hangul and Hanja"),
2759 build_string ("ksc5601"),
2760 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2761 staticpro (&Vcharset_japanese_jisx0212);
2762 Vcharset_japanese_jisx0212 =
2763 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2764 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2765 build_string ("JISX0212"),
2766 build_string ("JISX0212 (Japanese)"),
2767 build_string ("JISX0212 Japanese Supplement"),
2768 build_string ("jisx0212"),
2769 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2771 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2772 staticpro (&Vcharset_chinese_cns11643_1);
2773 Vcharset_chinese_cns11643_1 =
2774 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2775 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2776 build_string ("CNS11643-1"),
2777 build_string ("CNS11643-1 (Chinese traditional)"),
2779 ("CNS 11643 Plane 1 Chinese traditional"),
2780 build_string (CHINESE_CNS_PLANE_RE("1")),
2781 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2782 staticpro (&Vcharset_chinese_cns11643_2);
2783 Vcharset_chinese_cns11643_2 =
2784 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2785 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2786 build_string ("CNS11643-2"),
2787 build_string ("CNS11643-2 (Chinese traditional)"),
2789 ("CNS 11643 Plane 2 Chinese traditional"),
2790 build_string (CHINESE_CNS_PLANE_RE("2")),
2791 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2793 staticpro (&Vcharset_latin_tcvn5712);
2794 Vcharset_latin_tcvn5712 =
2795 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2796 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2797 build_string ("TCVN 5712"),
2798 build_string ("TCVN 5712 (VSCII-2)"),
2799 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
2800 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2801 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2802 staticpro (&Vcharset_latin_viscii_lower);
2803 Vcharset_latin_viscii_lower =
2804 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2805 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2806 build_string ("VISCII lower"),
2807 build_string ("VISCII lower (Vietnamese)"),
2808 build_string ("VISCII lower (Vietnamese)"),
2809 build_string ("MULEVISCII-LOWER"),
2810 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2811 staticpro (&Vcharset_latin_viscii_upper);
2812 Vcharset_latin_viscii_upper =
2813 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2814 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2815 build_string ("VISCII upper"),
2816 build_string ("VISCII upper (Vietnamese)"),
2817 build_string ("VISCII upper (Vietnamese)"),
2818 build_string ("MULEVISCII-UPPER"),
2819 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2820 staticpro (&Vcharset_latin_viscii);
2821 Vcharset_latin_viscii =
2822 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2823 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2824 build_string ("VISCII"),
2825 build_string ("VISCII 1.1 (Vietnamese)"),
2826 build_string ("VISCII 1.1 (Vietnamese)"),
2827 build_string ("VISCII1\\.1"),
2828 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2829 staticpro (&Vcharset_chinese_big5);
2830 Vcharset_chinese_big5 =
2831 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2832 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2833 build_string ("Big5"),
2834 build_string ("Big5"),
2835 build_string ("Big5 Chinese traditional"),
2836 build_string ("big5-0"),
2838 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2839 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
2841 staticpro (&Vcharset_ethiopic_ucs);
2842 Vcharset_ethiopic_ucs =
2843 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
2844 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2845 build_string ("Ethiopic (UCS)"),
2846 build_string ("Ethiopic (UCS)"),
2847 build_string ("Ethiopic of UCS"),
2848 build_string ("Ethiopic-Unicode"),
2849 Qnil, 0x1200, 0x137F, 0, 0,
2850 Qnil, CONVERSION_IDENTICAL);
2852 staticpro (&Vcharset_chinese_big5_1);
2853 Vcharset_chinese_big5_1 =
2854 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
2855 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
2856 build_string ("Big5"),
2857 build_string ("Big5 (Level-1)"),
2859 ("Big5 Level-1 Chinese traditional"),
2860 build_string ("big5"),
2861 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2862 staticpro (&Vcharset_chinese_big5_2);
2863 Vcharset_chinese_big5_2 =
2864 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
2865 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
2866 build_string ("Big5"),
2867 build_string ("Big5 (Level-2)"),
2869 ("Big5 Level-2 Chinese traditional"),
2870 build_string ("big5"),
2871 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2873 #ifdef ENABLE_COMPOSITE_CHARS
2874 /* #### For simplicity, we put composite chars into a 96x96 charset.
2875 This is going to lead to problems because you can run out of
2876 room, esp. as we don't yet recycle numbers. */
2877 staticpro (&Vcharset_composite);
2878 Vcharset_composite =
2879 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
2880 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2881 build_string ("Composite"),
2882 build_string ("Composite characters"),
2883 build_string ("Composite characters"),
2886 /* #### not dumped properly */
2887 composite_char_row_next = 32;
2888 composite_char_col_next = 32;
2890 Vcomposite_char_string2char_hash_table =
2891 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2892 Vcomposite_char_char2string_hash_table =
2893 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2894 staticpro (&Vcomposite_char_string2char_hash_table);
2895 staticpro (&Vcomposite_char_char2string_hash_table);
2896 #endif /* ENABLE_COMPOSITE_CHARS */