1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002,2003 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_ucs_gb;
72 Lisp_Object Vcharset_ucs_cns;
73 Lisp_Object Vcharset_ucs_jis;
74 Lisp_Object Vcharset_ucs_ks;
75 Lisp_Object Vcharset_latin_viscii;
76 Lisp_Object Vcharset_latin_tcvn5712;
77 Lisp_Object Vcharset_latin_viscii_lower;
78 Lisp_Object Vcharset_latin_viscii_upper;
79 Lisp_Object Vcharset_jis_x0208;
80 Lisp_Object Vcharset_chinese_big5;
81 Lisp_Object Vcharset_ethiopic_ucs;
83 Lisp_Object Vcharset_chinese_big5_1;
84 Lisp_Object Vcharset_chinese_big5_2;
86 #ifdef ENABLE_COMPOSITE_CHARS
87 Lisp_Object Vcharset_composite;
89 /* Hash tables for composite chars. One maps string representing
90 composed chars to their equivalent chars; one goes the
92 Lisp_Object Vcomposite_char_char2string_hash_table;
93 Lisp_Object Vcomposite_char_string2char_hash_table;
95 static int composite_char_row_next;
96 static int composite_char_col_next;
98 #endif /* ENABLE_COMPOSITE_CHARS */
100 struct charset_lookup *chlook;
102 static const struct lrecord_description charset_lookup_description_1[] = {
103 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
105 NUM_LEADING_BYTES+4*128
112 static const struct struct_description charset_lookup_description = {
113 sizeof (struct charset_lookup),
114 charset_lookup_description_1
118 /* Table of number of bytes in the string representation of a character
119 indexed by the first byte of that representation.
121 rep_bytes_by_first_byte(c) is more efficient than the equivalent
122 canonical computation:
124 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
126 const Bytecount rep_bytes_by_first_byte[0xA0] =
127 { /* 0x00 - 0x7f are for straight ASCII */
128 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
134 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
135 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
136 /* 0x80 - 0x8f are for Dimension-1 official charsets */
138 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
140 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
142 /* 0x90 - 0x9d are for Dimension-2 official charsets */
143 /* 0x9e is for Dimension-1 private charsets */
144 /* 0x9f is for Dimension-2 private charsets */
145 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
151 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
153 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
157 if (XVECTOR_LENGTH (v) > ccs_len)
160 for (i = 0; i < XVECTOR_LENGTH (v); i++)
162 Lisp_Object c = XVECTOR_DATA(v)[i];
164 if (!NILP (c) && !CHARP (c))
168 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
180 put_char_ccs_code_point (Lisp_Object character,
181 Lisp_Object ccs, Lisp_Object value)
183 if (!EQ (XCHARSET_NAME (ccs), Qucs)
185 || (XCHAR (character) != XINT (value)))
187 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
191 { /* obsolete representation: value must be a list of bytes */
192 Lisp_Object ret = Fcar (value);
196 signal_simple_error ("Invalid value for coded-charset", value);
197 code_point = XINT (ret);
198 if (XCHARSET_GRAPHIC (ccs) == 1)
206 signal_simple_error ("Invalid value for coded-charset",
210 signal_simple_error ("Invalid value for coded-charset",
213 if (XCHARSET_GRAPHIC (ccs) == 1)
215 code_point = (code_point << 8) | j;
218 value = make_int (code_point);
220 else if (INTP (value))
222 code_point = XINT (value);
223 if (XCHARSET_GRAPHIC (ccs) == 1)
225 code_point &= 0x7F7F7F7F;
226 value = make_int (code_point);
230 signal_simple_error ("Invalid value for coded-charset", value);
234 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
237 decoding_table_remove_char (ccs, XINT (cpos));
240 decoding_table_put_char (ccs, code_point, character);
246 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
248 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
249 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
251 if (VECTORP (decoding_table))
253 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
257 decoding_table_remove_char (ccs, XINT (cpos));
260 if (CHAR_TABLEP (encoding_table))
262 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
270 int leading_code_private_11;
273 Lisp_Object Qcharsetp;
275 /* Qdoc_string, Qdimension, Qchars defined in general.c */
276 Lisp_Object Qregistry, Qfinal, Qgraphic;
277 Lisp_Object Qdirection;
278 Lisp_Object Qreverse_direction_charset;
279 Lisp_Object Qleading_byte;
280 Lisp_Object Qshort_name, Qlong_name;
282 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
283 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60;
300 Qjapanese_jisx0208_1978,
304 Qjapanese_jisx0208_1990,
322 Qvietnamese_viscii_lower,
323 Qvietnamese_viscii_upper,
332 Lisp_Object Ql2r, Qr2l;
334 Lisp_Object Vcharset_hash_table;
336 /* Composite characters are characters constructed by overstriking two
337 or more regular characters.
339 1) The old Mule implementation involves storing composite characters
340 in a buffer as a tag followed by all of the actual characters
341 used to make up the composite character. I think this is a bad
342 idea; it greatly complicates code that wants to handle strings
343 one character at a time because it has to deal with the possibility
344 of great big ungainly characters. It's much more reasonable to
345 simply store an index into a table of composite characters.
347 2) The current implementation only allows for 16,384 separate
348 composite characters over the lifetime of the XEmacs process.
349 This could become a potential problem if the user
350 edited lots of different files that use composite characters.
351 Due to FSF bogosity, increasing the number of allowable
352 composite characters under Mule would decrease the number
353 of possible faces that can exist. Mule already has shrunk
354 this to 2048, and further shrinkage would become uncomfortable.
355 No such problems exist in XEmacs.
357 Composite characters could be represented as 0x80 C1 C2 C3,
358 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
359 for slightly under 2^20 (one million) composite characters
360 over the XEmacs process lifetime, and you only need to
361 increase the size of a Mule character from 19 to 21 bits.
362 Or you could use 0x80 C1 C2 C3 C4, allowing for about
363 85 million (slightly over 2^26) composite characters. */
366 /************************************************************************/
367 /* Basic Emchar functions */
368 /************************************************************************/
370 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
371 string in STR. Returns the number of bytes stored.
372 Do not call this directly. Use the macro set_charptr_emchar() instead.
376 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
391 else if ( c <= 0x7ff )
393 *p++ = (c >> 6) | 0xc0;
394 *p++ = (c & 0x3f) | 0x80;
396 else if ( c <= 0xffff )
398 *p++ = (c >> 12) | 0xe0;
399 *p++ = ((c >> 6) & 0x3f) | 0x80;
400 *p++ = (c & 0x3f) | 0x80;
402 else if ( c <= 0x1fffff )
404 *p++ = (c >> 18) | 0xf0;
405 *p++ = ((c >> 12) & 0x3f) | 0x80;
406 *p++ = ((c >> 6) & 0x3f) | 0x80;
407 *p++ = (c & 0x3f) | 0x80;
409 else if ( c <= 0x3ffffff )
411 *p++ = (c >> 24) | 0xf8;
412 *p++ = ((c >> 18) & 0x3f) | 0x80;
413 *p++ = ((c >> 12) & 0x3f) | 0x80;
414 *p++ = ((c >> 6) & 0x3f) | 0x80;
415 *p++ = (c & 0x3f) | 0x80;
419 *p++ = (c >> 30) | 0xfc;
420 *p++ = ((c >> 24) & 0x3f) | 0x80;
421 *p++ = ((c >> 18) & 0x3f) | 0x80;
422 *p++ = ((c >> 12) & 0x3f) | 0x80;
423 *p++ = ((c >> 6) & 0x3f) | 0x80;
424 *p++ = (c & 0x3f) | 0x80;
427 BREAKUP_CHAR (c, charset, c1, c2);
428 lb = CHAR_LEADING_BYTE (c);
429 if (LEADING_BYTE_PRIVATE_P (lb))
430 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
432 if (EQ (charset, Vcharset_control_1))
441 /* Return the first character from a Mule-encoded string in STR,
442 assuming it's non-ASCII. Do not call this directly.
443 Use the macro charptr_emchar() instead. */
446 non_ascii_charptr_emchar (const Bufbyte *str)
459 else if ( b >= 0xf8 )
464 else if ( b >= 0xf0 )
469 else if ( b >= 0xe0 )
474 else if ( b >= 0xc0 )
484 for( ; len > 0; len-- )
487 ch = ( ch << 6 ) | ( b & 0x3f );
491 Bufbyte i0 = *str, i1, i2 = 0;
494 if (i0 == LEADING_BYTE_CONTROL_1)
495 return (Emchar) (*++str - 0x20);
497 if (LEADING_BYTE_PREFIX_P (i0))
502 charset = CHARSET_BY_LEADING_BYTE (i0);
503 if (XCHARSET_DIMENSION (charset) == 2)
506 return MAKE_CHAR (charset, i1, i2);
510 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
511 Do not call this directly. Use the macro valid_char_p() instead. */
515 non_ascii_valid_char_p (Emchar ch)
519 /* Must have only lowest 19 bits set */
523 f1 = CHAR_FIELD1 (ch);
524 f2 = CHAR_FIELD2 (ch);
525 f3 = CHAR_FIELD3 (ch);
531 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
532 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
533 f2 > MAX_CHAR_FIELD2_PRIVATE)
538 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
539 f2 <= MAX_CHAR_FIELD2_PRIVATE))
543 NOTE: This takes advantage of the fact that
544 FIELD2_TO_OFFICIAL_LEADING_BYTE and
545 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
547 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
548 if (EQ (charset, Qnil))
550 return (XCHARSET_CHARS (charset) == 96);
556 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
557 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
558 f1 > MAX_CHAR_FIELD1_PRIVATE)
560 if (f2 < 0x20 || f3 < 0x20)
563 #ifdef ENABLE_COMPOSITE_CHARS
564 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
566 if (UNBOUNDP (Fgethash (make_int (ch),
567 Vcomposite_char_char2string_hash_table,
572 #endif /* ENABLE_COMPOSITE_CHARS */
574 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
575 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
578 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
580 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
583 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
585 if (EQ (charset, Qnil))
587 return (XCHARSET_CHARS (charset) == 96);
593 /************************************************************************/
594 /* Basic string functions */
595 /************************************************************************/
597 /* Copy the character pointed to by SRC into DST. Do not call this
598 directly. Use the macro charptr_copy_char() instead.
599 Return the number of bytes copied. */
602 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
604 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
606 for (i = bytes; i; i--, dst++, src++)
612 /************************************************************************/
613 /* streams of Emchars */
614 /************************************************************************/
616 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
617 The functions below are not meant to be called directly; use
618 the macros in insdel.h. */
621 Lstream_get_emchar_1 (Lstream *stream, int ch)
623 Bufbyte str[MAX_EMCHAR_LEN];
624 Bufbyte *strptr = str;
627 str[0] = (Bufbyte) ch;
629 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
631 int c = Lstream_getc (stream);
632 bufpos_checking_assert (c >= 0);
633 *++strptr = (Bufbyte) c;
635 return charptr_emchar (str);
639 Lstream_fput_emchar (Lstream *stream, Emchar ch)
641 Bufbyte str[MAX_EMCHAR_LEN];
642 Bytecount len = set_charptr_emchar (str, ch);
643 return Lstream_write (stream, str, len);
647 Lstream_funget_emchar (Lstream *stream, Emchar ch)
649 Bufbyte str[MAX_EMCHAR_LEN];
650 Bytecount len = set_charptr_emchar (str, ch);
651 Lstream_unread (stream, str, len);
655 /************************************************************************/
657 /************************************************************************/
660 mark_charset (Lisp_Object obj)
662 Lisp_Charset *cs = XCHARSET (obj);
664 mark_object (cs->short_name);
665 mark_object (cs->long_name);
666 mark_object (cs->doc_string);
667 mark_object (cs->registry);
668 mark_object (cs->ccl_program);
670 mark_object (cs->decoding_table);
671 mark_object (cs->mother);
677 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
679 Lisp_Charset *cs = XCHARSET (obj);
683 error ("printing unreadable object #<charset %s 0x%x>",
684 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
687 write_c_string ("#<charset ", printcharfun);
688 print_internal (CHARSET_NAME (cs), printcharfun, 0);
689 write_c_string (" ", printcharfun);
690 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
691 write_c_string (" ", printcharfun);
692 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
693 write_c_string (" ", printcharfun);
694 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
695 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
697 CHARSET_DIMENSION (cs),
698 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
699 CHARSET_COLUMNS (cs),
700 CHARSET_GRAPHIC (cs),
702 write_c_string (buf, printcharfun);
703 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
704 sprintf (buf, " 0x%x>", cs->header.uid);
705 write_c_string (buf, printcharfun);
708 static const struct lrecord_description charset_description[] = {
709 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
710 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
711 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
712 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
713 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
714 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
715 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
717 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
718 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
723 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
724 mark_charset, print_charset, 0, 0, 0,
728 /* Make a new charset. */
729 /* #### SJT Should generic properties be allowed? */
731 make_charset (Charset_ID id, Lisp_Object name,
732 unsigned short chars, unsigned char dimension,
733 unsigned char columns, unsigned char graphic,
734 Bufbyte final, unsigned char direction, Lisp_Object short_name,
735 Lisp_Object long_name, Lisp_Object doc,
737 Lisp_Object decoding_table,
738 Emchar min_code, Emchar max_code,
739 Emchar code_offset, unsigned char byte_offset,
740 Lisp_Object mother, unsigned char conversion)
743 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
747 XSETCHARSET (obj, cs);
749 CHARSET_ID (cs) = id;
750 CHARSET_NAME (cs) = name;
751 CHARSET_SHORT_NAME (cs) = short_name;
752 CHARSET_LONG_NAME (cs) = long_name;
753 CHARSET_CHARS (cs) = chars;
754 CHARSET_DIMENSION (cs) = dimension;
755 CHARSET_DIRECTION (cs) = direction;
756 CHARSET_COLUMNS (cs) = columns;
757 CHARSET_GRAPHIC (cs) = graphic;
758 CHARSET_FINAL (cs) = final;
759 CHARSET_DOC_STRING (cs) = doc;
760 CHARSET_REGISTRY (cs) = reg;
761 CHARSET_CCL_PROGRAM (cs) = Qnil;
762 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
764 CHARSET_DECODING_TABLE(cs) = Qunbound;
765 CHARSET_MIN_CODE (cs) = min_code;
766 CHARSET_MAX_CODE (cs) = max_code;
767 CHARSET_CODE_OFFSET (cs) = code_offset;
768 CHARSET_BYTE_OFFSET (cs) = byte_offset;
769 CHARSET_MOTHER (cs) = mother;
770 CHARSET_CONVERSION (cs) = conversion;
774 if (id == LEADING_BYTE_ASCII)
775 CHARSET_REP_BYTES (cs) = 1;
777 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
779 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
784 /* some charsets do not have final characters. This includes
785 ASCII, Control-1, Composite, and the two faux private
787 unsigned char iso2022_type
788 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
790 if (code_offset == 0)
792 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
793 chlook->charset_by_attributes[iso2022_type][final] = obj;
797 (chlook->charset_by_attributes[iso2022_type][final][direction]));
798 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
802 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
803 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
805 /* Some charsets are "faux" and don't have names or really exist at
806 all except in the leading-byte table. */
808 Fputhash (name, obj, Vcharset_hash_table);
813 get_unallocated_leading_byte (int dimension)
818 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
821 lb = chlook->next_allocated_leading_byte++;
825 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
828 lb = chlook->next_allocated_1_byte_leading_byte++;
832 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
835 lb = chlook->next_allocated_2_byte_leading_byte++;
841 ("No more character sets free for this dimension",
842 make_int (dimension));
848 /* Number of Big5 characters which have the same code in 1st byte. */
850 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
853 decode_defined_char (Lisp_Object ccs, int code_point)
855 int dim = XCHARSET_DIMENSION (ccs);
856 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
864 = get_ccs_octet_table (decoding_table, ccs,
865 (code_point >> (dim * 8)) & 255);
867 if (CHARP (decoding_table))
868 return XCHAR (decoding_table);
871 else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
873 if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
875 if ( EQ (mother, Vcharset_ucs) )
876 return DECODE_CHAR (mother, code_point);
878 return decode_defined_char (mother, code_point);
885 decode_builtin_char (Lisp_Object charset, int code_point)
887 Lisp_Object mother = XCHARSET_MOTHER (charset);
890 if ( XCHARSET_MAX_CODE (charset) > 0 )
892 if ( CHARSETP (mother) )
894 int code = code_point;
896 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
898 int row = code_point >> 8;
899 int cell = code_point & 255;
903 else if (row < 16 + 32 + 30)
904 code = (row - (16 + 32)) * 94 + cell - 33;
905 else if (row < 18 + 32 + 30)
907 else if (row < 18 + 32 + 60)
908 code = (row - (18 + 32)) * 94 + cell - 33;
910 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
912 int plane = code_point >> 16;
913 int row = (code_point >> 8) & 255;
914 int cell = code_point & 255;
918 else if (row < 16 + 32 + 30)
920 = (plane - 33) * 94 * 60
921 + (row - (16 + 32)) * 94
923 else if (row < 18 + 32 + 30)
925 else if (row < 18 + 32 + 60)
927 = (plane - 33) * 94 * 60
928 + (row - (18 + 32)) * 94
932 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
937 = (XCHARSET_DIMENSION (charset) == 1
939 code_point - XCHARSET_BYTE_OFFSET (charset)
941 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
942 * XCHARSET_CHARS (charset)
943 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
944 + XCHARSET_CODE_OFFSET (charset);
945 if ((cid < XCHARSET_MIN_CODE (charset))
946 || (XCHARSET_MAX_CODE (charset) < cid))
951 else if ((final = XCHARSET_FINAL (charset)) >= '0')
953 if (XCHARSET_DIMENSION (charset) == 1)
955 switch (XCHARSET_CHARS (charset))
959 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
962 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
970 switch (XCHARSET_CHARS (charset))
973 return MIN_CHAR_94x94
974 + (final - '0') * 94 * 94
975 + (((code_point >> 8) & 0x7F) - 33) * 94
976 + ((code_point & 0x7F) - 33);
978 return MIN_CHAR_96x96
979 + (final - '0') * 96 * 96
980 + (((code_point >> 8) & 0x7F) - 32) * 96
981 + ((code_point & 0x7F) - 32);
993 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
995 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
998 if ( CHAR_TABLEP (encoding_table)
999 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
1004 Lisp_Object mother = XCHARSET_MOTHER (charset);
1005 int min = XCHARSET_MIN_CODE (charset);
1006 int max = XCHARSET_MAX_CODE (charset);
1009 if ( CHARSETP (mother) )
1011 if (XCHARSET_FINAL (charset) >= '0')
1012 code = charset_code_point (mother, ch, 1);
1014 code = charset_code_point (mother, ch, defined_only);
1016 else if (defined_only)
1018 else if ( ((max == 0) && CHARSETP (mother)
1019 && (XCHARSET_FINAL (charset) == 0))
1020 || ((min <= ch) && (ch <= max)) )
1022 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1023 || ((min <= code) && (code <= max)) )
1025 int d = code - XCHARSET_CODE_OFFSET (charset);
1027 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1029 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1031 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1033 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1036 int cell = d % 94 + 33;
1042 return (row << 8) | cell;
1044 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1045 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1046 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1047 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1048 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1050 int plane = d / (94 * 60) + 33;
1051 int row = (d % (94 * 60)) / 94;
1052 int cell = d % 94 + 33;
1058 return (plane << 16) | (row << 8) | cell;
1060 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1062 ( (d / (94 * 94) + 33) << 16)
1063 | ((d / 94 % 94 + 33) << 8)
1065 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1067 ( (d / (96 * 96) + 32) << 16)
1068 | ((d / 96 % 96 + 32) << 8)
1070 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1072 ( (d / (94 * 94 * 94) + 33) << 24)
1073 | ((d / (94 * 94) % 94 + 33) << 16)
1074 | ((d / 94 % 94 + 33) << 8)
1076 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1078 ( (d / (96 * 96 * 96) + 32) << 24)
1079 | ((d / (96 * 96) % 96 + 32) << 16)
1080 | ((d / 96 % 96 + 32) << 8)
1084 printf ("Unknown CCS-conversion %d is specified!",
1085 XCHARSET_CONVERSION (charset));
1089 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1090 ( XCHARSET_MIN_CODE (charset) == 0 )
1092 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1093 (XCHARSET_CODE_OFFSET (charset)
1094 == XCHARSET_MIN_CODE (charset))
1099 if (XCHARSET_DIMENSION (charset) == 1)
1101 if (XCHARSET_CHARS (charset) == 94)
1103 if (((d = ch - (MIN_CHAR_94
1104 + (XCHARSET_FINAL (charset) - '0') * 94))
1109 else if (XCHARSET_CHARS (charset) == 96)
1111 if (((d = ch - (MIN_CHAR_96
1112 + (XCHARSET_FINAL (charset) - '0') * 96))
1120 else if (XCHARSET_DIMENSION (charset) == 2)
1122 if (XCHARSET_CHARS (charset) == 94)
1124 if (((d = ch - (MIN_CHAR_94x94
1126 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1129 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1131 else if (XCHARSET_CHARS (charset) == 96)
1133 if (((d = ch - (MIN_CHAR_96x96
1135 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1138 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1149 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1151 if (c <= MAX_CHAR_BASIC_LATIN)
1153 *charset = Vcharset_ascii;
1158 *charset = Vcharset_control_1;
1163 *charset = Vcharset_latin_iso8859_1;
1167 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1169 *charset = Vcharset_hebrew_iso8859_8;
1170 return c - MIN_CHAR_HEBREW + 0x20;
1173 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1175 *charset = Vcharset_thai_tis620;
1176 return c - MIN_CHAR_THAI + 0x20;
1179 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1180 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1182 return list2 (Vcharset_katakana_jisx0201,
1183 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1186 else if (c <= MAX_CHAR_BMP)
1188 *charset = Vcharset_ucs_bmp;
1191 else if (c <= MAX_CHAR_SMP)
1193 *charset = Vcharset_ucs_smp;
1194 return c - MIN_CHAR_SMP;
1196 else if (c <= MAX_CHAR_SIP)
1198 *charset = Vcharset_ucs_sip;
1199 return c - MIN_CHAR_SIP;
1201 else if (c < MIN_CHAR_94)
1203 *charset = Vcharset_ucs;
1206 else if (c <= MAX_CHAR_94)
1208 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1209 ((c - MIN_CHAR_94) / 94) + '0',
1210 CHARSET_LEFT_TO_RIGHT);
1211 if (!NILP (*charset))
1212 return ((c - MIN_CHAR_94) % 94) + 33;
1215 *charset = Vcharset_ucs;
1219 else if (c <= MAX_CHAR_96)
1221 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1222 ((c - MIN_CHAR_96) / 96) + '0',
1223 CHARSET_LEFT_TO_RIGHT);
1224 if (!NILP (*charset))
1225 return ((c - MIN_CHAR_96) % 96) + 32;
1228 *charset = Vcharset_ucs;
1232 else if (c <= MAX_CHAR_94x94)
1235 = CHARSET_BY_ATTRIBUTES (94, 2,
1236 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1237 CHARSET_LEFT_TO_RIGHT);
1238 if (!NILP (*charset))
1239 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1240 | (((c - MIN_CHAR_94x94) % 94) + 33);
1243 *charset = Vcharset_ucs;
1247 else if (c <= MAX_CHAR_96x96)
1250 = CHARSET_BY_ATTRIBUTES (96, 2,
1251 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1252 CHARSET_LEFT_TO_RIGHT);
1253 if (!NILP (*charset))
1254 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1255 | (((c - MIN_CHAR_96x96) % 96) + 32);
1258 *charset = Vcharset_ucs;
1264 *charset = Vcharset_ucs;
1269 Lisp_Object Vdefault_coded_charset_priority_list;
1273 /************************************************************************/
1274 /* Basic charset Lisp functions */
1275 /************************************************************************/
1277 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1278 Return non-nil if OBJECT is a charset.
1282 return CHARSETP (object) ? Qt : Qnil;
1285 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1286 Retrieve the charset of the given name.
1287 If CHARSET-OR-NAME is a charset object, it is simply returned.
1288 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1289 nil is returned. Otherwise the associated charset object is returned.
1293 if (CHARSETP (charset_or_name))
1294 return charset_or_name;
1296 CHECK_SYMBOL (charset_or_name);
1297 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1300 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1301 Retrieve the charset of the given name.
1302 Same as `find-charset' except an error is signalled if there is no such
1303 charset instead of returning nil.
1307 Lisp_Object charset = Ffind_charset (name);
1310 signal_simple_error ("No such charset", name);
1314 /* We store the charsets in hash tables with the names as the key and the
1315 actual charset object as the value. Occasionally we need to use them
1316 in a list format. These routines provide us with that. */
1317 struct charset_list_closure
1319 Lisp_Object *charset_list;
1323 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1324 void *charset_list_closure)
1326 /* This function can GC */
1327 struct charset_list_closure *chcl =
1328 (struct charset_list_closure*) charset_list_closure;
1329 Lisp_Object *charset_list = chcl->charset_list;
1331 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1335 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1336 Return a list of the names of all defined charsets.
1340 Lisp_Object charset_list = Qnil;
1341 struct gcpro gcpro1;
1342 struct charset_list_closure charset_list_closure;
1344 GCPRO1 (charset_list);
1345 charset_list_closure.charset_list = &charset_list;
1346 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1347 &charset_list_closure);
1350 return charset_list;
1353 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1354 Return the name of charset CHARSET.
1358 return XCHARSET_NAME (Fget_charset (charset));
1361 /* #### SJT Should generic properties be allowed? */
1362 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1363 Define a new character set.
1364 This function is for use with Mule support.
1365 NAME is a symbol, the name by which the character set is normally referred.
1366 DOC-STRING is a string describing the character set.
1367 PROPS is a property list, describing the specific nature of the
1368 character set. Recognized properties are:
1370 'short-name Short version of the charset name (ex: Latin-1)
1371 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1372 'registry A regular expression matching the font registry field for
1374 'dimension Number of octets used to index a character in this charset.
1375 Either 1 or 2. Defaults to 1.
1376 If UTF-2000 feature is enabled, 3 or 4 are also available.
1377 'columns Number of columns used to display a character in this charset.
1378 Only used in TTY mode. (Under X, the actual width of a
1379 character can be derived from the font used to display the
1380 characters.) If unspecified, defaults to the dimension
1381 (this is almost always the correct value).
1382 'chars Number of characters in each dimension (94 or 96).
1383 Defaults to 94. Note that if the dimension is 2, the
1384 character set thus described is 94x94 or 96x96.
1385 If UTF-2000 feature is enabled, 128 or 256 are also available.
1386 'final Final byte of ISO 2022 escape sequence. Must be
1387 supplied. Each combination of (DIMENSION, CHARS) defines a
1388 separate namespace for final bytes. Note that ISO
1389 2022 restricts the final byte to the range
1390 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1391 dimension == 2. Note also that final bytes in the range
1392 0x30 - 0x3F are reserved for user-defined (not official)
1394 'graphic 0 (use left half of font on output) or 1 (use right half
1395 of font on output). Defaults to 0. For example, for
1396 a font whose registry is ISO8859-1, the left half
1397 (octets 0x20 - 0x7F) is the `ascii' character set, while
1398 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1399 character set. With 'graphic set to 0, the octets
1400 will have their high bit cleared; with it set to 1,
1401 the octets will have their high bit set.
1402 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1404 'ccl-program A compiled CCL program used to convert a character in
1405 this charset into an index into the font. This is in
1406 addition to the 'graphic property. The CCL program
1407 is passed the octets of the character, with the high
1408 bit cleared and set depending upon whether the value
1409 of the 'graphic property is 0 or 1.
1410 'mother [UTF-2000 only] Base coded-charset.
1411 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1412 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1413 'code-offset [UTF-2000 only] Offset for a code-point of a base
1415 'conversion [UTF-2000 only] Conversion for a code-point of a base
1416 coded-charset (94x60 or 94x94x60).
1418 (name, doc_string, props))
1420 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1421 int direction = CHARSET_LEFT_TO_RIGHT;
1422 Lisp_Object registry = Qnil;
1423 Lisp_Object charset;
1424 Lisp_Object ccl_program = Qnil;
1425 Lisp_Object short_name = Qnil, long_name = Qnil;
1426 Lisp_Object mother = Qnil;
1427 int min_code = 0, max_code = 0, code_offset = 0;
1428 int byte_offset = -1;
1431 CHECK_SYMBOL (name);
1432 if (!NILP (doc_string))
1433 CHECK_STRING (doc_string);
1435 charset = Ffind_charset (name);
1436 if (!NILP (charset))
1437 signal_simple_error ("Cannot redefine existing charset", name);
1440 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1442 if (EQ (keyword, Qshort_name))
1444 CHECK_STRING (value);
1448 if (EQ (keyword, Qlong_name))
1450 CHECK_STRING (value);
1454 else if (EQ (keyword, Qdimension))
1457 dimension = XINT (value);
1458 if (dimension < 1 ||
1465 signal_simple_error ("Invalid value for 'dimension", value);
1468 else if (EQ (keyword, Qchars))
1471 chars = XINT (value);
1472 if (chars != 94 && chars != 96
1474 && chars != 128 && chars != 256
1477 signal_simple_error ("Invalid value for 'chars", value);
1480 else if (EQ (keyword, Qcolumns))
1483 columns = XINT (value);
1484 if (columns != 1 && columns != 2)
1485 signal_simple_error ("Invalid value for 'columns", value);
1488 else if (EQ (keyword, Qgraphic))
1491 graphic = XINT (value);
1499 signal_simple_error ("Invalid value for 'graphic", value);
1502 else if (EQ (keyword, Qregistry))
1504 CHECK_STRING (value);
1508 else if (EQ (keyword, Qdirection))
1510 if (EQ (value, Ql2r))
1511 direction = CHARSET_LEFT_TO_RIGHT;
1512 else if (EQ (value, Qr2l))
1513 direction = CHARSET_RIGHT_TO_LEFT;
1515 signal_simple_error ("Invalid value for 'direction", value);
1518 else if (EQ (keyword, Qfinal))
1520 CHECK_CHAR_COERCE_INT (value);
1521 final = XCHAR (value);
1522 if (final < '0' || final > '~')
1523 signal_simple_error ("Invalid value for 'final", value);
1527 else if (EQ (keyword, Qmother))
1529 mother = Fget_charset (value);
1532 else if (EQ (keyword, Qmin_code))
1535 min_code = XUINT (value);
1538 else if (EQ (keyword, Qmax_code))
1541 max_code = XUINT (value);
1544 else if (EQ (keyword, Qcode_offset))
1547 code_offset = XUINT (value);
1550 else if (EQ (keyword, Qconversion))
1552 if (EQ (value, Q94x60))
1553 conversion = CONVERSION_94x60;
1554 else if (EQ (value, Q94x94x60))
1555 conversion = CONVERSION_94x94x60;
1557 signal_simple_error ("Unrecognized conversion", value);
1561 else if (EQ (keyword, Qccl_program))
1563 struct ccl_program test_ccl;
1565 if (setup_ccl_program (&test_ccl, value) < 0)
1566 signal_simple_error ("Invalid value for 'ccl-program", value);
1567 ccl_program = value;
1571 signal_simple_error ("Unrecognized property", keyword);
1577 error ("'final must be specified");
1579 if (dimension == 2 && final > 0x5F)
1581 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1584 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1585 CHARSET_LEFT_TO_RIGHT)) ||
1586 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1587 CHARSET_RIGHT_TO_LEFT)))
1589 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1591 id = get_unallocated_leading_byte (dimension);
1593 if (NILP (doc_string))
1594 doc_string = build_string ("");
1596 if (NILP (registry))
1597 registry = build_string ("");
1599 if (NILP (short_name))
1600 XSETSTRING (short_name, XSYMBOL (name)->name);
1602 if (NILP (long_name))
1603 long_name = doc_string;
1606 columns = dimension;
1608 if (byte_offset < 0)
1612 else if (chars == 96)
1618 charset = make_charset (id, name, chars, dimension, columns, graphic,
1619 final, direction, short_name, long_name,
1620 doc_string, registry,
1621 Qnil, min_code, max_code, code_offset, byte_offset,
1622 mother, conversion);
1623 if (!NILP (ccl_program))
1624 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1628 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1630 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1631 NEW-NAME is the name of the new charset. Return the new charset.
1633 (charset, new_name))
1635 Lisp_Object new_charset = Qnil;
1636 int id, chars, dimension, columns, graphic, final;
1638 Lisp_Object registry, doc_string, short_name, long_name;
1641 charset = Fget_charset (charset);
1642 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1643 signal_simple_error ("Charset already has reverse-direction charset",
1646 CHECK_SYMBOL (new_name);
1647 if (!NILP (Ffind_charset (new_name)))
1648 signal_simple_error ("Cannot redefine existing charset", new_name);
1650 cs = XCHARSET (charset);
1652 chars = CHARSET_CHARS (cs);
1653 dimension = CHARSET_DIMENSION (cs);
1654 columns = CHARSET_COLUMNS (cs);
1655 id = get_unallocated_leading_byte (dimension);
1657 graphic = CHARSET_GRAPHIC (cs);
1658 final = CHARSET_FINAL (cs);
1659 direction = CHARSET_RIGHT_TO_LEFT;
1660 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1661 direction = CHARSET_LEFT_TO_RIGHT;
1662 doc_string = CHARSET_DOC_STRING (cs);
1663 short_name = CHARSET_SHORT_NAME (cs);
1664 long_name = CHARSET_LONG_NAME (cs);
1665 registry = CHARSET_REGISTRY (cs);
1667 new_charset = make_charset (id, new_name, chars, dimension, columns,
1668 graphic, final, direction, short_name, long_name,
1669 doc_string, registry,
1671 CHARSET_DECODING_TABLE(cs),
1672 CHARSET_MIN_CODE(cs),
1673 CHARSET_MAX_CODE(cs),
1674 CHARSET_CODE_OFFSET(cs),
1675 CHARSET_BYTE_OFFSET(cs),
1677 CHARSET_CONVERSION (cs)
1679 Qnil, 0, 0, 0, 0, Qnil, 0
1683 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1684 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1689 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1690 Define symbol ALIAS as an alias for CHARSET.
1694 CHECK_SYMBOL (alias);
1695 charset = Fget_charset (charset);
1696 return Fputhash (alias, charset, Vcharset_hash_table);
1699 /* #### Reverse direction charsets not yet implemented. */
1701 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1703 Return the reverse-direction charset parallel to CHARSET, if any.
1704 This is the charset with the same properties (in particular, the same
1705 dimension, number of characters per dimension, and final byte) as
1706 CHARSET but whose characters are displayed in the opposite direction.
1710 charset = Fget_charset (charset);
1711 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1715 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1716 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1717 If DIRECTION is omitted, both directions will be checked (left-to-right
1718 will be returned if character sets exist for both directions).
1720 (dimension, chars, final, direction))
1722 int dm, ch, fi, di = -1;
1723 Lisp_Object obj = Qnil;
1725 CHECK_INT (dimension);
1726 dm = XINT (dimension);
1727 if (dm < 1 || dm > 2)
1728 signal_simple_error ("Invalid value for DIMENSION", dimension);
1732 if (ch != 94 && ch != 96)
1733 signal_simple_error ("Invalid value for CHARS", chars);
1735 CHECK_CHAR_COERCE_INT (final);
1737 if (fi < '0' || fi > '~')
1738 signal_simple_error ("Invalid value for FINAL", final);
1740 if (EQ (direction, Ql2r))
1741 di = CHARSET_LEFT_TO_RIGHT;
1742 else if (EQ (direction, Qr2l))
1743 di = CHARSET_RIGHT_TO_LEFT;
1744 else if (!NILP (direction))
1745 signal_simple_error ("Invalid value for DIRECTION", direction);
1747 if (dm == 2 && fi > 0x5F)
1749 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1753 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1755 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1758 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1761 return XCHARSET_NAME (obj);
1765 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1766 Return short name of CHARSET.
1770 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1773 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1774 Return long name of CHARSET.
1778 return XCHARSET_LONG_NAME (Fget_charset (charset));
1781 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1782 Return description of CHARSET.
1786 return XCHARSET_DOC_STRING (Fget_charset (charset));
1789 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1790 Return dimension of CHARSET.
1794 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1797 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1798 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1799 Recognized properties are those listed in `make-charset', as well as
1800 'name and 'doc-string.
1806 charset = Fget_charset (charset);
1807 cs = XCHARSET (charset);
1809 CHECK_SYMBOL (prop);
1810 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1811 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1812 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1813 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1814 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1815 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1816 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1817 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1818 Qnil : make_char (CHARSET_FINAL (cs));
1819 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1820 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1821 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1822 if (EQ (prop, Qdirection))
1823 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1824 if (EQ (prop, Qreverse_direction_charset))
1826 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1827 /* #### Is this translation OK? If so, error checking sufficient? */
1828 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1831 if (EQ (prop, Qmother))
1832 return CHARSET_MOTHER (cs);
1833 if (EQ (prop, Qmin_code))
1834 return make_int (CHARSET_MIN_CODE (cs));
1835 if (EQ (prop, Qmax_code))
1836 return make_int (CHARSET_MAX_CODE (cs));
1838 signal_simple_error ("Unrecognized charset property name", prop);
1839 return Qnil; /* not reached */
1842 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1843 Return charset identification number of CHARSET.
1847 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1850 /* #### We need to figure out which properties we really want to
1853 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1854 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1856 (charset, ccl_program))
1858 struct ccl_program test_ccl;
1860 charset = Fget_charset (charset);
1861 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1862 signal_simple_error ("Invalid ccl-program", ccl_program);
1863 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1868 invalidate_charset_font_caches (Lisp_Object charset)
1870 /* Invalidate font cache entries for charset on all devices. */
1871 Lisp_Object devcons, concons, hash_table;
1872 DEVICE_LOOP_NO_BREAK (devcons, concons)
1874 struct device *d = XDEVICE (XCAR (devcons));
1875 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1876 if (!UNBOUNDP (hash_table))
1877 Fclrhash (hash_table);
1881 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1882 Set the 'registry property of CHARSET to REGISTRY.
1884 (charset, registry))
1886 charset = Fget_charset (charset);
1887 CHECK_STRING (registry);
1888 XCHARSET_REGISTRY (charset) = registry;
1889 invalidate_charset_font_caches (charset);
1890 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1895 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1896 Return mapping-table of CHARSET.
1900 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1903 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1904 Set mapping-table of CHARSET to TABLE.
1908 struct Lisp_Charset *cs;
1912 charset = Fget_charset (charset);
1913 cs = XCHARSET (charset);
1917 CHARSET_DECODING_TABLE(cs) = Qnil;
1920 else if (VECTORP (table))
1922 int ccs_len = CHARSET_BYTE_SIZE (cs);
1923 int ret = decoding_table_check_elements (table,
1924 CHARSET_DIMENSION (cs),
1929 signal_simple_error ("Too big table", table);
1931 signal_simple_error ("Invalid element is found", table);
1933 signal_simple_error ("Something wrong", table);
1935 CHARSET_DECODING_TABLE(cs) = Qnil;
1938 signal_error (Qwrong_type_argument,
1939 list2 (build_translated_string ("vector-or-nil-p"),
1942 byte_offset = CHARSET_BYTE_OFFSET (cs);
1943 switch (CHARSET_DIMENSION (cs))
1946 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1948 Lisp_Object c = XVECTOR_DATA(table)[i];
1951 Fput_char_attribute (c, XCHARSET_NAME (charset),
1952 make_int (i + byte_offset));
1956 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1958 Lisp_Object v = XVECTOR_DATA(table)[i];
1964 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1966 Lisp_Object c = XVECTOR_DATA(v)[j];
1970 (c, XCHARSET_NAME (charset),
1971 make_int ( ( (i + byte_offset) << 8 )
1977 Fput_char_attribute (v, XCHARSET_NAME (charset),
1978 make_int (i + byte_offset));
1987 /************************************************************************/
1988 /* Lisp primitives for working with characters */
1989 /************************************************************************/
1992 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
1993 Make a character from CHARSET and code-point CODE.
1994 If DEFINED_ONLY is non-nil, builtin character is not returned.
1995 If corresponding character is not found, nil is returned.
1997 (charset, code, defined_only))
2001 charset = Fget_charset (charset);
2004 if (XCHARSET_GRAPHIC (charset) == 1)
2006 if (NILP (defined_only))
2007 c = DECODE_CHAR (charset, c);
2009 c = decode_defined_char (charset, c);
2010 return c >= 0 ? make_char (c) : Qnil;
2013 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2014 Make a builtin character from CHARSET and code-point CODE.
2020 charset = Fget_charset (charset);
2022 if (EQ (charset, Vcharset_latin_viscii))
2024 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2030 (ret = Fget_char_attribute (chr,
2031 Vcharset_latin_viscii_lower,
2034 charset = Vcharset_latin_viscii_lower;
2038 (ret = Fget_char_attribute (chr,
2039 Vcharset_latin_viscii_upper,
2042 charset = Vcharset_latin_viscii_upper;
2049 if (XCHARSET_GRAPHIC (charset) == 1)
2052 c = decode_builtin_char (charset, c);
2053 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
2057 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2058 Make a character from CHARSET and octets ARG1 and ARG2.
2059 ARG2 is required only for characters from two-dimensional charsets.
2060 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2061 character s with caron.
2063 (charset, arg1, arg2))
2067 int lowlim, highlim;
2069 charset = Fget_charset (charset);
2070 cs = XCHARSET (charset);
2072 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2073 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2075 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2077 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2078 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2081 /* It is useful (and safe, according to Olivier Galibert) to strip
2082 the 8th bit off ARG1 and ARG2 because it allows programmers to
2083 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2084 Latin 2 code of the character. */
2092 if (a1 < lowlim || a1 > highlim)
2093 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2095 if (CHARSET_DIMENSION (cs) == 1)
2099 ("Charset is of dimension one; second octet must be nil", arg2);
2100 return make_char (MAKE_CHAR (charset, a1, 0));
2109 a2 = XINT (arg2) & 0x7f;
2111 if (a2 < lowlim || a2 > highlim)
2112 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2114 return make_char (MAKE_CHAR (charset, a1, a2));
2117 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2118 Return the character set of CHARACTER.
2122 CHECK_CHAR_COERCE_INT (character);
2124 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2127 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2128 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2129 N defaults to 0 if omitted.
2133 Lisp_Object charset;
2136 CHECK_CHAR_COERCE_INT (character);
2138 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2140 if (NILP (n) || EQ (n, Qzero))
2141 return make_int (octet0);
2142 else if (EQ (n, make_int (1)))
2143 return make_int (octet1);
2145 signal_simple_error ("Octet number must be 0 or 1", n);
2149 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2150 Return code-point of CHARACTER in specified CHARSET.
2152 (character, charset, defined_only))
2156 CHECK_CHAR_COERCE_INT (character);
2157 charset = Fget_charset (charset);
2158 code_point = charset_code_point (charset, XCHAR (character),
2159 !NILP (defined_only));
2160 if (code_point >= 0)
2161 return make_int (code_point);
2167 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2168 Return list of charset and one or two position-codes of CHARACTER.
2172 /* This function can GC */
2173 struct gcpro gcpro1, gcpro2;
2174 Lisp_Object charset = Qnil;
2175 Lisp_Object rc = Qnil;
2183 GCPRO2 (charset, rc);
2184 CHECK_CHAR_COERCE_INT (character);
2187 code_point = ENCODE_CHAR (XCHAR (character), charset);
2188 dimension = XCHARSET_DIMENSION (charset);
2189 while (dimension > 0)
2191 rc = Fcons (make_int (code_point & 255), rc);
2195 rc = Fcons (XCHARSET_NAME (charset), rc);
2197 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2199 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2201 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2205 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2214 #ifdef ENABLE_COMPOSITE_CHARS
2215 /************************************************************************/
2216 /* composite character functions */
2217 /************************************************************************/
2220 lookup_composite_char (Bufbyte *str, int len)
2222 Lisp_Object lispstr = make_string (str, len);
2223 Lisp_Object ch = Fgethash (lispstr,
2224 Vcomposite_char_string2char_hash_table,
2230 if (composite_char_row_next >= 128)
2231 signal_simple_error ("No more composite chars available", lispstr);
2232 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2233 composite_char_col_next);
2234 Fputhash (make_char (emch), lispstr,
2235 Vcomposite_char_char2string_hash_table);
2236 Fputhash (lispstr, make_char (emch),
2237 Vcomposite_char_string2char_hash_table);
2238 composite_char_col_next++;
2239 if (composite_char_col_next >= 128)
2241 composite_char_col_next = 32;
2242 composite_char_row_next++;
2251 composite_char_string (Emchar ch)
2253 Lisp_Object str = Fgethash (make_char (ch),
2254 Vcomposite_char_char2string_hash_table,
2256 assert (!UNBOUNDP (str));
2260 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2261 Convert a string into a single composite character.
2262 The character is the result of overstriking all the characters in
2267 CHECK_STRING (string);
2268 return make_char (lookup_composite_char (XSTRING_DATA (string),
2269 XSTRING_LENGTH (string)));
2272 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2273 Return a string of the characters comprising a composite character.
2281 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2282 signal_simple_error ("Must be composite char", ch);
2283 return composite_char_string (emch);
2285 #endif /* ENABLE_COMPOSITE_CHARS */
2288 /************************************************************************/
2289 /* initialization */
2290 /************************************************************************/
2293 syms_of_mule_charset (void)
2295 INIT_LRECORD_IMPLEMENTATION (charset);
2297 DEFSUBR (Fcharsetp);
2298 DEFSUBR (Ffind_charset);
2299 DEFSUBR (Fget_charset);
2300 DEFSUBR (Fcharset_list);
2301 DEFSUBR (Fcharset_name);
2302 DEFSUBR (Fmake_charset);
2303 DEFSUBR (Fmake_reverse_direction_charset);
2304 /* DEFSUBR (Freverse_direction_charset); */
2305 DEFSUBR (Fdefine_charset_alias);
2306 DEFSUBR (Fcharset_from_attributes);
2307 DEFSUBR (Fcharset_short_name);
2308 DEFSUBR (Fcharset_long_name);
2309 DEFSUBR (Fcharset_description);
2310 DEFSUBR (Fcharset_dimension);
2311 DEFSUBR (Fcharset_property);
2312 DEFSUBR (Fcharset_id);
2313 DEFSUBR (Fset_charset_ccl_program);
2314 DEFSUBR (Fset_charset_registry);
2316 DEFSUBR (Fcharset_mapping_table);
2317 DEFSUBR (Fset_charset_mapping_table);
2321 DEFSUBR (Fdecode_char);
2322 DEFSUBR (Fdecode_builtin_char);
2323 DEFSUBR (Fencode_char);
2325 DEFSUBR (Fmake_char);
2326 DEFSUBR (Fchar_charset);
2327 DEFSUBR (Fchar_octet);
2328 DEFSUBR (Fsplit_char);
2330 #ifdef ENABLE_COMPOSITE_CHARS
2331 DEFSUBR (Fmake_composite_char);
2332 DEFSUBR (Fcomposite_char_string);
2335 defsymbol (&Qcharsetp, "charsetp");
2336 defsymbol (&Qregistry, "registry");
2337 defsymbol (&Qfinal, "final");
2338 defsymbol (&Qgraphic, "graphic");
2339 defsymbol (&Qdirection, "direction");
2340 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2341 defsymbol (&Qshort_name, "short-name");
2342 defsymbol (&Qlong_name, "long-name");
2344 defsymbol (&Qmother, "mother");
2345 defsymbol (&Qmin_code, "min-code");
2346 defsymbol (&Qmax_code, "max-code");
2347 defsymbol (&Qcode_offset, "code-offset");
2348 defsymbol (&Qconversion, "conversion");
2349 defsymbol (&Q94x60, "94x60");
2350 defsymbol (&Q94x94x60, "94x94x60");
2353 defsymbol (&Ql2r, "l2r");
2354 defsymbol (&Qr2l, "r2l");
2356 /* Charsets, compatible with FSF 20.3
2357 Naming convention is Script-Charset[-Edition] */
2358 defsymbol (&Qascii, "ascii");
2359 defsymbol (&Qcontrol_1, "control-1");
2360 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2361 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2362 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2363 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2364 defsymbol (&Qthai_tis620, "thai-tis620");
2365 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2366 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2367 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2368 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2369 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2370 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2371 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2372 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2373 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2374 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2375 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2376 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
2377 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2378 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2379 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2380 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2382 defsymbol (&Qucs, "ucs");
2383 defsymbol (&Qucs_bmp, "ucs-bmp");
2384 defsymbol (&Qucs_smp, "ucs-smp");
2385 defsymbol (&Qucs_sip, "ucs-sip");
2386 defsymbol (&Qucs_gb, "ucs-gb");
2387 defsymbol (&Qucs_cns, "ucs-cns");
2388 defsymbol (&Qucs_jis, "ucs-jis");
2389 defsymbol (&Qucs_ks, "ucs-ks");
2390 defsymbol (&Qlatin_viscii, "latin-viscii");
2391 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2392 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2393 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2394 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2395 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2396 defsymbol (&Qjis_x0208, "=jis-x0208");
2397 defsymbol (&Qchinese_big5, "chinese-big5");
2398 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2400 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2401 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2403 defsymbol (&Qcomposite, "composite");
2407 vars_of_mule_charset (void)
2414 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2415 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2417 /* Table of charsets indexed by leading byte. */
2418 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2419 chlook->charset_by_leading_byte[i] = Qnil;
2422 /* Table of charsets indexed by type/final-byte. */
2423 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2424 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2425 chlook->charset_by_attributes[i][j] = Qnil;
2427 /* Table of charsets indexed by type/final-byte/direction. */
2428 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2429 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2430 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2431 chlook->charset_by_attributes[i][j][k] = Qnil;
2435 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2437 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2438 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2442 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2443 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2444 Leading-code of private TYPE9N charset of column-width 1.
2446 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2450 Vdefault_coded_charset_priority_list = Qnil;
2451 DEFVAR_LISP ("default-coded-charset-priority-list",
2452 &Vdefault_coded_charset_priority_list /*
2453 Default order of preferred coded-character-sets.
2459 complex_vars_of_mule_charset (void)
2461 staticpro (&Vcharset_hash_table);
2462 Vcharset_hash_table =
2463 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2465 /* Predefined character sets. We store them into variables for
2469 staticpro (&Vcharset_ucs);
2471 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2472 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2473 build_string ("UCS"),
2474 build_string ("UCS"),
2475 build_string ("ISO/IEC 10646"),
2477 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2478 staticpro (&Vcharset_ucs_bmp);
2480 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2481 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2482 build_string ("BMP"),
2483 build_string ("UCS-BMP"),
2484 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2486 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2487 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2488 staticpro (&Vcharset_ucs_smp);
2490 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2491 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2492 build_string ("SMP"),
2493 build_string ("UCS-SMP"),
2494 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2495 build_string ("UCS00-1"),
2496 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2497 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2498 staticpro (&Vcharset_ucs_sip);
2500 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2501 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2502 build_string ("SIP"),
2503 build_string ("UCS-SIP"),
2504 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2505 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2506 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2507 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2508 staticpro (&Vcharset_ucs_gb);
2510 make_charset (LEADING_BYTE_UCS_GB, Qucs_gb, 256, 3,
2511 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2512 build_string ("UCS for GB"),
2513 build_string ("UCS for GB"),
2514 build_string ("ISO/IEC 10646 for GB"),
2516 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2517 staticpro (&Vcharset_ucs_cns);
2519 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
2520 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2521 build_string ("UCS for CNS"),
2522 build_string ("UCS for CNS 11643"),
2523 build_string ("ISO/IEC 10646 for CNS 11643"),
2525 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2526 staticpro (&Vcharset_ucs_jis);
2528 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
2529 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2530 build_string ("UCS for JIS"),
2531 build_string ("UCS for JIS X 0208, 0212 and 0213"),
2533 ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
2535 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2536 staticpro (&Vcharset_ucs_ks);
2538 make_charset (LEADING_BYTE_UCS_KS, Qucs_ks, 256, 3,
2539 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2540 build_string ("UCS for KS"),
2541 build_string ("UCS for CCS defined by KS"),
2542 build_string ("ISO/IEC 10646 for Korean Standards"),
2544 Qnil, 0, 0, 0, 0, Vcharset_ucs, CONVERSION_IDENTICAL);
2546 # define MIN_CHAR_THAI 0
2547 # define MAX_CHAR_THAI 0
2548 /* # define MIN_CHAR_HEBREW 0 */
2549 /* # define MAX_CHAR_HEBREW 0 */
2550 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2551 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2553 staticpro (&Vcharset_ascii);
2555 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2556 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2557 build_string ("ASCII"),
2558 build_string ("ASCII)"),
2559 build_string ("ASCII (ISO646 IRV)"),
2560 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2561 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2562 staticpro (&Vcharset_control_1);
2563 Vcharset_control_1 =
2564 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2565 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2566 build_string ("C1"),
2567 build_string ("Control characters"),
2568 build_string ("Control characters 128-191"),
2570 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2571 staticpro (&Vcharset_latin_iso8859_1);
2572 Vcharset_latin_iso8859_1 =
2573 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2574 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2575 build_string ("Latin-1"),
2576 build_string ("ISO8859-1 (Latin-1)"),
2577 build_string ("ISO8859-1 (Latin-1)"),
2578 build_string ("iso8859-1"),
2579 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2580 staticpro (&Vcharset_latin_iso8859_2);
2581 Vcharset_latin_iso8859_2 =
2582 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2583 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2584 build_string ("Latin-2"),
2585 build_string ("ISO8859-2 (Latin-2)"),
2586 build_string ("ISO8859-2 (Latin-2)"),
2587 build_string ("iso8859-2"),
2588 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2589 staticpro (&Vcharset_latin_iso8859_3);
2590 Vcharset_latin_iso8859_3 =
2591 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2592 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2593 build_string ("Latin-3"),
2594 build_string ("ISO8859-3 (Latin-3)"),
2595 build_string ("ISO8859-3 (Latin-3)"),
2596 build_string ("iso8859-3"),
2597 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2598 staticpro (&Vcharset_latin_iso8859_4);
2599 Vcharset_latin_iso8859_4 =
2600 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2601 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2602 build_string ("Latin-4"),
2603 build_string ("ISO8859-4 (Latin-4)"),
2604 build_string ("ISO8859-4 (Latin-4)"),
2605 build_string ("iso8859-4"),
2606 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2607 staticpro (&Vcharset_thai_tis620);
2608 Vcharset_thai_tis620 =
2609 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2610 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2611 build_string ("TIS620"),
2612 build_string ("TIS620 (Thai)"),
2613 build_string ("TIS620.2529 (Thai)"),
2614 build_string ("tis620"),
2615 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2616 staticpro (&Vcharset_greek_iso8859_7);
2617 Vcharset_greek_iso8859_7 =
2618 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2619 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2620 build_string ("ISO8859-7"),
2621 build_string ("ISO8859-7 (Greek)"),
2622 build_string ("ISO8859-7 (Greek)"),
2623 build_string ("iso8859-7"),
2624 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2625 staticpro (&Vcharset_arabic_iso8859_6);
2626 Vcharset_arabic_iso8859_6 =
2627 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2628 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2629 build_string ("ISO8859-6"),
2630 build_string ("ISO8859-6 (Arabic)"),
2631 build_string ("ISO8859-6 (Arabic)"),
2632 build_string ("iso8859-6"),
2633 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2634 staticpro (&Vcharset_hebrew_iso8859_8);
2635 Vcharset_hebrew_iso8859_8 =
2636 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2637 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2638 build_string ("ISO8859-8"),
2639 build_string ("ISO8859-8 (Hebrew)"),
2640 build_string ("ISO8859-8 (Hebrew)"),
2641 build_string ("iso8859-8"),
2643 0 /* MIN_CHAR_HEBREW */,
2644 0 /* MAX_CHAR_HEBREW */, 0, 32,
2645 Qnil, CONVERSION_IDENTICAL);
2646 staticpro (&Vcharset_katakana_jisx0201);
2647 Vcharset_katakana_jisx0201 =
2648 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2649 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2650 build_string ("JISX0201 Kana"),
2651 build_string ("JISX0201.1976 (Japanese Kana)"),
2652 build_string ("JISX0201.1976 Japanese Kana"),
2653 build_string ("jisx0201\\.1976"),
2654 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2655 staticpro (&Vcharset_latin_jisx0201);
2656 Vcharset_latin_jisx0201 =
2657 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2658 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2659 build_string ("JISX0201 Roman"),
2660 build_string ("JISX0201.1976 (Japanese Roman)"),
2661 build_string ("JISX0201.1976 Japanese Roman"),
2662 build_string ("jisx0201\\.1976"),
2663 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2664 staticpro (&Vcharset_cyrillic_iso8859_5);
2665 Vcharset_cyrillic_iso8859_5 =
2666 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2667 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2668 build_string ("ISO8859-5"),
2669 build_string ("ISO8859-5 (Cyrillic)"),
2670 build_string ("ISO8859-5 (Cyrillic)"),
2671 build_string ("iso8859-5"),
2672 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2673 staticpro (&Vcharset_latin_iso8859_9);
2674 Vcharset_latin_iso8859_9 =
2675 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2676 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2677 build_string ("Latin-5"),
2678 build_string ("ISO8859-9 (Latin-5)"),
2679 build_string ("ISO8859-9 (Latin-5)"),
2680 build_string ("iso8859-9"),
2681 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2683 staticpro (&Vcharset_jis_x0208);
2684 Vcharset_jis_x0208 =
2685 make_charset (LEADING_BYTE_JIS_X0208,
2687 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2688 build_string ("JIS X0208"),
2689 build_string ("JIS X0208 Common"),
2690 build_string ("JIS X0208 Common part"),
2691 build_string ("jisx0208\\.1990"),
2693 MIN_CHAR_JIS_X0208_1990,
2694 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2695 Qnil, CONVERSION_94x94);
2697 staticpro (&Vcharset_japanese_jisx0208_1978);
2698 Vcharset_japanese_jisx0208_1978 =
2699 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2700 Qjapanese_jisx0208_1978, 94, 2,
2701 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2702 build_string ("JIS X0208:1978"),
2703 build_string ("JIS X0208:1978 (Japanese)"),
2705 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2706 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2713 CONVERSION_IDENTICAL);
2714 staticpro (&Vcharset_chinese_gb2312);
2715 Vcharset_chinese_gb2312 =
2716 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2717 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2718 build_string ("GB2312"),
2719 build_string ("GB2312)"),
2720 build_string ("GB2312 Chinese simplified"),
2721 build_string ("gb2312"),
2722 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2723 staticpro (&Vcharset_chinese_gb12345);
2724 Vcharset_chinese_gb12345 =
2725 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2726 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2727 build_string ("G1"),
2728 build_string ("GB 12345)"),
2729 build_string ("GB 12345-1990"),
2730 build_string ("GB12345\\(\\.1990\\)?-0"),
2731 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2732 staticpro (&Vcharset_japanese_jisx0208);
2733 Vcharset_japanese_jisx0208 =
2734 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2735 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2736 build_string ("JISX0208"),
2737 build_string ("JIS X0208:1983 (Japanese)"),
2738 build_string ("JIS X0208:1983 Japanese Kanji"),
2739 build_string ("jisx0208\\.1983"),
2746 CONVERSION_IDENTICAL);
2748 staticpro (&Vcharset_japanese_jisx0208_1990);
2749 Vcharset_japanese_jisx0208_1990 =
2750 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2751 Qjapanese_jisx0208_1990, 94, 2,
2752 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2753 build_string ("JISX0208-1990"),
2754 build_string ("JIS X0208:1990 (Japanese)"),
2755 build_string ("JIS X0208:1990 Japanese Kanji"),
2756 build_string ("jisx0208\\.1990"),
2758 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
2759 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
2760 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
2761 Vcharset_jis_x0208 /* Qnil */,
2762 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
2764 staticpro (&Vcharset_korean_ksc5601);
2765 Vcharset_korean_ksc5601 =
2766 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2767 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2768 build_string ("KSC5601"),
2769 build_string ("KSC5601 (Korean"),
2770 build_string ("KSC5601 Korean Hangul and Hanja"),
2771 build_string ("ksc5601"),
2772 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2773 staticpro (&Vcharset_japanese_jisx0212);
2774 Vcharset_japanese_jisx0212 =
2775 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2776 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2777 build_string ("JISX0212"),
2778 build_string ("JISX0212 (Japanese)"),
2779 build_string ("JISX0212 Japanese Supplement"),
2780 build_string ("jisx0212"),
2781 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2783 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2784 staticpro (&Vcharset_chinese_cns11643_1);
2785 Vcharset_chinese_cns11643_1 =
2786 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2787 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2788 build_string ("CNS11643-1"),
2789 build_string ("CNS11643-1 (Chinese traditional)"),
2791 ("CNS 11643 Plane 1 Chinese traditional"),
2792 build_string (CHINESE_CNS_PLANE_RE("1")),
2793 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2794 staticpro (&Vcharset_chinese_cns11643_2);
2795 Vcharset_chinese_cns11643_2 =
2796 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2797 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2798 build_string ("CNS11643-2"),
2799 build_string ("CNS11643-2 (Chinese traditional)"),
2801 ("CNS 11643 Plane 2 Chinese traditional"),
2802 build_string (CHINESE_CNS_PLANE_RE("2")),
2803 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2805 staticpro (&Vcharset_latin_tcvn5712);
2806 Vcharset_latin_tcvn5712 =
2807 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2808 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2809 build_string ("TCVN 5712"),
2810 build_string ("TCVN 5712 (VSCII-2)"),
2811 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
2812 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2813 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2814 staticpro (&Vcharset_latin_viscii_lower);
2815 Vcharset_latin_viscii_lower =
2816 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2817 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2818 build_string ("VISCII lower"),
2819 build_string ("VISCII lower (Vietnamese)"),
2820 build_string ("VISCII lower (Vietnamese)"),
2821 build_string ("MULEVISCII-LOWER"),
2822 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2823 staticpro (&Vcharset_latin_viscii_upper);
2824 Vcharset_latin_viscii_upper =
2825 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2826 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2827 build_string ("VISCII upper"),
2828 build_string ("VISCII upper (Vietnamese)"),
2829 build_string ("VISCII upper (Vietnamese)"),
2830 build_string ("MULEVISCII-UPPER"),
2831 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2832 staticpro (&Vcharset_latin_viscii);
2833 Vcharset_latin_viscii =
2834 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2835 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2836 build_string ("VISCII"),
2837 build_string ("VISCII 1.1 (Vietnamese)"),
2838 build_string ("VISCII 1.1 (Vietnamese)"),
2839 build_string ("VISCII1\\.1"),
2840 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2841 staticpro (&Vcharset_chinese_big5);
2842 Vcharset_chinese_big5 =
2843 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2844 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2845 build_string ("Big5"),
2846 build_string ("Big5"),
2847 build_string ("Big5 Chinese traditional"),
2848 build_string ("big5-0"),
2850 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2851 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
2853 staticpro (&Vcharset_ethiopic_ucs);
2854 Vcharset_ethiopic_ucs =
2855 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
2856 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2857 build_string ("Ethiopic (UCS)"),
2858 build_string ("Ethiopic (UCS)"),
2859 build_string ("Ethiopic of UCS"),
2860 build_string ("Ethiopic-Unicode"),
2861 Qnil, 0x1200, 0x137F, 0, 0,
2862 Qnil, CONVERSION_IDENTICAL);
2864 staticpro (&Vcharset_chinese_big5_1);
2865 Vcharset_chinese_big5_1 =
2866 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
2867 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
2868 build_string ("Big5"),
2869 build_string ("Big5 (Level-1)"),
2871 ("Big5 Level-1 Chinese traditional"),
2872 build_string ("big5"),
2873 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2874 staticpro (&Vcharset_chinese_big5_2);
2875 Vcharset_chinese_big5_2 =
2876 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
2877 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
2878 build_string ("Big5"),
2879 build_string ("Big5 (Level-2)"),
2881 ("Big5 Level-2 Chinese traditional"),
2882 build_string ("big5"),
2883 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2885 #ifdef ENABLE_COMPOSITE_CHARS
2886 /* #### For simplicity, we put composite chars into a 96x96 charset.
2887 This is going to lead to problems because you can run out of
2888 room, esp. as we don't yet recycle numbers. */
2889 staticpro (&Vcharset_composite);
2890 Vcharset_composite =
2891 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
2892 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2893 build_string ("Composite"),
2894 build_string ("Composite characters"),
2895 build_string ("Composite characters"),
2898 /* #### not dumped properly */
2899 composite_char_row_next = 32;
2900 composite_char_col_next = 32;
2902 Vcomposite_char_string2char_hash_table =
2903 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2904 Vcomposite_char_char2string_hash_table =
2905 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2906 staticpro (&Vcomposite_char_string2char_hash_table);
2907 staticpro (&Vcomposite_char_char2string_hash_table);
2908 #endif /* ENABLE_COMPOSITE_CHARS */