1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002,2003 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
/* One global Lisp_Object slot per predefined charset, named
   Vcharset_<charset>.  Vcharset_composite is declared only when
   ENABLE_COMPOSITE_CHARS is defined.
   Nothing in the part of the file shown here assigns these slots; the
   functions below only read them.
   NOTE(review): presumably they are filled in by charset setup code
   elsewhere -- confirm. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_latin_viscii;
72 Lisp_Object Vcharset_latin_tcvn5712;
73 Lisp_Object Vcharset_latin_viscii_lower;
74 Lisp_Object Vcharset_latin_viscii_upper;
75 Lisp_Object Vcharset_jis_x0208;
76 Lisp_Object Vcharset_chinese_big5;
77 Lisp_Object Vcharset_ethiopic_ucs;
79 Lisp_Object Vcharset_chinese_big5_1;
80 Lisp_Object Vcharset_chinese_big5_2;
82 #ifdef ENABLE_COMPOSITE_CHARS
83 Lisp_Object Vcharset_composite;
85 /* Hash tables for composite chars. One maps strings representing
86 composed chars to their equivalent chars; one goes the other way. */
/* Composite-character state: two Lisp hash tables (character -> string
   and string -> character, going by their names) and a pair of static
   row/column counters.  Vcomposite_char_char2string_hash_table is
   consulted with Fgethash by non_ascii_valid_char_p.
   NOTE(review): the counters presumably record the next free composite
   character position -- confirm against their users. */
88 Lisp_Object Vcomposite_char_char2string_hash_table;
89 Lisp_Object Vcomposite_char_string2char_hash_table;
91 static int composite_char_row_next;
92 static int composite_char_col_next;
94 #endif /* ENABLE_COMPOSITE_CHARS */
/* chlook points at the struct charset_lookup that holds the charset
   lookup tables (charset_by_leading_byte, charset_by_attributes and the
   next_allocated_* counters are all reached through it below).
   charset_lookup_description_1 describes an array of Lisp objects that
   starts at the charset_by_leading_byte member; charset_lookup_description
   pairs that with sizeof (struct charset_lookup).
   NOTE(review): the count NUM_LEADING_BYTES+4*128 presumably spans
   charset_by_leading_byte plus a 4 x 128 charset_by_attributes table laid
   out right after it -- confirm against the struct declaration. */
96 struct charset_lookup *chlook;
98 static const struct lrecord_description charset_lookup_description_1[] = {
99 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
101 NUM_LEADING_BYTES+4*128
108 static const struct struct_description charset_lookup_description = {
109 sizeof (struct charset_lookup),
110 charset_lookup_description_1
114 /* Table of number of bytes in the string representation of a character
115 indexed by the first byte of that representation.
117 rep_bytes_by_first_byte(c) is more efficient than the equivalent
118 canonical computation:
120 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
/* Lookup table: index is the first byte of a character's string
   representation, value is the total number of bytes in that
   representation.  Bytes 0x00-0x7f map to 1.
   NOTE(review): as shown, the initializer has 11 rows of 16 values (176)
   for an array declared with 0xA0 (160) elements.  Two rows follow the
   "0x80 - 0x8f" comment (one ending in 3, one all 2s); presumably they are
   alternatives chosen by a preprocessor conditional that is not shown --
   confirm. */
122 const Bytecount rep_bytes_by_first_byte[0xA0] =
123 { /* 0x00 - 0x7f are for straight ASCII */
124 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
125 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
126 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
127 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
128 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132 /* 0x80 - 0x8f are for Dimension-1 official charsets */
134 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
136 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
138 /* 0x90 - 0x9d are for Dimension-2 official charsets */
139 /* 0x9e is for Dimension-1 private charsets */
140 /* 0x9f is for Dimension-2 private charsets */
141 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* decoding_table_check_elements: validate decoding-table vector V for a
   charset of dimension DIM whose tables hold at most CCS_LEN entries.
   Visible steps: the length of V is compared against CCS_LEN, then each
   element is examined; an element that is neither nil nor a character
   appears to be handed to a recursive call with DIM - 1.
   NOTE(review): the result codes and the exact guard around the recursive
   call are not shown here -- confirm before relying on the return value. */
147 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
149 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
153 if (XVECTOR_LENGTH (v) > ccs_len)
156 for (i = 0; i < XVECTOR_LENGTH (v); i++)
158 Lisp_Object c = XVECTOR_DATA(v)[i];
160 if (!NILP (c) && !CHARP (c))
164 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* put_char_ccs_code_point: record that CHARACTER has code point VALUE in
   coded charset CCS.
   VALUE is accepted in two shapes:
     - the obsolete form, a list of bytes, which is folded into one integer
       by repeated (code_point << 8) | byte;
     - an integer, used as the code point directly.
   When XCHARSET_GRAPHIC (ccs) is 1 the integer form is masked with
   0x7F7F7F7F (the high bit of each octet is cleared).  Anything else
   signals "Invalid value for coded-charset".
   VALUE is replaced by make_int (code_point).  The character's current
   position in CCS is fetched with Fget_char_attribute and removed from the
   decoding table before the new (code_point -> CHARACTER) entry is put.
   NOTE(review): the opening test involves the `ucs' charset name and
   CHARACTER != VALUE; its exact effect and the function's return value are
   not visible here -- confirm. */
176 put_char_ccs_code_point (Lisp_Object character,
177 Lisp_Object ccs, Lisp_Object value)
179 if (!EQ (XCHARSET_NAME (ccs), Qucs)
181 || (XCHAR (character) != XINT (value)))
183 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
187 { /* obsolete representation: value must be a list of bytes */
188 Lisp_Object ret = Fcar (value);
192 signal_simple_error ("Invalid value for coded-charset", value);
193 code_point = XINT (ret);
194 if (XCHARSET_GRAPHIC (ccs) == 1)
202 signal_simple_error ("Invalid value for coded-charset",
206 signal_simple_error ("Invalid value for coded-charset",
209 if (XCHARSET_GRAPHIC (ccs) == 1)
211 code_point = (code_point << 8) | j;
214 value = make_int (code_point);
216 else if (INTP (value))
218 code_point = XINT (value);
219 if (XCHARSET_GRAPHIC (ccs) == 1)
221 code_point &= 0x7F7F7F7F;
222 value = make_int (code_point);
226 signal_simple_error ("Invalid value for coded-charset", value);
230 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
233 decoding_table_remove_char (ccs, XINT (cpos));
236 decoding_table_put_char (ccs, code_point, character);
/* remove_char_ccs: drop CHARACTER's mapping in coded charset CCS.
   If the charset's decoding table is a vector, the character's current
   code point is looked up with Fget_char_attribute and removed with
   decoding_table_remove_char.  If the encoding table is a char table, the
   entry for CHARACTER is set to Qunbound.
   NOTE(review): the guard on the looked-up position and the return value
   are not visible here -- confirm. */
242 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
244 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
245 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
247 if (VECTORP (decoding_table))
249 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
253 decoding_table_remove_char (ccs, XINT (cpos));
256 if (CHAR_TABLEP (encoding_table))
258 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
/* Lisp symbols and tables used by the charset code.
   Qshort_name, Qlong_name and Qgraphic (together with Qdimension, Qchars
   and Qcolumns) are compared against property keywords while make-charset
   parses its PROPS argument.
   Vcharset_hash_table maps a charset name to its charset object: it is
   written by make_charset (Fputhash) and read by find-charset (Fgethash).
   NOTE(review): the role of leading_code_private_11 cannot be determined
   from the code shown here. */
266 int leading_code_private_11;
269 Lisp_Object Qcharsetp;
271 /* Qdoc_string, Qdimension, Qchars defined in general.c */
272 Lisp_Object Qregistry, Qfinal, Qgraphic;
273 Lisp_Object Qdirection;
274 Lisp_Object Qreverse_direction_charset;
275 Lisp_Object Qleading_byte;
276 Lisp_Object Qshort_name, Qlong_name;
278 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
279 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60;
296 Qjapanese_jisx0208_1978,
313 Qvietnamese_viscii_lower,
314 Qvietnamese_viscii_upper,
324 Lisp_Object Ql2r, Qr2l;
326 Lisp_Object Vcharset_hash_table;
328 /* Composite characters are characters constructed by overstriking two
329 or more regular characters.
331 1) The old Mule implementation involves storing composite characters
332 in a buffer as a tag followed by all of the actual characters
333 used to make up the composite character. I think this is a bad
334 idea; it greatly complicates code that wants to handle strings
335 one character at a time because it has to deal with the possibility
336 of great big ungainly characters. It's much more reasonable to
337 simply store an index into a table of composite characters.
339 2) The current implementation only allows for 16,384 separate
340 composite characters over the lifetime of the XEmacs process.
341 This could become a potential problem if the user
342 edited lots of different files that use composite characters.
343 Due to FSF bogosity, increasing the number of allowable
344 composite characters under Mule would decrease the number
345 of possible faces that can exist. Mule already has shrunk
346 this to 2048, and further shrinkage would become uncomfortable.
347 No such problems exist in XEmacs.
349 Composite characters could be represented as 0x80 C1 C2 C3,
350 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
351 for slightly under 2^20 (one million) composite characters
352 over the XEmacs process lifetime, and you only need to
353 increase the size of a Mule character from 19 to 21 bits.
354 Or you could use 0x80 C1 C2 C3 C4, allowing for about
355 85 million (slightly over 2^26) composite characters. */
358 /************************************************************************/
359 /* Basic Emchar functions */
360 /************************************************************************/
362 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
363 string in STR. Returns the number of bytes stored.
364 Do not call this directly. Use the macro set_charptr_emchar() instead. */
/* non_ascii_set_charptr_emchar: store the multibyte form of C at STR.
   The first group of branches writes a UTF-8-style sequence whose length
   is chosen by the magnitude of C:
     C <= 0x7ff      2 bytes, lead byte (C >> 6)  | 0xc0
     C <= 0xffff     3 bytes, lead byte (C >> 12) | 0xe0
     C <= 0x1fffff   4 bytes, lead byte (C >> 18) | 0xf0
     C <= 0x3ffffff  5 bytes, lead byte (C >> 24) | 0xf8
     otherwise       6 bytes, lead byte (C >> 30) | 0xfc
   Each following byte carries six payload bits as (bits & 0x3f) | 0x80.
   NOTE(review): the handling of values below the first visible threshold
   is not shown here. */
368 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
383 else if ( c <= 0x7ff )
385 *p++ = (c >> 6) | 0xc0;
386 *p++ = (c & 0x3f) | 0x80;
388 else if ( c <= 0xffff )
390 *p++ = (c >> 12) | 0xe0;
391 *p++ = ((c >> 6) & 0x3f) | 0x80;
392 *p++ = (c & 0x3f) | 0x80;
394 else if ( c <= 0x1fffff )
396 *p++ = (c >> 18) | 0xf0;
397 *p++ = ((c >> 12) & 0x3f) | 0x80;
398 *p++ = ((c >> 6) & 0x3f) | 0x80;
399 *p++ = (c & 0x3f) | 0x80;
401 else if ( c <= 0x3ffffff )
403 *p++ = (c >> 24) | 0xf8;
404 *p++ = ((c >> 18) & 0x3f) | 0x80;
405 *p++ = ((c >> 12) & 0x3f) | 0x80;
406 *p++ = ((c >> 6) & 0x3f) | 0x80;
407 *p++ = (c & 0x3f) | 0x80;
411 *p++ = (c >> 30) | 0xfc;
412 *p++ = ((c >> 24) & 0x3f) | 0x80;
413 *p++ = ((c >> 18) & 0x3f) | 0x80;
414 *p++ = ((c >> 12) & 0x3f) | 0x80;
415 *p++ = ((c >> 6) & 0x3f) | 0x80;
416 *p++ = (c & 0x3f) | 0x80;
/* Leading-byte encoding: C is split with BREAKUP_CHAR and a prefix byte is
   emitted first when the leading byte is a private one.
   NOTE(review): presumably this is the alternative to the UTF-8-style code
   above, selected by a preprocessor conditional not shown -- confirm. */
419 BREAKUP_CHAR (c, charset, c1, c2);
420 lb = CHAR_LEADING_BYTE (c);
421 if (LEADING_BYTE_PRIVATE_P (lb))
422 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
424 if (EQ (charset, Vcharset_control_1))
433 /* Return the first character from a Mule-encoded string in STR,
434 assuming it's non-ASCII. Do not call this directly.
435 Use the macro charptr_emchar() instead. */
/* non_ascii_charptr_emchar: decode the first character stored at STR.
   Visible in the first part: the lead byte b is classified by the
   thresholds 0xf8, 0xf0, 0xe0 and 0xc0, and a loop then folds `len'
   further bytes into the result with ch = (ch << 6) | (b & 0x3f), i.e.
   six payload bits per continuation byte.
   Visible in the second part: a LEADING_BYTE_CONTROL_1 lead byte yields
   the next byte minus 0x20; otherwise a prefix byte is detected with
   LEADING_BYTE_PREFIX_P, the charset is found with
   CHARSET_BY_LEADING_BYTE, dimension-2 charsets get special handling, and
   the result is MAKE_CHAR (charset, i1, i2).
   NOTE(review): presumably the two parts are alternative builds selected
   by a preprocessor conditional not shown; how `len' and the initial `ch'
   are derived is not visible -- confirm. */
438 non_ascii_charptr_emchar (const Bufbyte *str)
451 else if ( b >= 0xf8 )
456 else if ( b >= 0xf0 )
461 else if ( b >= 0xe0 )
466 else if ( b >= 0xc0 )
476 for( ; len > 0; len-- )
479 ch = ( ch << 6 ) | ( b & 0x3f );
483 Bufbyte i0 = *str, i1, i2 = 0;
486 if (i0 == LEADING_BYTE_CONTROL_1)
487 return (Emchar) (*++str - 0x20);
489 if (LEADING_BYTE_PREFIX_P (i0))
494 charset = CHARSET_BY_LEADING_BYTE (i0);
495 if (XCHARSET_DIMENSION (charset) == 2)
498 return MAKE_CHAR (charset, i1, i2);
502 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
503 Do not call this directly. Use the macro valid_char_p() instead. */
/* non_ascii_valid_char_p: decide whether CH is a well-formed character by
   inspecting its three fields f1, f2 and f3 (CHAR_FIELD1..3).
   Two cases are visible:
     - a case keyed on f2: f2 must lie in the official or the private
       field-2 range; f3 values 0x20 and 0x7F get special treatment unless
       f2 is private; the charset is then looked up by leading byte;
     - a case keyed on f1: f1 must lie in the official or the private
       field-1 range and neither f2 nor f3 may be below 0x20; under
       ENABLE_COMPOSITE_CHARS a composite leading byte is checked against
       Vcomposite_char_char2string_hash_table; 0x20 / 0x7F in f2 or f3 get
       special treatment unless f1 is private; the charset is looked up
       with the official or the private field-1 offset.
   In both cases the visible final answer for the special-treatment path is
   whether the charset has 96 characters per dimension.
   NOTE(review): which case applies (presumably f1 == 0 selects the first)
   and the values returned on the failing checks are not shown -- confirm. */
507 non_ascii_valid_char_p (Emchar ch)
511 /* Must have only lowest 19 bits set */
515 f1 = CHAR_FIELD1 (ch);
516 f2 = CHAR_FIELD2 (ch);
517 f3 = CHAR_FIELD3 (ch);
523 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
524 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
525 f2 > MAX_CHAR_FIELD2_PRIVATE)
530 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
531 f2 <= MAX_CHAR_FIELD2_PRIVATE))
535 NOTE: This takes advantage of the fact that
536 FIELD2_TO_OFFICIAL_LEADING_BYTE and
537 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
539 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
540 if (EQ (charset, Qnil))
542 return (XCHARSET_CHARS (charset) == 96);
548 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
549 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
550 f1 > MAX_CHAR_FIELD1_PRIVATE)
552 if (f2 < 0x20 || f3 < 0x20)
555 #ifdef ENABLE_COMPOSITE_CHARS
556 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
558 if (UNBOUNDP (Fgethash (make_int (ch),
559 Vcomposite_char_char2string_hash_table,
564 #endif /* ENABLE_COMPOSITE_CHARS */
566 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
567 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
570 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
572 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
575 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
577 if (EQ (charset, Qnil))
579 return (XCHARSET_CHARS (charset) == 96);
585 /************************************************************************/
586 /* Basic string functions */
587 /************************************************************************/
589 /* Copy the character pointed to by SRC into DST. Do not call this
590 directly. Use the macro charptr_copy_char() instead.
591 Return the number of bytes copied. */
/* non_ascii_charptr_copy_char: the character length is taken from
   REP_BYTES_BY_FIRST_BYTE (*src); the loop then runs that many times,
   advancing both SRC and DST by one byte per iteration.
   NOTE(review): the loop body (presumably the byte copy) and the return
   statement are not visible here -- confirm. */
594 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
596 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
598 for (i = bytes; i; i--, dst++, src++)
604 /************************************************************************/
605 /* streams of Emchars */
606 /************************************************************************/
608 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
609 The functions below are not meant to be called directly; use
610 the macros in insdel.h. */
/* Lstream_get_emchar_1: finish reading one character whose first byte CH
   has already been taken from STREAM.
   CH is stored as the first byte of a local buffer; the remaining
   REP_BYTES_BY_FIRST_BYTE (ch) - 1 bytes are pulled with Lstream_getc
   (each asserted to be >= 0 via bufpos_checking_assert) and appended.  The
   assembled bytes are decoded with charptr_emchar and returned. */
613 Lstream_get_emchar_1 (Lstream *stream, int ch)
615 Bufbyte str[MAX_EMCHAR_LEN];
616 Bufbyte *strptr = str;
619 str[0] = (Bufbyte) ch;
621 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
623 int c = Lstream_getc (stream);
624 bufpos_checking_assert (c >= 0);
625 *++strptr = (Bufbyte) c;
627 return charptr_emchar (str);
/* Lstream_fput_emchar: encode CH into a local buffer of MAX_EMCHAR_LEN
   bytes with set_charptr_emchar, write the resulting bytes to STREAM, and
   return whatever Lstream_write returns. */
631 Lstream_fput_emchar (Lstream *stream, Emchar ch)
633 Bufbyte str[MAX_EMCHAR_LEN];
634 Bytecount len = set_charptr_emchar (str, ch);
635 return Lstream_write (stream, str, len);
/* Lstream_funget_emchar: encode CH into a local buffer of MAX_EMCHAR_LEN
   bytes with set_charptr_emchar and hand those bytes to Lstream_unread
   on STREAM. */
639 Lstream_funget_emchar (Lstream *stream, Emchar ch)
641 Bufbyte str[MAX_EMCHAR_LEN];
642 Bytecount len = set_charptr_emchar (str, ch);
643 Lstream_unread (stream, str, len);
647 /************************************************************************/
649 /************************************************************************/
/* mark_charset: mark method registered for the charset record type (see
   DEFINE_LRECORD_IMPLEMENTATION below).  It calls mark_object on the
   Lisp_Object fields of the charset: short_name, long_name, doc_string,
   registry, ccl_program, decoding_table and mother.
   NOTE(review): charset_description also lists name and
   reverse_direction_charset; how those are marked, and what this function
   returns, is not visible here -- confirm. */
652 mark_charset (Lisp_Object obj)
654 Lisp_Charset *cs = XCHARSET (obj);
656 mark_object (cs->short_name);
657 mark_object (cs->long_name);
658 mark_object (cs->doc_string);
659 mark_object (cs->registry);
660 mark_object (cs->ccl_program);
662 mark_object (cs->decoding_table);
663 mark_object (cs->mother);
/* print_charset: print method registered for the charset record type.
   Output, in order: "#<charset ", the charset name, short name, long name
   and doc string (space separated), a formatted attribute summary
   (" %d^%d %s cols=%d g%d final='%c' reg="), the registry, and finally the
   record uid as " 0x%x>".
   An error "printing unreadable object ..." can be raised first.
   NOTE(review): the condition guarding that error (presumably readable
   printing being requested) and the size of `buf' are not visible here --
   confirm. */
669 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
671 Lisp_Charset *cs = XCHARSET (obj);
675 error ("printing unreadable object #<charset %s 0x%x>",
676 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
679 write_c_string ("#<charset ", printcharfun);
680 print_internal (CHARSET_NAME (cs), printcharfun, 0);
681 write_c_string (" ", printcharfun);
682 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
683 write_c_string (" ", printcharfun);
684 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
685 write_c_string (" ", printcharfun);
686 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
687 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
689 CHARSET_DIMENSION (cs),
690 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
691 CHARSET_COLUMNS (cs),
692 CHARSET_GRAPHIC (cs),
694 write_c_string (buf, printcharfun);
695 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
696 sprintf (buf, " 0x%x>", cs->header.uid);
697 write_c_string (buf, printcharfun);
/* charset_description lists the Lisp_Object members of Lisp_Charset by
   offset: name, doc_string, registry, short_name, long_name,
   reverse_direction_charset, ccl_program, decoding_table and mother.
   DEFINE_LRECORD_IMPLEMENTATION then registers the "charset" record type
   with mark_charset and print_charset as its mark and print methods.
   NOTE(review): the terminating entry of the table and the remaining
   arguments of the macro are not visible here. */
700 static const struct lrecord_description charset_description[] = {
701 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
702 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
703 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
704 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
705 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
706 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
707 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
709 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
710 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
715 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
716 mark_charset, print_charset, 0, 0, 0,
720 /* Make a new charset. */
721 /* #### SJT Should generic properties be allowed? */
/* make_charset: allocate a Lisp_Charset record, fill it in from the
   arguments, and register it in the lookup tables.
   Field setup visible here:
     - id, name, short/long name, chars, dimension, direction, columns,
       graphic, final, doc string and registry are copied from the
       arguments;
     - the CCL program and reverse-direction charset start out as Qnil;
     - the decoding table is set to Qunbound; in the lines shown the
       decoding_table argument is not stored;
     - min/max code, code offset, byte offset, mother and conversion are
       copied from the arguments;
     - the representation length is 1 for LEADING_BYTE_ASCII, otherwise
       dimension + 1 or dimension + 2.
   Registration visible here:
     - chlook->charset_by_attributes, indexed by an ISO 2022 type computed
       as (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1) and by the final
       byte (one variant also indexes by direction); the slot is asserted
       to be empty first;
     - chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE], likewise
       asserted to be empty;
     - Vcharset_hash_table, keyed by NAME.
   NOTE(review): the conditions choosing between the +1 / +2 length, between
   the two charset_by_attributes variants, and guarding each registration
   step are not visible here; neither is the return statement -- confirm. */
723 make_charset (Charset_ID id, Lisp_Object name,
724 unsigned short chars, unsigned char dimension,
725 unsigned char columns, unsigned char graphic,
726 Bufbyte final, unsigned char direction, Lisp_Object short_name,
727 Lisp_Object long_name, Lisp_Object doc,
729 Lisp_Object decoding_table,
730 Emchar min_code, Emchar max_code,
731 Emchar code_offset, unsigned char byte_offset,
732 Lisp_Object mother, unsigned char conversion)
735 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
739 XSETCHARSET (obj, cs);
741 CHARSET_ID (cs) = id;
742 CHARSET_NAME (cs) = name;
743 CHARSET_SHORT_NAME (cs) = short_name;
744 CHARSET_LONG_NAME (cs) = long_name;
745 CHARSET_CHARS (cs) = chars;
746 CHARSET_DIMENSION (cs) = dimension;
747 CHARSET_DIRECTION (cs) = direction;
748 CHARSET_COLUMNS (cs) = columns;
749 CHARSET_GRAPHIC (cs) = graphic;
750 CHARSET_FINAL (cs) = final;
751 CHARSET_DOC_STRING (cs) = doc;
752 CHARSET_REGISTRY (cs) = reg;
753 CHARSET_CCL_PROGRAM (cs) = Qnil;
754 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
756 CHARSET_DECODING_TABLE(cs) = Qunbound;
757 CHARSET_MIN_CODE (cs) = min_code;
758 CHARSET_MAX_CODE (cs) = max_code;
759 CHARSET_CODE_OFFSET (cs) = code_offset;
760 CHARSET_BYTE_OFFSET (cs) = byte_offset;
761 CHARSET_MOTHER (cs) = mother;
762 CHARSET_CONVERSION (cs) = conversion;
766 if (id == LEADING_BYTE_ASCII)
767 CHARSET_REP_BYTES (cs) = 1;
769 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
771 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
776 /* some charsets do not have final characters. This includes
777 ASCII, Control-1, Composite, and the two faux private
779 unsigned char iso2022_type
780 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
782 if (code_offset == 0)
784 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
785 chlook->charset_by_attributes[iso2022_type][final] = obj;
789 (chlook->charset_by_attributes[iso2022_type][final][direction]));
790 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
794 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
795 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
797 /* Some charsets are "faux" and don't have names or really exist at
798 all except in the leading-byte table. */
800 Fputhash (name, obj, Vcharset_hash_table);
/* get_unallocated_leading_byte: hand out the next unused leading byte.
   Three counters kept in chlook are visible: next_allocated_leading_byte
   (limit MAX_LEADING_BYTE_PRIVATE), next_allocated_1_byte_leading_byte
   (limit MAX_LEADING_BYTE_PRIVATE_1) and next_allocated_2_byte_leading_byte
   (limit MAX_LEADING_BYTE_PRIVATE_2).  Each is compared against its limit
   and otherwise post-incremented to produce the result.
   The error "No more character sets free for this dimension" is signalled
   with DIMENSION as its datum.
   NOTE(review): which counter is used for which DIMENSION or build, and
   what happens on the over-limit branch before the error, is not visible
   here -- confirm. */
805 get_unallocated_leading_byte (int dimension)
810 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
813 lb = chlook->next_allocated_leading_byte++;
817 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
820 lb = chlook->next_allocated_1_byte_leading_byte++;
824 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
827 lb = chlook->next_allocated_2_byte_leading_byte++;
833 ("No more character sets free for this dimension",
834 make_int (dimension));
/* The expression evaluates to 94 + 63 = 157: 0xFF - 0xA1 is 94 and
   0x7F - 0x40 is 63.
   NOTE(review): no use of this macro is visible in this part of the
   file. */
840 /* Number of Big5 characters which have the same code in 1st byte. */
842 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* decode_defined_char: look CODE_POINT up in the decoding table of coded
   charset CCS.
   The table is narrowed with get_ccs_octet_table using one octet of
   CODE_POINT at a time, extracted as (code_point >> (dim * 8)) & 255.  If
   the result is a character, it is returned.
   Otherwise, when CCS has a mother charset and its conversion is
   CONVERSION_IDENTICAL, the lookup is delegated to the mother: through
   DECODE_CHAR when the mother is Vcharset_ucs, else through a recursive
   decode_defined_char call.
   NOTE(review): the loop that walks the octets and the value returned when
   nothing is found are not visible here -- confirm. */
845 decode_defined_char (Lisp_Object ccs, int code_point)
847 int dim = XCHARSET_DIMENSION (ccs);
848 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
856 = get_ccs_octet_table (decoding_table, ccs,
857 (code_point >> (dim * 8)) & 255);
859 if (CHARP (decoding_table))
860 return XCHAR (decoding_table);
863 else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
865 if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
867 if ( EQ (mother, Vcharset_ucs) )
868 return DECODE_CHAR (mother, code_point);
870 return decode_defined_char (mother, code_point);
/* decode_builtin_char: compute the character for CODE_POINT in CHARSET
   arithmetically, from the charset's attributes.
   Paths visible here:
     - max code > 0 and a mother charset exists: CODE_POINT is first
       remapped for CONVERSION_94x60 (row/cell) or CONVERSION_94x94x60
       (plane/row/cell), where rows are translated in two bands using the
       offsets 16 + 32 and 18 + 32 and cells are rebased by 33; the result
       plus the charset's code offset is decoded recursively in the mother;
     - max code > 0 and no mother: a character id is built from the byte
       offset, the chars-per-dimension count and the code offset, then
       checked against the charset's min/max code;
     - otherwise, if the final byte is >= '0': the character is placed in a
       block selected by (final - '0'), sized 94, 96, 94 * 94 or 96 * 96
       according to dimension and chars, with octets masked by 0x7F and
       rebased by 33 (94-sets) or 32 (96-sets).
   NOTE(review): the results for out-of-range rows and ids, the base
   constants of the dimension-1 blocks, and the fall-through return are not
   visible here -- confirm. */
877 decode_builtin_char (Lisp_Object charset, int code_point)
879 Lisp_Object mother = XCHARSET_MOTHER (charset);
882 if ( XCHARSET_MAX_CODE (charset) > 0 )
884 if ( CHARSETP (mother) )
886 int code = code_point;
888 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
890 int row = code_point >> 8;
891 int cell = code_point & 255;
895 else if (row < 16 + 32 + 30)
896 code = (row - (16 + 32)) * 94 + cell - 33;
897 else if (row < 18 + 32 + 30)
899 else if (row < 18 + 32 + 60)
900 code = (row - (18 + 32)) * 94 + cell - 33;
902 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
904 int plane = code_point >> 16;
905 int row = (code_point >> 8) & 255;
906 int cell = code_point & 255;
910 else if (row < 16 + 32 + 30)
912 = (plane - 33) * 94 * 60
913 + (row - (16 + 32)) * 94
915 else if (row < 18 + 32 + 30)
917 else if (row < 18 + 32 + 60)
919 = (plane - 33) * 94 * 60
920 + (row - (18 + 32)) * 94
924 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
929 = (XCHARSET_DIMENSION (charset) == 1
931 code_point - XCHARSET_BYTE_OFFSET (charset)
933 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
934 * XCHARSET_CHARS (charset)
935 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
936 + XCHARSET_CODE_OFFSET (charset);
937 if ((cid < XCHARSET_MIN_CODE (charset))
938 || (XCHARSET_MAX_CODE (charset) < cid))
943 else if ((final = XCHARSET_FINAL (charset)) >= '0')
945 if (XCHARSET_DIMENSION (charset) == 1)
947 switch (XCHARSET_CHARS (charset))
951 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
954 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
962 switch (XCHARSET_CHARS (charset))
965 return MIN_CHAR_94x94
966 + (final - '0') * 94 * 94
967 + (((code_point >> 8) & 0x7F) - 33) * 94
968 + ((code_point & 0x7F) - 33);
970 return MIN_CHAR_96x96
971 + (final - '0') * 96 * 96
972 + (((code_point >> 8) & 0x7F) - 32) * 96
973 + ((code_point & 0x7F) - 32);
/* charset_code_point: compute the code point of character CH in CHARSET.
   Order of attempts visible here:
     1. The charset's encoding table: if it is a char table and its entry
        for CH is an integer, that entry is used.
     2. Otherwise the charset's mother, min code and max code are read.
        With a mother charset, the code is obtained recursively from the
        mother; DEFINED_ONLY is forced to 1 for that call when this
        charset's final byte is >= '0'.
     3. If the resulting code is acceptable (within [min, max], or max is 0
        with a mother and a non-negative code), d = code - code offset is
        turned into octets according to XCHARSET_CONVERSION: identical, 94,
        96, 94x60, 94x94, 96x96, 94x94x60, 94x94x94, 96x96x96, 94x94x94x94
        or 96x96x96x96.  94-based conversions rebase each digit by 33,
        96-based ones by 32.  An unrecognised conversion is reported with
        printf ("Unknown CCS-conversion %d is specified!").
     4. Otherwise, for charsets with final >= '0' and min code 0, the code
        is derived from CH's offset inside the MIN_CHAR_94, MIN_CHAR_96,
        MIN_CHAR_94x94 or MIN_CHAR_96x96 block selected by (final - '0').
   NOTE(review): the value returned on failure, the row adjustment in the
   94x60 conversions, and the range tests on d in step 4 are not visible
   here -- confirm. */
985 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
987 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
990 if ( CHAR_TABLEP (encoding_table)
991 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
996 Lisp_Object mother = XCHARSET_MOTHER (charset);
997 int min = XCHARSET_MIN_CODE (charset);
998 int max = XCHARSET_MAX_CODE (charset);
1001 if ( CHARSETP (mother) )
1003 if (XCHARSET_FINAL (charset) >= '0')
1004 code = charset_code_point (mother, ch, 1);
1006 code = charset_code_point (mother, ch, defined_only);
1008 else if (defined_only)
1010 else if ( ((max == 0) && CHARSETP (mother)
1011 && (XCHARSET_FINAL (charset) == 0))
1012 || ((min <= ch) && (ch <= max)) )
1014 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1015 || ((min <= code) && (code <= max)) )
1017 int d = code - XCHARSET_CODE_OFFSET (charset);
1019 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1021 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1023 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1025 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1028 int cell = d % 94 + 33;
1034 return (row << 8) | cell;
1036 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1037 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1038 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1039 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1040 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1042 int plane = d / (94 * 60) + 33;
1043 int row = (d % (94 * 60)) / 94;
1044 int cell = d % 94 + 33;
1050 return (plane << 16) | (row << 8) | cell;
1052 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1054 ( (d / (94 * 94) + 33) << 16)
1055 | ((d / 94 % 94 + 33) << 8)
1057 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1059 ( (d / (96 * 96) + 32) << 16)
1060 | ((d / 96 % 96 + 32) << 8)
1062 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1064 ( (d / (94 * 94 * 94) + 33) << 24)
1065 | ((d / (94 * 94) % 94 + 33) << 16)
1066 | ((d / 94 % 94 + 33) << 8)
1068 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1070 ( (d / (96 * 96 * 96) + 32) << 24)
1071 | ((d / (96 * 96) % 96 + 32) << 16)
1072 | ((d / 96 % 96 + 32) << 8)
1076 printf ("Unknown CCS-conversion %d is specified!",
1077 XCHARSET_CONVERSION (charset));
1081 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1082 ( XCHARSET_MIN_CODE (charset) == 0 )
1084 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1085 (XCHARSET_CODE_OFFSET (charset)
1086 == XCHARSET_MIN_CODE (charset))
1091 if (XCHARSET_DIMENSION (charset) == 1)
1093 if (XCHARSET_CHARS (charset) == 94)
1095 if (((d = ch - (MIN_CHAR_94
1096 + (XCHARSET_FINAL (charset) - '0') * 94))
1101 else if (XCHARSET_CHARS (charset) == 96)
1103 if (((d = ch - (MIN_CHAR_96
1104 + (XCHARSET_FINAL (charset) - '0') * 96))
1112 else if (XCHARSET_DIMENSION (charset) == 2)
1114 if (XCHARSET_CHARS (charset) == 94)
1116 if (((d = ch - (MIN_CHAR_94x94
1118 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1121 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1123 else if (XCHARSET_CHARS (charset) == 96)
1125 if (((d = ch - (MIN_CHAR_96x96
1127 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1130 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* encode_builtin_char_1: choose a built-in charset for character C, store
   it through CHARSET, and return C's code point in that charset.
   Ranges are tested in this order:
     - up to MAX_CHAR_BASIC_LATIN: Vcharset_ascii; then Vcharset_control_1
       and Vcharset_latin_iso8859_1;
     - Hebrew and Thai blocks: the matching ISO 8859-8 / TIS 620 charset,
       code point = offset within the block + 0x20;
     - up to MAX_CHAR_BMP / MAX_CHAR_SMP / MAX_CHAR_SIP: Vcharset_ucs_bmp,
       Vcharset_ucs_smp (C - MIN_CHAR_SMP), Vcharset_ucs_sip
       (C - MIN_CHAR_SIP);
     - below MIN_CHAR_94: Vcharset_ucs;
     - the 94, 96, 94x94 and 96x96 blocks: the charset is looked up with
       CHARSET_BY_ATTRIBUTES using a final byte of '0' + block index and
       left-to-right direction; if one exists the octets are rebuilt with a
       base of 33 (94-sets) or 32 (96-sets), otherwise Vcharset_ucs is used;
     - anything else: Vcharset_ucs.
   NOTE(review): the half-width katakana branch returns
   list2 (Vcharset_katakana_jisx0201, ...) without storing through CHARSET,
   unlike every other branch, which returns an integer.  Presumably it sits
   under a preprocessor conditional that is not shown -- confirm it is
   never compiled, or bring it in line with the other branches.
   NOTE(review): the conditions for the control-1 and Latin-1 branches and
   the values returned alongside Vcharset_ucs are not visible here. */
1141 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1143 if (c <= MAX_CHAR_BASIC_LATIN)
1145 *charset = Vcharset_ascii;
1150 *charset = Vcharset_control_1;
1155 *charset = Vcharset_latin_iso8859_1;
1159 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1161 *charset = Vcharset_hebrew_iso8859_8;
1162 return c - MIN_CHAR_HEBREW + 0x20;
1165 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1167 *charset = Vcharset_thai_tis620;
1168 return c - MIN_CHAR_THAI + 0x20;
1171 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1172 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1174 return list2 (Vcharset_katakana_jisx0201,
1175 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1178 else if (c <= MAX_CHAR_BMP)
1180 *charset = Vcharset_ucs_bmp;
1183 else if (c <= MAX_CHAR_SMP)
1185 *charset = Vcharset_ucs_smp;
1186 return c - MIN_CHAR_SMP;
1188 else if (c <= MAX_CHAR_SIP)
1190 *charset = Vcharset_ucs_sip;
1191 return c - MIN_CHAR_SIP;
1193 else if (c < MIN_CHAR_94)
1195 *charset = Vcharset_ucs;
1198 else if (c <= MAX_CHAR_94)
1200 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1201 ((c - MIN_CHAR_94) / 94) + '0',
1202 CHARSET_LEFT_TO_RIGHT);
1203 if (!NILP (*charset))
1204 return ((c - MIN_CHAR_94) % 94) + 33;
1207 *charset = Vcharset_ucs;
1211 else if (c <= MAX_CHAR_96)
1213 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1214 ((c - MIN_CHAR_96) / 96) + '0',
1215 CHARSET_LEFT_TO_RIGHT);
1216 if (!NILP (*charset))
1217 return ((c - MIN_CHAR_96) % 96) + 32;
1220 *charset = Vcharset_ucs;
1224 else if (c <= MAX_CHAR_94x94)
1227 = CHARSET_BY_ATTRIBUTES (94, 2,
1228 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1229 CHARSET_LEFT_TO_RIGHT);
1230 if (!NILP (*charset))
1231 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1232 | (((c - MIN_CHAR_94x94) % 94) + 33);
1235 *charset = Vcharset_ucs;
1239 else if (c <= MAX_CHAR_96x96)
1242 = CHARSET_BY_ATTRIBUTES (96, 2,
1243 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1244 CHARSET_LEFT_TO_RIGHT);
1245 if (!NILP (*charset))
1246 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1247 | (((c - MIN_CHAR_96x96) % 96) + 32);
1250 *charset = Vcharset_ucs;
1256 *charset = Vcharset_ucs;
/* NOTE(review): going by its name this is presumably the list of coded
   charsets tried, in priority order, when a character has to be encoded;
   no use of it is visible in this part of the file -- confirm. */
1261 Lisp_Object Vdefault_coded_charset_priority_list;
1265 /************************************************************************/
1266 /* Basic charset Lisp functions */
1267 /************************************************************************/
/* Lisp primitive `charsetp' (exactly one argument): yields Qt when the
   argument satisfies CHARSETP and Qnil otherwise. */
1269 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1270 Return non-nil if OBJECT is a charset.
1274 return CHARSETP (object) ? Qt : Qnil;
/* Lisp primitive `find-charset' (exactly one argument).  A charset object
   is returned unchanged.  Anything else must pass CHECK_SYMBOL and is then
   looked up in Vcharset_hash_table, with Qnil as the result when the name
   is not present. */
1277 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1278 Retrieve the charset of the given name.
1279 If CHARSET-OR-NAME is a charset object, it is simply returned.
1280 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1281 nil is returned. Otherwise the associated charset object is returned.
1285 if (CHARSETP (charset_or_name))
1286 return charset_or_name;
1288 CHECK_SYMBOL (charset_or_name);
1289 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Lisp primitive `get-charset' (exactly one argument).  The lookup itself
   is delegated to Ffind_charset; the error "No such charset" is signalled
   with NAME as its datum.
   NOTE(review): the test guarding the error (presumably a nil result) and
   the return statement are not visible here -- confirm. */
1292 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1293 Retrieve the charset of the given name.
1294 Same as `find-charset' except an error is signalled if there is no such
1295 charset instead of returning nil.
1299 Lisp_Object charset = Ffind_charset (name);
1302 signal_simple_error ("No such charset", name);
1306 /* We store the charsets in hash tables with the names as the key and the
1307 actual charset object as the value. Occasionally we need to use them
1308 in a list format. These routines provide us with that. */
/* Closure passed through elisp_maphash to add_charset_to_list_mapper.
   Its charset_list member points at the Lisp list being accumulated
   (Fcharset_list stores the address of its local result list here). */
1309 struct charset_list_closure
1311 Lisp_Object *charset_list;
/* add_charset_to_list_mapper: hash-table mapping callback used by
   Fcharset_list.  The opaque third argument is cast back to a
   struct charset_list_closure, and KEY is consed onto the front of the
   list the closure points at.  VALUE is not used in the lines shown.
   NOTE(review): the value returned to the mapping routine is not visible
   here -- confirm. */
1315 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1316 void *charset_list_closure)
1318 /* This function can GC */
1319 struct charset_list_closure *chcl =
1320 (struct charset_list_closure*) charset_list_closure;
1321 Lisp_Object *charset_list = chcl->charset_list;
1323 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
/* Lisp primitive `charset-list' (no arguments).  Starts from an empty
   list protected with GCPRO1, points a charset_list_closure at it, and
   runs add_charset_to_list_mapper over every entry of Vcharset_hash_table
   via elisp_maphash.  The keys of that table are the names under which
   make_charset registered each charset, so the returned list holds those
   names.
   NOTE(review): the matching UNGCPRO is not visible here -- confirm it
   precedes the return. */
1327 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1328 Return a list of the names of all defined charsets.
1332 Lisp_Object charset_list = Qnil;
1333 struct gcpro gcpro1;
1334 struct charset_list_closure charset_list_closure;
1336 GCPRO1 (charset_list);
1337 charset_list_closure.charset_list = &charset_list;
1338 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1339 &charset_list_closure);
1342 return charset_list;
/* Lisp primitive `charset-name' (exactly one argument).  The argument is
   resolved with Fget_charset and the name stored in the resulting charset
   object is returned. */
1345 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1346 Return the name of charset CHARSET.
1350 return XCHARSET_NAME (Fget_charset (charset));
1353 /* #### SJT Should generic properties be allowed? */
1354 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1355 Define a new character set.
1356 This function is for use with Mule support.
1357 NAME is a symbol, the name by which the character set is normally referred.
1358 DOC-STRING is a string describing the character set.
1359 PROPS is a property list, describing the specific nature of the
1360 character set. Recognized properties are:
1362 'short-name Short version of the charset name (ex: Latin-1)
1363 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1364 'registry A regular expression matching the font registry field for
1366 'dimension Number of octets used to index a character in this charset.
1367 Either 1 or 2. Defaults to 1.
1368 If UTF-2000 feature is enabled, 3 or 4 are also available.
1369 'columns Number of columns used to display a character in this charset.
1370 Only used in TTY mode. (Under X, the actual width of a
1371 character can be derived from the font used to display the
1372 characters.) If unspecified, defaults to the dimension
1373 (this is almost always the correct value).
1374 'chars Number of characters in each dimension (94 or 96).
1375 Defaults to 94. Note that if the dimension is 2, the
1376 character set thus described is 94x94 or 96x96.
1377 If UTF-2000 feature is enabled, 128 or 256 are also available.
1378 'final Final byte of ISO 2022 escape sequence. Must be
1379 supplied. Each combination of (DIMENSION, CHARS) defines a
1380 separate namespace for final bytes. Note that ISO
1381 2022 restricts the final byte to the range
1382 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1383 dimension == 2. Note also that final bytes in the range
1384 0x30 - 0x3F are reserved for user-defined (not official)
1386 'graphic 0 (use left half of font on output) or 1 (use right half
1387 of font on output). Defaults to 0. For example, for
1388 a font whose registry is ISO8859-1, the left half
1389 (octets 0x20 - 0x7F) is the `ascii' character set, while
1390 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1391 character set. With 'graphic set to 0, the octets
1392 will have their high bit cleared; with it set to 1,
1393 the octets will have their high bit set.
1394 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1396 'ccl-program A compiled CCL program used to convert a character in
1397 this charset into an index into the font. This is in
1398 addition to the 'graphic property. The CCL program
1399 is passed the octets of the character, with the high
1400 bit cleared and set depending upon whether the value
1401 of the 'graphic property is 0 or 1.
1402 'mother [UTF-2000 only] Base coded-charset.
1403 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1404 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1405 'code-offset [UTF-2000 only] Offset for a code-point of a base
1407 'conversion [UTF-2000 only] Conversion for a code-point of a base
1408 coded-charset (94x60 or 94x94x60).
1410 (name, doc_string, props))
1412 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1413 int direction = CHARSET_LEFT_TO_RIGHT;
1414 Lisp_Object registry = Qnil;
1415 Lisp_Object charset;
1416 Lisp_Object ccl_program = Qnil;
1417 Lisp_Object short_name = Qnil, long_name = Qnil;
1418 Lisp_Object mother = Qnil;
1419 int min_code = 0, max_code = 0, code_offset = 0;
1420 int byte_offset = -1;
1423 CHECK_SYMBOL (name);
1424 if (!NILP (doc_string))
1425 CHECK_STRING (doc_string);
1427 charset = Ffind_charset (name);
1428 if (!NILP (charset))
1429 signal_simple_error ("Cannot redefine existing charset", name);
1432 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1434 if (EQ (keyword, Qshort_name))
1436 CHECK_STRING (value);
1440 if (EQ (keyword, Qlong_name))
1442 CHECK_STRING (value);
1446 else if (EQ (keyword, Qdimension))
1449 dimension = XINT (value);
1450 if (dimension < 1 ||
1457 signal_simple_error ("Invalid value for 'dimension", value);
1460 else if (EQ (keyword, Qchars))
1463 chars = XINT (value);
1464 if (chars != 94 && chars != 96
1466 && chars != 128 && chars != 256
1469 signal_simple_error ("Invalid value for 'chars", value);
1472 else if (EQ (keyword, Qcolumns))
1475 columns = XINT (value);
1476 if (columns != 1 && columns != 2)
1477 signal_simple_error ("Invalid value for 'columns", value);
1480 else if (EQ (keyword, Qgraphic))
1483 graphic = XINT (value);
1491 signal_simple_error ("Invalid value for 'graphic", value);
1494 else if (EQ (keyword, Qregistry))
1496 CHECK_STRING (value);
1500 else if (EQ (keyword, Qdirection))
1502 if (EQ (value, Ql2r))
1503 direction = CHARSET_LEFT_TO_RIGHT;
1504 else if (EQ (value, Qr2l))
1505 direction = CHARSET_RIGHT_TO_LEFT;
1507 signal_simple_error ("Invalid value for 'direction", value);
1510 else if (EQ (keyword, Qfinal))
1512 CHECK_CHAR_COERCE_INT (value);
1513 final = XCHAR (value);
1514 if (final < '0' || final > '~')
1515 signal_simple_error ("Invalid value for 'final", value);
1519 else if (EQ (keyword, Qmother))
1521 mother = Fget_charset (value);
1524 else if (EQ (keyword, Qmin_code))
1527 min_code = XUINT (value);
1530 else if (EQ (keyword, Qmax_code))
1533 max_code = XUINT (value);
1536 else if (EQ (keyword, Qcode_offset))
1539 code_offset = XUINT (value);
1542 else if (EQ (keyword, Qconversion))
1544 if (EQ (value, Q94x60))
1545 conversion = CONVERSION_94x60;
1546 else if (EQ (value, Q94x94x60))
1547 conversion = CONVERSION_94x94x60;
1549 signal_simple_error ("Unrecognized conversion", value);
1553 else if (EQ (keyword, Qccl_program))
1555 struct ccl_program test_ccl;
1557 if (setup_ccl_program (&test_ccl, value) < 0)
1558 signal_simple_error ("Invalid value for 'ccl-program", value);
1559 ccl_program = value;
1563 signal_simple_error ("Unrecognized property", keyword);
1569 error ("'final must be specified");
1571 if (dimension == 2 && final > 0x5F)
1573 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1576 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1577 CHARSET_LEFT_TO_RIGHT)) ||
1578 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1579 CHARSET_RIGHT_TO_LEFT)))
1581 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1583 id = get_unallocated_leading_byte (dimension);
1585 if (NILP (doc_string))
1586 doc_string = build_string ("");
1588 if (NILP (registry))
1589 registry = build_string ("");
1591 if (NILP (short_name))
1592 XSETSTRING (short_name, XSYMBOL (name)->name);
1594 if (NILP (long_name))
1595 long_name = doc_string;
1598 columns = dimension;
1600 if (byte_offset < 0)
1604 else if (chars == 96)
1610 charset = make_charset (id, name, chars, dimension, columns, graphic,
1611 final, direction, short_name, long_name,
1612 doc_string, registry,
1613 Qnil, min_code, max_code, code_offset, byte_offset,
1614 mother, conversion);
1615 if (!NILP (ccl_program))
1616 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp primitive `make-reverse-direction-charset'.

   Resolves CHARSET with Fget_charset and signals an error if it already
   has a reverse-direction partner, or if NEW-NAME (which must be a
   symbol) already names a charset.  The new charset takes its chars,
   dimension, columns, graphic, final byte, doc string, short and long
   names and registry from the original, obtains a fresh id from
   get_unallocated_leading_byte (dimension), and gets the opposite
   direction.  Afterwards each of the two charsets is stored as the
   other's reverse-direction charset.

   NOTE(review): two alternative tails for the make_charset argument
   list are visible below: one copies the decoding table, code range,
   offsets and conversion from the original, the other passes Qnil and
   zero placeholders.  Presumably a preprocessor conditional that is not
   visible here selects between them -- confirm.  The return statement
   is likewise not visible; the docstring says the new charset is
   returned.  */
1620 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1622 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1623 NEW-NAME is the name of the new charset. Return the new charset.
1625 (charset, new_name))
1627 Lisp_Object new_charset = Qnil;
1628 int id, chars, dimension, columns, graphic, final;
1630 Lisp_Object registry, doc_string, short_name, long_name;
1633 charset = Fget_charset (charset);
1634 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1635 signal_simple_error ("Charset already has reverse-direction charset",
1638 CHECK_SYMBOL (new_name);
1639 if (!NILP (Ffind_charset (new_name)))
1640 signal_simple_error ("Cannot redefine existing charset", new_name);
1642 cs = XCHARSET (charset);
1644 chars = CHARSET_CHARS (cs);
1645 dimension = CHARSET_DIMENSION (cs);
1646 columns = CHARSET_COLUMNS (cs);
1647 id = get_unallocated_leading_byte (dimension);
1649 graphic = CHARSET_GRAPHIC (cs);
1650 final = CHARSET_FINAL (cs);
1651 direction = CHARSET_RIGHT_TO_LEFT;
1652 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1653 direction = CHARSET_LEFT_TO_RIGHT;
1654 doc_string = CHARSET_DOC_STRING (cs);
1655 short_name = CHARSET_SHORT_NAME (cs);
1656 long_name = CHARSET_LONG_NAME (cs);
1657 registry = CHARSET_REGISTRY (cs);
1659 new_charset = make_charset (id, new_name, chars, dimension, columns,
1660 graphic, final, direction, short_name, long_name,
1661 doc_string, registry,
1663 CHARSET_DECODING_TABLE(cs),
1664 CHARSET_MIN_CODE(cs),
1665 CHARSET_MAX_CODE(cs),
1666 CHARSET_CODE_OFFSET(cs),
1667 CHARSET_BYTE_OFFSET(cs),
1669 CHARSET_CONVERSION (cs)
1671 Qnil, 0, 0, 0, 0, Qnil, 0
1675 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1676 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Lisp primitive `define-charset-alias'.

   ALIAS must be a symbol; CHARSET is resolved to a charset object with
   Fget_charset.  The alias is then entered into Vcharset_hash_table as
   a key whose value is that charset object.  The value returned is
   whatever Fputhash returns.  */
1681 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1682 Define symbol ALIAS as an alias for CHARSET.
1686 CHECK_SYMBOL (alias);
1687 charset = Fget_charset (charset);
1688 return Fputhash (alias, charset, Vcharset_hash_table);
1691 /* #### Reverse direction charsets not yet implemented. */
/* Lisp primitive `charset-reverse-direction-charset'.

   Resolves CHARSET with Fget_charset and returns the contents of its
   reverse-direction-charset slot unchanged.  */
1693 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1695 Return the reverse-direction charset parallel to CHARSET, if any.
1696 This is the charset with the same properties (in particular, the same
1697 dimension, number of characters per dimension, and final byte) as
1698 CHARSET but whose characters are displayed in the opposite direction.
1702 charset = Fget_charset (charset);
1703 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Lisp primitive `charset-from-attributes'.

   Validation performed by the visible code:
     - DIMENSION must be an integer equal to 1 or 2;
     - CHARS must be 94 or 96;
     - FINAL must be a character in the range '0' .. '~', and no greater
       than 0x5F when DIMENSION is 2;
     - DIRECTION must be 'l2r, 'r2l or nil.
   Each violation signals an error naming the offending argument.

   The lookup goes through CHARSET_BY_ATTRIBUTES.  With an explicit
   direction only that direction is queried; the two lookups for
   left-to-right and then right-to-left are presumably the path taken
   when DIRECTION is nil (di stays at -1) -- the guarding conditions are
   not visible here, confirm.

   NOTE(review): the visible return statement yields XCHARSET_NAME (obj),
   i.e. the charset's name rather than the charset object.  The
   assignments to `ch' and `fi' and the handling of a failed lookup are
   not visible here.  */
1707 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1708 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1709 If DIRECTION is omitted, both directions will be checked (left-to-right
1710 will be returned if character sets exist for both directions).
1712 (dimension, chars, final, direction))
1714 int dm, ch, fi, di = -1;
1715 Lisp_Object obj = Qnil;
1717 CHECK_INT (dimension);
1718 dm = XINT (dimension);
1719 if (dm < 1 || dm > 2)
1720 signal_simple_error ("Invalid value for DIMENSION", dimension);
1724 if (ch != 94 && ch != 96)
1725 signal_simple_error ("Invalid value for CHARS", chars);
1727 CHECK_CHAR_COERCE_INT (final);
1729 if (fi < '0' || fi > '~')
1730 signal_simple_error ("Invalid value for FINAL", final);
1732 if (EQ (direction, Ql2r))
1733 di = CHARSET_LEFT_TO_RIGHT;
1734 else if (EQ (direction, Qr2l))
1735 di = CHARSET_RIGHT_TO_LEFT;
1736 else if (!NILP (direction))
1737 signal_simple_error ("Invalid value for DIRECTION", direction);
1739 if (dm == 2 && fi > 0x5F)
1741 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1745 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1747 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1750 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1753 return XCHARSET_NAME (obj);
/* Lisp primitive `charset-short-name': resolves CHARSET with
   Fget_charset and returns its short-name field.  */
1757 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1758 Return short name of CHARSET.
1762 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Lisp primitive `charset-long-name': resolves CHARSET with
   Fget_charset and returns its long-name field.  */
1765 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1766 Return long name of CHARSET.
1770 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Lisp primitive `charset-description': resolves CHARSET with
   Fget_charset and returns its doc-string field.  */
1773 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1774 Return description of CHARSET.
1778 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Lisp primitive `charset-dimension': resolves CHARSET with
   Fget_charset and returns its dimension as a Lisp integer.  */
1781 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1782 Return dimension of CHARSET.
1786 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Lisp primitive `charset-property'.

   Resolves CHARSET with Fget_charset, requires PROP to be a symbol, and
   compares PROP against each known property symbol in turn, returning
   as soon as one matches:

     name, short-name, long-name, doc-string, registry, ccl-program,
     mother                    -> the stored Lisp object;
     dimension, columns, graphic, chars, min-code, max-code
                               -> the stored value as a Lisp integer;
     final                     -> nil when the stored final byte is 0,
                                  otherwise that byte as a character;
     direction                 -> 'l2r or 'r2l;
     reverse-direction-charset -> the partner's name when the slot holds
                                  a charset, otherwise the slot contents
                                  unchanged.

   Any other symbol signals "Unrecognized charset property name".

   NOTE(review): no branch for 'code-offset or 'conversion is visible
   here, although `make-charset' documents them -- confirm whether that
   is intended.  */
1789 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1790 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1791 Recognized properties are those listed in `make-charset', as well as
1792 'name and 'doc-string.
1798 charset = Fget_charset (charset);
1799 cs = XCHARSET (charset);
1801 CHECK_SYMBOL (prop);
1802 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1803 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1804 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1805 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1806 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1807 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1808 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1809 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1810 Qnil : make_char (CHARSET_FINAL (cs));
1811 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1812 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1813 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1814 if (EQ (prop, Qdirection))
1815 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1816 if (EQ (prop, Qreverse_direction_charset))
1818 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1819 /* #### Is this translation OK? If so, error checking sufficient? */
1820 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1823 if (EQ (prop, Qmother))
1824 return CHARSET_MOTHER (cs);
1825 if (EQ (prop, Qmin_code))
1826 return make_int (CHARSET_MIN_CODE (cs));
1827 if (EQ (prop, Qmax_code))
1828 return make_int (CHARSET_MAX_CODE (cs));
1830 signal_simple_error ("Unrecognized charset property name", prop);
1831 return Qnil; /* not reached */
/* Lisp primitive `charset-id': resolves CHARSET with Fget_charset and
   returns the value of XCHARSET_LEADING_BYTE for it as a Lisp integer,
   i.e. the identification number is the charset's leading byte.  */
1834 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1835 Return charset identification number of CHARSET.
1839 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1842 /* #### We need to figure out which properties we really want to
/* Lisp primitive `set-charset-ccl-program'.

   Resolves CHARSET with Fget_charset, then validates CCL-PROGRAM by
   handing it to setup_ccl_program together with a scratch
   `struct ccl_program'; a negative result signals "Invalid ccl-program".
   Only after that check is the program stored in the charset, so a
   rejected program never replaces the current one.  The scratch
   structure is not used again in the visible code.

   NOTE(review): the return statement is not visible here.  */
1845 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1846 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1848 (charset, ccl_program))
1850 struct ccl_program test_ccl;
1852 charset = Fget_charset (charset);
1853 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1854 signal_simple_error ("Invalid ccl-program", ccl_program);
1855 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Empty the per-device font cache belonging to CHARSET.

   Iterates over the devices with DEVICE_LOOP_NO_BREAK.  For each device
   the charset is looked up in that device's charset_font_cache table;
   when an entry exists, the hash table found there is emptied with
   Fclrhash.  The entry itself stays in charset_font_cache.  */
1860 invalidate_charset_font_caches (Lisp_Object charset)
1862 /* Invalidate font cache entries for charset on all devices. */
1863 Lisp_Object devcons, concons, hash_table;
1864 DEVICE_LOOP_NO_BREAK (devcons, concons)
1866 struct device *d = XDEVICE (XCAR (devcons));
1867 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
/* Qunbound is the not-found default passed to Fgethash above, so a
   device with no cache entry for this charset is left alone.  */
1868 if (!UNBOUNDP (hash_table))
1869 Fclrhash (hash_table);
/* Lisp primitive `set-charset-registry'.

   Resolves CHARSET with Fget_charset, requires REGISTRY to be a string,
   and stores it as the charset's registry.  It then calls
   invalidate_charset_font_caches for the charset and reports a global
   change of the default face's font property through
   face_property_was_changed.

   NOTE(review): the return statement is not visible here.  */
1873 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1874 Set the 'registry property of CHARSET to REGISTRY.
1876 (charset, registry))
1878 charset = Fget_charset (charset);
1879 CHECK_STRING (registry);
1880 XCHARSET_REGISTRY (charset) = registry;
1881 invalidate_charset_font_caches (charset);
1882 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Lisp primitive `charset-mapping-table': resolves CHARSET with
   Fget_charset and returns its decoding table, i.e. the "mapping table"
   of the Lisp interface is the charset's decoding-table slot.  */
1887 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1888 Return mapping-table of CHARSET.
1892 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Lisp primitive `set-charset-mapping-table'.

   Outline of the visible code:

     1. CHARSET is resolved with Fget_charset.
     2. TABLE is classified.  One path resets the decoding table to Qnil
        (presumably when TABLE is nil -- the condition is not visible
        here).  A vector is checked with decoding_table_check_elements
        against the charset's dimension; the check can be rejected with
        "Too big table", "Invalid element is found" or "Something wrong",
        and the decoding table is reset to Qnil on this path too.
        Anything else signals wrong-type-argument.
     3. The vector is walked according to the charset's dimension.  For
        dimension 1 each element is registered with Fput_char_attribute
        under the charset's name at code point (index + byte_offset).
        For dimension 2 an element can be a nested vector, whose members
        get a code point with (row index + byte_offset) in the bits
        above bit 8, or a single object registered at
        (index + byte_offset).

   byte_offset is read from the charset with CHARSET_BYTE_OFFSET.

   NOTE(review): the conditions selecting the error messages, the tests
   that skip individual elements, the low-order half of the two-byte
   code point and the return value are not visible here -- confirm
   against the complete source before relying on this outline.  */
1895 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1896 Set mapping-table of CHARSET to TABLE.
1900 struct Lisp_Charset *cs;
1904 charset = Fget_charset (charset);
1905 cs = XCHARSET (charset);
1909 CHARSET_DECODING_TABLE(cs) = Qnil;
1912 else if (VECTORP (table))
1914 int ccs_len = CHARSET_BYTE_SIZE (cs);
1915 int ret = decoding_table_check_elements (table,
1916 CHARSET_DIMENSION (cs),
1921 signal_simple_error ("Too big table", table);
1923 signal_simple_error ("Invalid element is found", table);
1925 signal_simple_error ("Something wrong", table);
1927 CHARSET_DECODING_TABLE(cs) = Qnil;
1930 signal_error (Qwrong_type_argument,
1931 list2 (build_translated_string ("vector-or-nil-p"),
1934 byte_offset = CHARSET_BYTE_OFFSET (cs);
1935 switch (CHARSET_DIMENSION (cs))
1938 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1940 Lisp_Object c = XVECTOR_DATA(table)[i];
1943 Fput_char_attribute (c, XCHARSET_NAME (charset),
1944 make_int (i + byte_offset));
1948 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1950 Lisp_Object v = XVECTOR_DATA(table)[i];
1956 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1958 Lisp_Object c = XVECTOR_DATA(v)[j];
1962 (c, XCHARSET_NAME (charset),
1963 make_int ( ( (i + byte_offset) << 8 )
1969 Fput_char_attribute (v, XCHARSET_NAME (charset),
1970 make_int (i + byte_offset));
1979 /************************************************************************/
1980 /* Lisp primitives for working with characters */
1981 /************************************************************************/
/* Lisp primitive `decode-char'.

   Resolves CHARSET with Fget_charset and decodes the code point held in
   the local `c'.  When DEFINED_ONLY is nil the DECODE_CHAR macro is
   used; otherwise decode_defined_char is used.  A negative result means
   no character was found and nil is returned; any other result is
   returned as a Lisp character.

   NOTE(review): the extraction of `c' from CODE and the adjustment made
   when the charset's graphic value is 1 are not visible here.  */
1984 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
1985 Make a character from CHARSET and code-point CODE.
1986 If DEFINED_ONLY is non-nil, builtin character is not returned.
1987 If corresponding character is not found, nil is returned.
1989 (charset, code, defined_only))
1993 charset = Fget_charset (charset);
1996 if (XCHARSET_GRAPHIC (charset) == 1)
1998 if (NILP (defined_only))
1999 c = DECODE_CHAR (charset, c);
2001 c = decode_defined_char (charset, c);
2002 return c >= 0 ? make_char (c) : Qnil;
/* Lisp primitive `decode-builtin-char'.

   Resolves CHARSET with Fget_charset.  For the latin-viscii charset the
   code point is first decoded with Fdecode_char, and the resulting
   character is queried for a latin-viscii-lower and a
   latin-viscii-upper attribute; when one is found CHARSET is switched
   to that charset before continuing.

   The code point is then passed to decode_builtin_char.  A
   non-negative result is returned as a Lisp character; a negative
   result falls back to Fdecode_char (charset, code, Qnil).

   NOTE(review): the extraction of `c' from CODE, the tests around the
   two Fget_char_attribute calls (including how `code' is updated from
   `ret') and the adjustment made when the graphic value is 1 are not
   visible here.  */
2005 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2006 Make a builtin character from CHARSET and code-point CODE.
2012 charset = Fget_charset (charset);
2014 if (EQ (charset, Vcharset_latin_viscii))
2016 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2022 (ret = Fget_char_attribute (chr,
2023 Vcharset_latin_viscii_lower,
2026 charset = Vcharset_latin_viscii_lower;
2030 (ret = Fget_char_attribute (chr,
2031 Vcharset_latin_viscii_upper,
2034 charset = Vcharset_latin_viscii_upper;
2041 if (XCHARSET_GRAPHIC (charset) == 1)
2044 c = decode_builtin_char (charset, c);
2045 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
/* Lisp primitive `make-char'.

   Resolves CHARSET with Fget_charset and picks the permitted octet
   range [lowlim, highlim] from it:

     ascii                      0 .. 127
     control-1                  0 .. 31
     charsets with 256 chars    0 .. 255
     charsets with 94 chars    33 .. 126
     all others (96 chars)     32 .. 127

   The first octet must lie in that range, otherwise
   args_out_of_range_3 is raised.  For a one-dimensional charset ARG2
   has to be nil and the character is built from the first octet alone.
   Otherwise the second octet is taken from ARG2 with its eighth bit
   masked off, range-checked in the same way, and the character is built
   from both octets.

   NOTE(review): the extraction of `a1' from ARG1, the test that rejects
   a non-nil ARG2 for dimension one and the type check of ARG2 are not
   visible here.  */
2049 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2050 Make a character from CHARSET and octets ARG1 and ARG2.
2051 ARG2 is required only for characters from two-dimensional charsets.
2052 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2053 character s with caron.
2055 (charset, arg1, arg2))
2059 int lowlim, highlim;
2061 charset = Fget_charset (charset);
2062 cs = XCHARSET (charset);
2064 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2065 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2067 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2069 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2070 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2073 /* It is useful (and safe, according to Olivier Galibert) to strip
2074 the 8th bit off ARG1 and ARG2 because it allows programmers to
2075 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2076 Latin 2 code of the character. */
2084 if (a1 < lowlim || a1 > highlim)
2085 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2087 if (CHARSET_DIMENSION (cs) == 1)
2091 ("Charset is of dimension one; second octet must be nil", arg2);
2092 return make_char (MAKE_CHAR (charset, a1, 0));
2101 a2 = XINT (arg2) & 0x7f;
2103 if (a2 < lowlim || a2 > highlim)
2104 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2106 return make_char (MAKE_CHAR (charset, a1, a2));
/* Lisp primitive `char-charset'.

   CHARACTER is checked with CHECK_CHAR_COERCE_INT.  The charset is
   obtained with CHAR_CHARSET and its name is returned, not the charset
   object itself.  */
2109 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2110 Return the character set of CHARACTER.
2114 CHECK_CHAR_COERCE_INT (character);
2116 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
/* Lisp primitive `char-octet'.

   CHARACTER is checked with CHECK_CHAR_COERCE_INT and split with
   BREAKUP_CHAR into its charset and two octets.  N selects the result:
   nil or 0 returns the first octet, 1 returns the second, and the
   remaining visible statement signals "Octet number must be 0 or 1".
   The charset produced by BREAKUP_CHAR is not used in the visible
   code.  */
2119 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2120 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2121 N defaults to 0 if omitted.
2125 Lisp_Object charset;
2128 CHECK_CHAR_COERCE_INT (character);
2130 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2132 if (NILP (n) || EQ (n, Qzero))
2133 return make_int (octet0);
2134 else if (EQ (n, make_int (1)))
2135 return make_int (octet1);
2137 signal_simple_error ("Octet number must be 0 or 1", n);
/* Lisp primitive `encode-char'.

   CHARACTER is checked with CHECK_CHAR_COERCE_INT and CHARSET is
   resolved with Fget_charset.  The code point comes from
   charset_code_point, whose third argument is true exactly when
   DEFINED_ONLY is non-nil.  A non-negative code point is returned as a
   Lisp integer.

   NOTE(review): the value returned for a negative code point is not
   visible here (presumably nil -- confirm).  */
2141 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2142 Return code-point of CHARACTER in specified CHARSET.
2144 (character, charset, defined_only))
2148 CHECK_CHAR_COERCE_INT (character);
2149 charset = Fget_charset (charset);
2150 code_point = charset_code_point (charset, XCHAR (character),
2151 !NILP (defined_only));
2152 if (code_point >= 0)
2153 return make_int (code_point);
/* Lisp primitive `split-char'.

   `charset' and `rc' are protected from garbage collection with GCPRO2
   before CHARACTER is checked.  Two ways of building the result are
   visible:

     - ENCODE_CHAR yields a code point and a charset; a loop running
       while `dimension' is positive conses the low eight bits of the
       code point onto the front of the list, and the charset name is
       consed on last.
     - BREAKUP_CHAR yields a charset and two octets, and the result is
       (NAME C1 C2) for a two-dimensional charset or (NAME C1)
       otherwise.

   NOTE(review): presumably these are alternative branches of a
   preprocessor conditional whose directive lines are not visible here
   -- confirm.  The loop's updates to `code_point' and `dimension', the
   UNGCPRO and the return statement are likewise not visible.  */
2159 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2160 Return list of charset and one or two position-codes of CHARACTER.
2164 /* This function can GC */
2165 struct gcpro gcpro1, gcpro2;
2166 Lisp_Object charset = Qnil;
2167 Lisp_Object rc = Qnil;
2175 GCPRO2 (charset, rc);
2176 CHECK_CHAR_COERCE_INT (character);
2179 code_point = ENCODE_CHAR (XCHAR (character), charset);
2180 dimension = XCHARSET_DIMENSION (charset);
2181 while (dimension > 0)
2183 rc = Fcons (make_int (code_point & 255), rc);
2187 rc = Fcons (XCHARSET_NAME (charset), rc);
2189 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2191 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2193 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2197 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2206 #ifdef ENABLE_COMPOSITE_CHARS
2207 /************************************************************************/
2208 /* composite character functions */
2209 /************************************************************************/
/* Map the LEN bytes at STR to a composite character.

   A Lisp string is built from the bytes and looked up in
   Vcomposite_char_string2char_hash_table.  The allocation code below
   takes the next free (row, column) cell of Vcharset_composite, builds
   a character from it, and records the pair in both the char-to-string
   and the string-to-char hash tables.  The column counter then
   advances; on reaching 128 it is reset to 32 and the row counter is
   incremented.  Once the row counter has reached 128 the function
   signals "No more composite chars available".

   NOTE(review): the test that separates the already-known case from
   the allocation case and the return statement are not visible here --
   presumably allocation happens only when the lookup finds nothing.  */
2212 lookup_composite_char (Bufbyte *str, int len)
2214 Lisp_Object lispstr = make_string (str, len);
2215 Lisp_Object ch = Fgethash (lispstr,
2216 Vcomposite_char_string2char_hash_table,
2222 if (composite_char_row_next >= 128)
2223 signal_simple_error ("No more composite chars available", lispstr);
2224 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2225 composite_char_col_next);
2226 Fputhash (make_char (emch), lispstr,
2227 Vcomposite_char_char2string_hash_table);
2228 Fputhash (lispstr, make_char (emch),
2229 Vcomposite_char_string2char_hash_table);
2230 composite_char_col_next++;
2231 if (composite_char_col_next >= 128)
2233 composite_char_col_next = 32;
2234 composite_char_row_next++;
/* Look up the string recorded for composite character CH in
   Vcomposite_char_char2string_hash_table.  The assert requires the
   lookup to succeed, so CH has to be a character that was entered into
   that table beforehand.

   NOTE(review): the return statement is not visible here.  */
2243 composite_char_string (Emchar ch)
2245 Lisp_Object str = Fgethash (make_char (ch),
2246 Vcomposite_char_char2string_hash_table,
2248 assert (!UNBOUNDP (str));
/* `make-composite-char', declared with xxDEFUN rather than DEFUN.

   STRING must be a Lisp string.  Its data and length are passed to
   lookup_composite_char and the resulting character is returned as a
   Lisp character.  */
2252 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2253 Convert a string into a single composite character.
2254 The character is the result of overstriking all the characters in
2259 CHECK_STRING (string);
2260 return make_char (lookup_composite_char (XSTRING_DATA (string),
2261 XSTRING_LENGTH (string)));
/* `composite-char-string', declared with xxDEFUN rather than DEFUN.

   Signals "Must be composite char" unless the leading byte of the
   character equals LEADING_BYTE_COMPOSITE; otherwise returns the result
   of composite_char_string for it.

   NOTE(review): the argument check and the assignment to `emch' are not
   visible here.  */
2264 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2265 Return a string of the characters comprising a composite character.
2273 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2274 signal_simple_error ("Must be composite char", ch);
2275 return composite_char_string (emch);
2277 #endif /* ENABLE_COMPOSITE_CHARS */
2280 /************************************************************************/
2281 /* initialization */
2282 /************************************************************************/
/* Symbol-level initialization for this file.

   Initializes the lrecord implementation for charsets, registers the
   Lisp primitives of this file with DEFSUBR, and creates with defsymbol
   the symbols used as property names, direction values and charset
   names.

   NOTE(review): the DEFSUBR for Freverse_direction_charset is commented
   out, and no DEFSUBR for Fcharset_reverse_direction_charset is visible
   here -- confirm whether that primitive is meant to be registered.
   Some registrations are presumably guarded by preprocessor
   conditionals whose directive lines are not visible.  */
2285 syms_of_mule_charset (void)
2287 INIT_LRECORD_IMPLEMENTATION (charset);
/* Lisp primitives operating on charsets.  */
2289 DEFSUBR (Fcharsetp);
2290 DEFSUBR (Ffind_charset);
2291 DEFSUBR (Fget_charset);
2292 DEFSUBR (Fcharset_list);
2293 DEFSUBR (Fcharset_name);
2294 DEFSUBR (Fmake_charset);
2295 DEFSUBR (Fmake_reverse_direction_charset);
2296 /* DEFSUBR (Freverse_direction_charset); */
2297 DEFSUBR (Fdefine_charset_alias);
2298 DEFSUBR (Fcharset_from_attributes);
2299 DEFSUBR (Fcharset_short_name);
2300 DEFSUBR (Fcharset_long_name);
2301 DEFSUBR (Fcharset_description);
2302 DEFSUBR (Fcharset_dimension);
2303 DEFSUBR (Fcharset_property);
2304 DEFSUBR (Fcharset_id);
2305 DEFSUBR (Fset_charset_ccl_program);
2306 DEFSUBR (Fset_charset_registry);
2308 DEFSUBR (Fcharset_mapping_table);
2309 DEFSUBR (Fset_charset_mapping_table);
/* Lisp primitives operating on characters.  */
2313 DEFSUBR (Fdecode_char);
2314 DEFSUBR (Fdecode_builtin_char);
2315 DEFSUBR (Fencode_char);
2317 DEFSUBR (Fmake_char);
2318 DEFSUBR (Fchar_charset);
2319 DEFSUBR (Fchar_octet);
2320 DEFSUBR (Fsplit_char);
2322 #ifdef ENABLE_COMPOSITE_CHARS
2323 DEFSUBR (Fmake_composite_char);
2324 DEFSUBR (Fcomposite_char_string);
/* Symbols used as charset property names and property values.  */
2327 defsymbol (&Qcharsetp, "charsetp");
2328 defsymbol (&Qregistry, "registry");
2329 defsymbol (&Qfinal, "final");
2330 defsymbol (&Qgraphic, "graphic");
2331 defsymbol (&Qdirection, "direction");
2332 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2333 defsymbol (&Qshort_name, "short-name");
2334 defsymbol (&Qlong_name, "long-name");
2336 defsymbol (&Qmother, "mother");
2337 defsymbol (&Qmin_code, "min-code");
2338 defsymbol (&Qmax_code, "max-code");
2339 defsymbol (&Qcode_offset, "code-offset");
2340 defsymbol (&Qconversion, "conversion");
2341 defsymbol (&Q94x60, "94x60");
2342 defsymbol (&Q94x94x60, "94x94x60");
2345 defsymbol (&Ql2r, "l2r");
2346 defsymbol (&Qr2l, "r2l");
2348 /* Charsets, compatible with FSF 20.3
2349 Naming convention is Script-Charset[-Edition] */
2350 defsymbol (&Qascii, "ascii");
2351 defsymbol (&Qcontrol_1, "control-1");
2352 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2353 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2354 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2355 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2356 defsymbol (&Qthai_tis620, "thai-tis620");
2357 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2358 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2359 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2360 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2361 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2362 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2363 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2364 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2365 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2366 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2367 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2368 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2369 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2370 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2371 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2373 defsymbol (&Qucs, "ucs");
2374 defsymbol (&Qucs_bmp, "ucs-bmp");
2375 defsymbol (&Qucs_smp, "ucs-smp");
2376 defsymbol (&Qucs_sip, "ucs-sip");
2377 defsymbol (&Qlatin_viscii, "latin-viscii");
2378 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2379 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2380 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2381 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2382 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2383 defsymbol (&Qmap_jis_x0208, "=jis-x0208");
2384 defsymbol (&Qmap_jis_x0208_1990, "=jis-x0208-1990");
2385 defsymbol (&Qchinese_big5, "chinese-big5");
2386 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2388 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2389 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2391 defsymbol (&Qcomposite, "composite");
/* Variable-level initialization for this file.

   Allocates the zero-filled `chlook' lookup structure and registers it
   as a dump root, then sets every slot of its charset_by_leading_byte
   and charset_by_attributes tables to Qnil.  After that it seeds the
   counters for allocating private leading bytes, sets
   leading_code_private_11 to PRE_LEADING_BYTE_PRIVATE_1 and declares it
   to Lisp with DEFVAR_INT, and declares
   Vdefault_coded_charset_priority_list (initially Qnil) with
   DEFVAR_LISP.

   NOTE(review): charset_by_attributes is cleared once as a
   two-dimensional and once as a three-dimensional array, there are two
   styles of leading-byte counter, and leading_code_private_11 is
   assigned twice.  Presumably these are alternative branches of
   preprocessor conditionals whose directive lines are not visible here
   -- confirm.  */
2395 vars_of_mule_charset (void)
2402 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2403 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2405 /* Table of charsets indexed by leading byte. */
2406 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2407 chlook->charset_by_leading_byte[i] = Qnil;
2410 /* Table of charsets indexed by type/final-byte. */
2411 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2412 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2413 chlook->charset_by_attributes[i][j] = Qnil;
2415 /* Table of charsets indexed by type/final-byte/direction. */
2416 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2417 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2418 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2419 chlook->charset_by_attributes[i][j][k] = Qnil;
2423 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2425 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2426 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2430 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2431 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2432 Leading-code of private TYPE9N charset of column-width 1.
2434 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2438 Vdefault_coded_charset_priority_list = Qnil;
2439 DEFVAR_LISP ("default-coded-charset-priority-list",
2440 &Vdefault_coded_charset_priority_list /*
2441 Default order of preferred coded-character-sets.
2447 complex_vars_of_mule_charset (void)
2449 staticpro (&Vcharset_hash_table);
2450 Vcharset_hash_table =
2451 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2453 /* Predefined character sets. We store them into variables for
2457 staticpro (&Vcharset_ucs);
2459 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
2460 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2461 build_string ("UCS"),
2462 build_string ("UCS"),
2463 build_string ("ISO/IEC 10646"),
2465 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2466 staticpro (&Vcharset_ucs_bmp);
2468 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2469 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2470 build_string ("BMP"),
2471 build_string ("UCS-BMP"),
2472 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2474 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2475 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2476 staticpro (&Vcharset_ucs_smp);
2478 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2479 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2480 build_string ("SMP"),
2481 build_string ("UCS-SMP"),
2482 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2483 build_string ("UCS00-1"),
2484 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2485 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2486 staticpro (&Vcharset_ucs_sip);
2488 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2489 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2490 build_string ("SIP"),
2491 build_string ("UCS-SIP"),
2492 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2493 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2494 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2495 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2497 # define MIN_CHAR_THAI 0
2498 # define MAX_CHAR_THAI 0
2499 /* # define MIN_CHAR_HEBREW 0 */
2500 /* # define MAX_CHAR_HEBREW 0 */
2501 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2502 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2504 staticpro (&Vcharset_ascii);
2506 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2507 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2508 build_string ("ASCII"),
2509 build_string ("ASCII)"),
2510 build_string ("ASCII (ISO646 IRV)"),
2511 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2512 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2513 staticpro (&Vcharset_control_1);
2514 Vcharset_control_1 =
2515 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2516 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2517 build_string ("C1"),
2518 build_string ("Control characters"),
2519 build_string ("Control characters 128-191"),
2521 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2522 staticpro (&Vcharset_latin_iso8859_1);
2523 Vcharset_latin_iso8859_1 =
2524 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2525 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2526 build_string ("Latin-1"),
2527 build_string ("ISO8859-1 (Latin-1)"),
2528 build_string ("ISO8859-1 (Latin-1)"),
2529 build_string ("iso8859-1"),
2530 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2531 staticpro (&Vcharset_latin_iso8859_2);
2532 Vcharset_latin_iso8859_2 =
2533 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2534 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2535 build_string ("Latin-2"),
2536 build_string ("ISO8859-2 (Latin-2)"),
2537 build_string ("ISO8859-2 (Latin-2)"),
2538 build_string ("iso8859-2"),
2539 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2540 staticpro (&Vcharset_latin_iso8859_3);
2541 Vcharset_latin_iso8859_3 =
2542 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2543 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2544 build_string ("Latin-3"),
2545 build_string ("ISO8859-3 (Latin-3)"),
2546 build_string ("ISO8859-3 (Latin-3)"),
2547 build_string ("iso8859-3"),
2548 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2549 staticpro (&Vcharset_latin_iso8859_4);
2550 Vcharset_latin_iso8859_4 =
2551 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2552 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2553 build_string ("Latin-4"),
2554 build_string ("ISO8859-4 (Latin-4)"),
2555 build_string ("ISO8859-4 (Latin-4)"),
2556 build_string ("iso8859-4"),
2557 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2558 staticpro (&Vcharset_thai_tis620);
2559 Vcharset_thai_tis620 =
2560 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2561 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2562 build_string ("TIS620"),
2563 build_string ("TIS620 (Thai)"),
2564 build_string ("TIS620.2529 (Thai)"),
2565 build_string ("tis620"),
2566 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2567 staticpro (&Vcharset_greek_iso8859_7);
2568 Vcharset_greek_iso8859_7 =
2569 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2570 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2571 build_string ("ISO8859-7"),
2572 build_string ("ISO8859-7 (Greek)"),
2573 build_string ("ISO8859-7 (Greek)"),
2574 build_string ("iso8859-7"),
2575 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2576 staticpro (&Vcharset_arabic_iso8859_6);
2577 Vcharset_arabic_iso8859_6 =
2578 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2579 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2580 build_string ("ISO8859-6"),
2581 build_string ("ISO8859-6 (Arabic)"),
2582 build_string ("ISO8859-6 (Arabic)"),
2583 build_string ("iso8859-6"),
2584 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2585 staticpro (&Vcharset_hebrew_iso8859_8);
2586 Vcharset_hebrew_iso8859_8 =
2587 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2588 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2589 build_string ("ISO8859-8"),
2590 build_string ("ISO8859-8 (Hebrew)"),
2591 build_string ("ISO8859-8 (Hebrew)"),
2592 build_string ("iso8859-8"),
2594 0 /* MIN_CHAR_HEBREW */,
2595 0 /* MAX_CHAR_HEBREW */, 0, 32,
2596 Qnil, CONVERSION_IDENTICAL);
2597 staticpro (&Vcharset_katakana_jisx0201);
2598 Vcharset_katakana_jisx0201 =
2599 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2600 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2601 build_string ("JISX0201 Kana"),
2602 build_string ("JISX0201.1976 (Japanese Kana)"),
2603 build_string ("JISX0201.1976 Japanese Kana"),
2604 build_string ("jisx0201\\.1976"),
2605 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2606 staticpro (&Vcharset_latin_jisx0201);
2607 Vcharset_latin_jisx0201 =
2608 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2609 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2610 build_string ("JISX0201 Roman"),
2611 build_string ("JISX0201.1976 (Japanese Roman)"),
2612 build_string ("JISX0201.1976 Japanese Roman"),
2613 build_string ("jisx0201\\.1976"),
2614 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2615 staticpro (&Vcharset_cyrillic_iso8859_5);
2616 Vcharset_cyrillic_iso8859_5 =
2617 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2618 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2619 build_string ("ISO8859-5"),
2620 build_string ("ISO8859-5 (Cyrillic)"),
2621 build_string ("ISO8859-5 (Cyrillic)"),
2622 build_string ("iso8859-5"),
2623 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2624 staticpro (&Vcharset_latin_iso8859_9);
2625 Vcharset_latin_iso8859_9 =
2626 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2627 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2628 build_string ("Latin-5"),
2629 build_string ("ISO8859-9 (Latin-5)"),
2630 build_string ("ISO8859-9 (Latin-5)"),
2631 build_string ("iso8859-9"),
2632 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2634 staticpro (&Vcharset_jis_x0208);
2635 Vcharset_jis_x0208 =
2636 make_charset (LEADING_BYTE_JIS_X0208,
2637 Qmap_jis_x0208, 94, 2,
2638 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2639 build_string ("JIS X0208"),
2640 build_string ("JIS X0208 Common"),
2641 build_string ("JIS X0208 Common part"),
2642 build_string ("jisx0208\\.1990"),
2644 MIN_CHAR_JIS_X0208_1990,
2645 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2646 Qnil, CONVERSION_94x94);
2648 staticpro (&Vcharset_japanese_jisx0208_1978);
2649 Vcharset_japanese_jisx0208_1978 =
2650 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2651 Qjapanese_jisx0208_1978, 94, 2,
2652 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2653 build_string ("JIS X0208:1978"),
2654 build_string ("JIS X0208:1978 (Japanese)"),
2656 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2657 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2664 CONVERSION_IDENTICAL);
2665 staticpro (&Vcharset_chinese_gb2312);
2666 Vcharset_chinese_gb2312 =
2667 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2668 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2669 build_string ("GB2312"),
2670 build_string ("GB2312)"),
2671 build_string ("GB2312 Chinese simplified"),
2672 build_string ("gb2312"),
2673 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2674 staticpro (&Vcharset_chinese_gb12345);
2675 Vcharset_chinese_gb12345 =
2676 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2677 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2678 build_string ("G1"),
2679 build_string ("GB 12345)"),
2680 build_string ("GB 12345-1990"),
2681 build_string ("GB12345\\(\\.1990\\)?-0"),
2682 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2683 staticpro (&Vcharset_japanese_jisx0208);
2684 Vcharset_japanese_jisx0208 =
2685 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2686 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2687 build_string ("JISX0208"),
2688 build_string ("JIS X0208:1983 (Japanese)"),
2689 build_string ("JIS X0208:1983 Japanese Kanji"),
2690 build_string ("jisx0208\\.1983"),
2697 CONVERSION_IDENTICAL);
2699 staticpro (&Vcharset_japanese_jisx0208_1990);
2700 Vcharset_japanese_jisx0208_1990 =
2701 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2702 Qmap_jis_x0208_1990, 94, 2,
2703 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2704 build_string ("JISX0208-1990"),
2705 build_string ("JIS X0208:1990 (Japanese)"),
2706 build_string ("JIS X0208:1990 Japanese Kanji"),
2707 build_string ("jisx0208\\.1990"),
2709 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
2710 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
2711 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
2712 Vcharset_jis_x0208 /* Qnil */,
2713 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
2715 staticpro (&Vcharset_korean_ksc5601);
2716 Vcharset_korean_ksc5601 =
2717 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2718 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2719 build_string ("KSC5601"),
2720 build_string ("KSC5601 (Korean"),
2721 build_string ("KSC5601 Korean Hangul and Hanja"),
2722 build_string ("ksc5601"),
2723 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2724 staticpro (&Vcharset_japanese_jisx0212);
2725 Vcharset_japanese_jisx0212 =
2726 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2727 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2728 build_string ("JISX0212"),
2729 build_string ("JISX0212 (Japanese)"),
2730 build_string ("JISX0212 Japanese Supplement"),
2731 build_string ("jisx0212"),
2732 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2734 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2735 staticpro (&Vcharset_chinese_cns11643_1);
2736 Vcharset_chinese_cns11643_1 =
2737 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2738 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2739 build_string ("CNS11643-1"),
2740 build_string ("CNS11643-1 (Chinese traditional)"),
2742 ("CNS 11643 Plane 1 Chinese traditional"),
2743 build_string (CHINESE_CNS_PLANE_RE("1")),
2744 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2745 staticpro (&Vcharset_chinese_cns11643_2);
2746 Vcharset_chinese_cns11643_2 =
2747 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2748 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2749 build_string ("CNS11643-2"),
2750 build_string ("CNS11643-2 (Chinese traditional)"),
2752 ("CNS 11643 Plane 2 Chinese traditional"),
2753 build_string (CHINESE_CNS_PLANE_RE("2")),
2754 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2756 staticpro (&Vcharset_latin_tcvn5712);
2757 Vcharset_latin_tcvn5712 =
2758 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2759 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2760 build_string ("TCVN 5712"),
2761 build_string ("TCVN 5712 (VSCII-2)"),
2762 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
2763 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2764 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2765 staticpro (&Vcharset_latin_viscii_lower);
2766 Vcharset_latin_viscii_lower =
2767 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2768 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2769 build_string ("VISCII lower"),
2770 build_string ("VISCII lower (Vietnamese)"),
2771 build_string ("VISCII lower (Vietnamese)"),
2772 build_string ("MULEVISCII-LOWER"),
2773 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2774 staticpro (&Vcharset_latin_viscii_upper);
2775 Vcharset_latin_viscii_upper =
2776 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2777 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2778 build_string ("VISCII upper"),
2779 build_string ("VISCII upper (Vietnamese)"),
2780 build_string ("VISCII upper (Vietnamese)"),
2781 build_string ("MULEVISCII-UPPER"),
2782 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2783 staticpro (&Vcharset_latin_viscii);
2784 Vcharset_latin_viscii =
2785 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2786 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2787 build_string ("VISCII"),
2788 build_string ("VISCII 1.1 (Vietnamese)"),
2789 build_string ("VISCII 1.1 (Vietnamese)"),
2790 build_string ("VISCII1\\.1"),
2791 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2792 staticpro (&Vcharset_chinese_big5);
2793 Vcharset_chinese_big5 =
2794 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2795 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2796 build_string ("Big5"),
2797 build_string ("Big5"),
2798 build_string ("Big5 Chinese traditional"),
2799 build_string ("big5-0"),
2801 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2802 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
2804 staticpro (&Vcharset_ethiopic_ucs);
2805 Vcharset_ethiopic_ucs =
2806 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
2807 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2808 build_string ("Ethiopic (UCS)"),
2809 build_string ("Ethiopic (UCS)"),
2810 build_string ("Ethiopic of UCS"),
2811 build_string ("Ethiopic-Unicode"),
2812 Qnil, 0x1200, 0x137F, 0, 0,
2813 Qnil, CONVERSION_IDENTICAL);
2815 staticpro (&Vcharset_chinese_big5_1);
2816 Vcharset_chinese_big5_1 =
2817 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
2818 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
2819 build_string ("Big5"),
2820 build_string ("Big5 (Level-1)"),
2822 ("Big5 Level-1 Chinese traditional"),
2823 build_string ("big5"),
2824 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2825 staticpro (&Vcharset_chinese_big5_2);
2826 Vcharset_chinese_big5_2 =
2827 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
2828 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
2829 build_string ("Big5"),
2830 build_string ("Big5 (Level-2)"),
2832 ("Big5 Level-2 Chinese traditional"),
2833 build_string ("big5"),
2834 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2836 #ifdef ENABLE_COMPOSITE_CHARS
2837 /* #### For simplicity, we put composite chars into a 96x96 charset.
2838 This is going to lead to problems because you can run out of
2839 room, esp. as we don't yet recycle numbers. */
2840 staticpro (&Vcharset_composite);
2841 Vcharset_composite =
2842 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
2843 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2844 build_string ("Composite"),
2845 build_string ("Composite characters"),
2846 build_string ("Composite characters"),
2849 /* #### not dumped properly */
2850 composite_char_row_next = 32;
2851 composite_char_col_next = 32;
2853 Vcomposite_char_string2char_hash_table =
2854 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2855 Vcomposite_char_char2string_hash_table =
2856 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2857 staticpro (&Vcomposite_char_string2char_hash_table);
2858 staticpro (&Vcomposite_char_char2string_hash_table);
2859 #endif /* ENABLE_COMPOSITE_CHARS */