1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002,2003 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_latin_viscii;
72 Lisp_Object Vcharset_latin_tcvn5712;
73 Lisp_Object Vcharset_latin_viscii_lower;
74 Lisp_Object Vcharset_latin_viscii_upper;
75 Lisp_Object Vcharset_jis_x0208;
76 Lisp_Object Vcharset_chinese_big5;
77 Lisp_Object Vcharset_ethiopic_ucs;
79 Lisp_Object Vcharset_chinese_big5_1;
80 Lisp_Object Vcharset_chinese_big5_2;
82 #ifdef ENABLE_COMPOSITE_CHARS
83 Lisp_Object Vcharset_composite;
85 /* Hash tables for composite chars. One maps string representing
86 composed chars to their equivalent chars; one goes the
88 Lisp_Object Vcomposite_char_char2string_hash_table;
89 Lisp_Object Vcomposite_char_string2char_hash_table;
91 static int composite_char_row_next;
92 static int composite_char_col_next;
94 #endif /* ENABLE_COMPOSITE_CHARS */
96 struct charset_lookup *chlook;
98 static const struct lrecord_description charset_lookup_description_1[] = {
99 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
101 NUM_LEADING_BYTES+4*128
108 static const struct struct_description charset_lookup_description = {
109 sizeof (struct charset_lookup),
110 charset_lookup_description_1
114 /* Table of number of bytes in the string representation of a character
115 indexed by the first byte of that representation.
117 rep_bytes_by_first_byte(c) is more efficient than the equivalent
118 canonical computation:
120 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
122 const Bytecount rep_bytes_by_first_byte[0xA0] =
123 { /* 0x00 - 0x7f are for straight ASCII */
124 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
125 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
126 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
127 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
128 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132 /* 0x80 - 0x8f are for Dimension-1 official charsets */
134 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
136 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
138 /* 0x90 - 0x9d are for Dimension-2 official charsets */
139 /* 0x9e is for Dimension-1 private charsets */
140 /* 0x9f is for Dimension-2 private charsets */
141 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
147 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
149 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
153 if (XVECTOR_LENGTH (v) > ccs_len)
156 for (i = 0; i < XVECTOR_LENGTH (v); i++)
158 Lisp_Object c = XVECTOR_DATA(v)[i];
160 if (!NILP (c) && !CHARP (c))
164 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
176 put_char_ccs_code_point (Lisp_Object character,
177 Lisp_Object ccs, Lisp_Object value)
179 if (!EQ (XCHARSET_NAME (ccs), Qmap_ucs)
181 || (XCHAR (character) != XINT (value)))
183 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
187 { /* obsolete representation: value must be a list of bytes */
188 Lisp_Object ret = Fcar (value);
192 signal_simple_error ("Invalid value for coded-charset", value);
193 code_point = XINT (ret);
194 if (XCHARSET_GRAPHIC (ccs) == 1)
202 signal_simple_error ("Invalid value for coded-charset",
206 signal_simple_error ("Invalid value for coded-charset",
209 if (XCHARSET_GRAPHIC (ccs) == 1)
211 code_point = (code_point << 8) | j;
214 value = make_int (code_point);
216 else if (INTP (value))
218 code_point = XINT (value);
219 if (XCHARSET_GRAPHIC (ccs) == 1)
221 code_point &= 0x7F7F7F7F;
222 value = make_int (code_point);
226 signal_simple_error ("Invalid value for coded-charset", value);
230 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
233 decoding_table_remove_char (ccs, XINT (cpos));
236 decoding_table_put_char (ccs, code_point, character);
242 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
244 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
245 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
247 if (VECTORP (decoding_table))
249 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
253 decoding_table_remove_char (ccs, XINT (cpos));
256 if (CHAR_TABLEP (encoding_table))
258 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
266 int leading_code_private_11;
269 Lisp_Object Qcharsetp;
271 /* Qdoc_string, Qdimension, Qchars defined in general.c */
272 Lisp_Object Qregistry, Qfinal, Qgraphic;
273 Lisp_Object Qdirection;
274 Lisp_Object Qreverse_direction_charset;
275 Lisp_Object Qleading_byte;
276 Lisp_Object Qshort_name, Qlong_name;
278 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
279 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60;
296 Qjapanese_jisx0208_1978,
313 Qvietnamese_viscii_lower,
314 Qvietnamese_viscii_upper,
324 Lisp_Object Ql2r, Qr2l;
326 Lisp_Object Vcharset_hash_table;
328 /* Composite characters are characters constructed by overstriking two
329 or more regular characters.
331 1) The old Mule implementation involves storing composite characters
332 in a buffer as a tag followed by all of the actual characters
333 used to make up the composite character. I think this is a bad
334 idea; it greatly complicates code that wants to handle strings
335 one character at a time because it has to deal with the possibility
336 of great big ungainly characters. It's much more reasonable to
337 simply store an index into a table of composite characters.
339 2) The current implementation only allows for 16,384 separate
340 composite characters over the lifetime of the XEmacs process.
341 This could become a potential problem if the user
342 edited lots of different files that use composite characters.
343 Due to FSF bogosity, increasing the number of allowable
344 composite characters under Mule would decrease the number
345 of possible faces that can exist. Mule already has shrunk
346 this to 2048, and further shrinkage would become uncomfortable.
347 No such problems exist in XEmacs.
349 Composite characters could be represented as 0x80 C1 C2 C3,
350 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
351 for slightly under 2^20 (one million) composite characters
352 over the XEmacs process lifetime, and you only need to
353 increase the size of a Mule character from 19 to 21 bits.
354 Or you could use 0x80 C1 C2 C3 C4, allowing for about
355 85 million (slightly over 2^26) composite characters. */
358 /************************************************************************/
359 /* Basic Emchar functions */
360 /************************************************************************/
362 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
363 string in STR. Returns the number of bytes stored.
364 Do not call this directly. Use the macro set_charptr_emchar() instead.
368 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
383 else if ( c <= 0x7ff )
385 *p++ = (c >> 6) | 0xc0;
386 *p++ = (c & 0x3f) | 0x80;
388 else if ( c <= 0xffff )
390 *p++ = (c >> 12) | 0xe0;
391 *p++ = ((c >> 6) & 0x3f) | 0x80;
392 *p++ = (c & 0x3f) | 0x80;
394 else if ( c <= 0x1fffff )
396 *p++ = (c >> 18) | 0xf0;
397 *p++ = ((c >> 12) & 0x3f) | 0x80;
398 *p++ = ((c >> 6) & 0x3f) | 0x80;
399 *p++ = (c & 0x3f) | 0x80;
401 else if ( c <= 0x3ffffff )
403 *p++ = (c >> 24) | 0xf8;
404 *p++ = ((c >> 18) & 0x3f) | 0x80;
405 *p++ = ((c >> 12) & 0x3f) | 0x80;
406 *p++ = ((c >> 6) & 0x3f) | 0x80;
407 *p++ = (c & 0x3f) | 0x80;
411 *p++ = (c >> 30) | 0xfc;
412 *p++ = ((c >> 24) & 0x3f) | 0x80;
413 *p++ = ((c >> 18) & 0x3f) | 0x80;
414 *p++ = ((c >> 12) & 0x3f) | 0x80;
415 *p++ = ((c >> 6) & 0x3f) | 0x80;
416 *p++ = (c & 0x3f) | 0x80;
419 BREAKUP_CHAR (c, charset, c1, c2);
420 lb = CHAR_LEADING_BYTE (c);
421 if (LEADING_BYTE_PRIVATE_P (lb))
422 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
424 if (EQ (charset, Vcharset_control_1))
433 /* Return the first character from a Mule-encoded string in STR,
434 assuming it's non-ASCII. Do not call this directly.
435 Use the macro charptr_emchar() instead. */
438 non_ascii_charptr_emchar (const Bufbyte *str)
451 else if ( b >= 0xf8 )
456 else if ( b >= 0xf0 )
461 else if ( b >= 0xe0 )
466 else if ( b >= 0xc0 )
476 for( ; len > 0; len-- )
479 ch = ( ch << 6 ) | ( b & 0x3f );
483 Bufbyte i0 = *str, i1, i2 = 0;
486 if (i0 == LEADING_BYTE_CONTROL_1)
487 return (Emchar) (*++str - 0x20);
489 if (LEADING_BYTE_PREFIX_P (i0))
494 charset = CHARSET_BY_LEADING_BYTE (i0);
495 if (XCHARSET_DIMENSION (charset) == 2)
498 return MAKE_CHAR (charset, i1, i2);
502 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
503 Do not call this directly. Use the macro valid_char_p() instead. */
507 non_ascii_valid_char_p (Emchar ch)
511 /* Must have only lowest 19 bits set */
515 f1 = CHAR_FIELD1 (ch);
516 f2 = CHAR_FIELD2 (ch);
517 f3 = CHAR_FIELD3 (ch);
523 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
524 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
525 f2 > MAX_CHAR_FIELD2_PRIVATE)
530 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
531 f2 <= MAX_CHAR_FIELD2_PRIVATE))
535 NOTE: This takes advantage of the fact that
536 FIELD2_TO_OFFICIAL_LEADING_BYTE and
537 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
539 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
540 if (EQ (charset, Qnil))
542 return (XCHARSET_CHARS (charset) == 96);
548 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
549 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
550 f1 > MAX_CHAR_FIELD1_PRIVATE)
552 if (f2 < 0x20 || f3 < 0x20)
555 #ifdef ENABLE_COMPOSITE_CHARS
556 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
558 if (UNBOUNDP (Fgethash (make_int (ch),
559 Vcomposite_char_char2string_hash_table,
564 #endif /* ENABLE_COMPOSITE_CHARS */
566 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
567 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
570 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
572 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
575 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
577 if (EQ (charset, Qnil))
579 return (XCHARSET_CHARS (charset) == 96);
585 /************************************************************************/
586 /* Basic string functions */
587 /************************************************************************/
589 /* Copy the character pointed to by SRC into DST. Do not call this
590 directly. Use the macro charptr_copy_char() instead.
591 Return the number of bytes copied. */
594 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
596 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
598 for (i = bytes; i; i--, dst++, src++)
604 /************************************************************************/
605 /* streams of Emchars */
606 /************************************************************************/
608 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
609 The functions below are not meant to be called directly; use
610 the macros in insdel.h. */
613 Lstream_get_emchar_1 (Lstream *stream, int ch)
615 Bufbyte str[MAX_EMCHAR_LEN];
616 Bufbyte *strptr = str;
619 str[0] = (Bufbyte) ch;
621 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
623 int c = Lstream_getc (stream);
624 bufpos_checking_assert (c >= 0);
625 *++strptr = (Bufbyte) c;
627 return charptr_emchar (str);
631 Lstream_fput_emchar (Lstream *stream, Emchar ch)
633 Bufbyte str[MAX_EMCHAR_LEN];
634 Bytecount len = set_charptr_emchar (str, ch);
635 return Lstream_write (stream, str, len);
639 Lstream_funget_emchar (Lstream *stream, Emchar ch)
641 Bufbyte str[MAX_EMCHAR_LEN];
642 Bytecount len = set_charptr_emchar (str, ch);
643 Lstream_unread (stream, str, len);
647 /************************************************************************/
649 /************************************************************************/
652 mark_charset (Lisp_Object obj)
654 Lisp_Charset *cs = XCHARSET (obj);
656 mark_object (cs->short_name);
657 mark_object (cs->long_name);
658 mark_object (cs->doc_string);
659 mark_object (cs->registry);
660 mark_object (cs->ccl_program);
662 mark_object (cs->decoding_table);
663 mark_object (cs->mother);
669 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
671 Lisp_Charset *cs = XCHARSET (obj);
675 error ("printing unreadable object #<charset %s 0x%x>",
676 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
679 write_c_string ("#<charset ", printcharfun);
680 print_internal (CHARSET_NAME (cs), printcharfun, 0);
681 write_c_string (" ", printcharfun);
682 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
683 write_c_string (" ", printcharfun);
684 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
685 write_c_string (" ", printcharfun);
686 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
687 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
689 CHARSET_DIMENSION (cs),
690 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
691 CHARSET_COLUMNS (cs),
692 CHARSET_GRAPHIC (cs),
694 write_c_string (buf, printcharfun);
695 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
696 sprintf (buf, " 0x%x>", cs->header.uid);
697 write_c_string (buf, printcharfun);
700 static const struct lrecord_description charset_description[] = {
701 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
702 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
703 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
704 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
705 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
706 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
707 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
709 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
710 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
715 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
716 mark_charset, print_charset, 0, 0, 0,
720 /* Make a new charset. */
721 /* #### SJT Should generic properties be allowed? */
723 make_charset (Charset_ID id, Lisp_Object name,
724 unsigned short chars, unsigned char dimension,
725 unsigned char columns, unsigned char graphic,
726 Bufbyte final, unsigned char direction, Lisp_Object short_name,
727 Lisp_Object long_name, Lisp_Object doc,
729 Lisp_Object decoding_table,
730 Emchar min_code, Emchar max_code,
731 Emchar code_offset, unsigned char byte_offset,
732 Lisp_Object mother, unsigned char conversion)
735 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
739 XSETCHARSET (obj, cs);
741 CHARSET_ID (cs) = id;
742 CHARSET_NAME (cs) = name;
743 CHARSET_SHORT_NAME (cs) = short_name;
744 CHARSET_LONG_NAME (cs) = long_name;
745 CHARSET_CHARS (cs) = chars;
746 CHARSET_DIMENSION (cs) = dimension;
747 CHARSET_DIRECTION (cs) = direction;
748 CHARSET_COLUMNS (cs) = columns;
749 CHARSET_GRAPHIC (cs) = graphic;
750 CHARSET_FINAL (cs) = final;
751 CHARSET_DOC_STRING (cs) = doc;
752 CHARSET_REGISTRY (cs) = reg;
753 CHARSET_CCL_PROGRAM (cs) = Qnil;
754 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
756 CHARSET_DECODING_TABLE(cs) = Qunbound;
757 CHARSET_MIN_CODE (cs) = min_code;
758 CHARSET_MAX_CODE (cs) = max_code;
759 CHARSET_CODE_OFFSET (cs) = code_offset;
760 CHARSET_BYTE_OFFSET (cs) = byte_offset;
761 CHARSET_MOTHER (cs) = mother;
762 CHARSET_CONVERSION (cs) = conversion;
766 if (id == LEADING_BYTE_ASCII)
767 CHARSET_REP_BYTES (cs) = 1;
769 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
771 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
776 /* some charsets do not have final characters. This includes
777 ASCII, Control-1, Composite, and the two faux private
779 unsigned char iso2022_type
780 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
782 if (code_offset == 0)
784 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
785 chlook->charset_by_attributes[iso2022_type][final] = obj;
789 (chlook->charset_by_attributes[iso2022_type][final][direction]));
790 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
794 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
795 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
797 /* Some charsets are "faux" and don't have names or really exist at
798 all except in the leading-byte table. */
800 Fputhash (name, obj, Vcharset_hash_table);
805 get_unallocated_leading_byte (int dimension)
810 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
813 lb = chlook->next_allocated_leading_byte++;
817 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
820 lb = chlook->next_allocated_1_byte_leading_byte++;
824 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
827 lb = chlook->next_allocated_2_byte_leading_byte++;
833 ("No more character sets free for this dimension",
834 make_int (dimension));
840 /* Number of Big5 characters which have the same code in 1st byte. */
842 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
845 decode_defined_char (Lisp_Object ccs, int code_point)
847 int dim = XCHARSET_DIMENSION (ccs);
848 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
856 = get_ccs_octet_table (decoding_table, ccs,
857 (code_point >> (dim * 8)) & 255);
859 if (CHARP (decoding_table))
860 return XCHAR (decoding_table);
863 else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
865 if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
867 if ( EQ (mother, Vcharset_ucs) )
868 return DECODE_CHAR (mother, code_point);
870 return decode_defined_char (mother, code_point);
877 decode_builtin_char (Lisp_Object charset, int code_point)
879 Lisp_Object mother = XCHARSET_MOTHER (charset);
882 if ( XCHARSET_MAX_CODE (charset) > 0 )
884 if ( CHARSETP (mother) )
886 int code = code_point;
888 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
890 int row = code_point >> 8;
891 int cell = code_point & 255;
895 else if (row < 16 + 32 + 30)
896 code = (row - (16 + 32)) * 94 + cell - 33;
897 else if (row < 18 + 32 + 30)
899 else if (row < 18 + 32 + 60)
900 code = (row - (18 + 32)) * 94 + cell - 33;
902 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
904 int plane = code_point >> 16;
905 int row = (code_point >> 8) & 255;
906 int cell = code_point & 255;
910 else if (row < 16 + 32 + 30)
912 = (plane - 33) * 94 * 60
913 + (row - (16 + 32)) * 94
915 else if (row < 18 + 32 + 30)
917 else if (row < 18 + 32 + 60)
919 = (plane - 33) * 94 * 60
920 + (row - (18 + 32)) * 94
924 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
929 = (XCHARSET_DIMENSION (charset) == 1
931 code_point - XCHARSET_BYTE_OFFSET (charset)
933 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
934 * XCHARSET_CHARS (charset)
935 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
936 + XCHARSET_CODE_OFFSET (charset);
937 if ((cid < XCHARSET_MIN_CODE (charset))
938 || (XCHARSET_MAX_CODE (charset) < cid))
943 else if ((final = XCHARSET_FINAL (charset)) >= '0')
945 if (XCHARSET_DIMENSION (charset) == 1)
947 switch (XCHARSET_CHARS (charset))
951 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
954 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
962 switch (XCHARSET_CHARS (charset))
965 return MIN_CHAR_94x94
966 + (final - '0') * 94 * 94
967 + (((code_point >> 8) & 0x7F) - 33) * 94
968 + ((code_point & 0x7F) - 33);
970 return MIN_CHAR_96x96
971 + (final - '0') * 96 * 96
972 + (((code_point >> 8) & 0x7F) - 32) * 96
973 + ((code_point & 0x7F) - 32);
985 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
987 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
990 if ( CHAR_TABLEP (encoding_table)
991 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
996 Lisp_Object mother = XCHARSET_MOTHER (charset);
997 int min = XCHARSET_MIN_CODE (charset);
998 int max = XCHARSET_MAX_CODE (charset);
1001 if ( CHARSETP (mother) )
1003 if (XCHARSET_FINAL (charset) >= '0')
1004 code = charset_code_point (mother, ch, 1);
1006 code = charset_code_point (mother, ch, defined_only);
1008 else if (defined_only)
1010 else if ( ((max == 0) && CHARSETP (mother)
1011 && (XCHARSET_FINAL (charset) == 0))
1012 || ((min <= ch) && (ch <= max)) )
1014 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1015 || ((min <= code) && (code <= max)) )
1017 int d = code - XCHARSET_CODE_OFFSET (charset);
1019 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1021 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1023 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1025 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1028 int cell = d % 94 + 33;
1034 return (row << 8) | cell;
1036 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1037 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1038 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1039 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1040 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1042 int plane = d / (94 * 60) + 33;
1043 int row = (d % (94 * 60)) / 94;
1044 int cell = d % 94 + 33;
1050 return (plane << 16) | (row << 8) | cell;
1052 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1054 ( (d / (94 * 94) + 33) << 16)
1055 | ((d / 94 % 94 + 33) << 8)
1057 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1059 ( (d / (96 * 96) + 32) << 16)
1060 | ((d / 96 % 96 + 32) << 8)
1062 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1064 ( (d / (94 * 94 * 94) + 33) << 24)
1065 | ((d / (94 * 94) % 94 + 33) << 16)
1066 | ((d / 94 % 94 + 33) << 8)
1068 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1070 ( (d / (96 * 96 * 96) + 32) << 24)
1071 | ((d / (96 * 96) % 96 + 32) << 16)
1072 | ((d / 96 % 96 + 32) << 8)
1076 printf ("Unknown CCS-conversion %d is specified!",
1077 XCHARSET_CONVERSION (charset));
1081 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1082 ( XCHARSET_MIN_CODE (charset) == 0 )
1084 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1085 (XCHARSET_CODE_OFFSET (charset)
1086 == XCHARSET_MIN_CODE (charset))
1091 if (XCHARSET_DIMENSION (charset) == 1)
1093 if (XCHARSET_CHARS (charset) == 94)
1095 if (((d = ch - (MIN_CHAR_94
1096 + (XCHARSET_FINAL (charset) - '0') * 94))
1101 else if (XCHARSET_CHARS (charset) == 96)
1103 if (((d = ch - (MIN_CHAR_96
1104 + (XCHARSET_FINAL (charset) - '0') * 96))
1112 else if (XCHARSET_DIMENSION (charset) == 2)
1114 if (XCHARSET_CHARS (charset) == 94)
1116 if (((d = ch - (MIN_CHAR_94x94
1118 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1121 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1123 else if (XCHARSET_CHARS (charset) == 96)
1125 if (((d = ch - (MIN_CHAR_96x96
1127 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1130 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1141 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1143 if (c <= MAX_CHAR_BASIC_LATIN)
1145 *charset = Vcharset_ascii;
1150 *charset = Vcharset_control_1;
1155 *charset = Vcharset_latin_iso8859_1;
1159 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1161 *charset = Vcharset_hebrew_iso8859_8;
1162 return c - MIN_CHAR_HEBREW + 0x20;
1165 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1167 *charset = Vcharset_thai_tis620;
1168 return c - MIN_CHAR_THAI + 0x20;
1171 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1172 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1174 return list2 (Vcharset_katakana_jisx0201,
1175 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1178 else if (c <= MAX_CHAR_BMP)
1180 *charset = Vcharset_ucs_bmp;
1183 else if (c <= MAX_CHAR_SMP)
1185 *charset = Vcharset_ucs_smp;
1186 return c - MIN_CHAR_SMP;
1188 else if (c <= MAX_CHAR_SIP)
1190 *charset = Vcharset_ucs_sip;
1191 return c - MIN_CHAR_SIP;
1193 else if (c < MIN_CHAR_94)
1195 *charset = Vcharset_ucs;
1198 else if (c <= MAX_CHAR_94)
1200 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1201 ((c - MIN_CHAR_94) / 94) + '0',
1202 CHARSET_LEFT_TO_RIGHT);
1203 if (!NILP (*charset))
1204 return ((c - MIN_CHAR_94) % 94) + 33;
1207 *charset = Vcharset_ucs;
1211 else if (c <= MAX_CHAR_96)
1213 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1214 ((c - MIN_CHAR_96) / 96) + '0',
1215 CHARSET_LEFT_TO_RIGHT);
1216 if (!NILP (*charset))
1217 return ((c - MIN_CHAR_96) % 96) + 32;
1220 *charset = Vcharset_ucs;
1224 else if (c <= MAX_CHAR_94x94)
1227 = CHARSET_BY_ATTRIBUTES (94, 2,
1228 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1229 CHARSET_LEFT_TO_RIGHT);
1230 if (!NILP (*charset))
1231 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1232 | (((c - MIN_CHAR_94x94) % 94) + 33);
1235 *charset = Vcharset_ucs;
1239 else if (c <= MAX_CHAR_96x96)
1242 = CHARSET_BY_ATTRIBUTES (96, 2,
1243 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1244 CHARSET_LEFT_TO_RIGHT);
1245 if (!NILP (*charset))
1246 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1247 | (((c - MIN_CHAR_96x96) % 96) + 32);
1250 *charset = Vcharset_ucs;
1256 *charset = Vcharset_ucs;
1261 Lisp_Object Vdefault_coded_charset_priority_list;
1265 /************************************************************************/
1266 /* Basic charset Lisp functions */
1267 /************************************************************************/
1269 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1270 Return non-nil if OBJECT is a charset.
1274 return CHARSETP (object) ? Qt : Qnil;
1277 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1278 Retrieve the charset of the given name.
1279 If CHARSET-OR-NAME is a charset object, it is simply returned.
1280 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1281 nil is returned. Otherwise the associated charset object is returned.
1285 if (CHARSETP (charset_or_name))
1286 return charset_or_name;
1288 CHECK_SYMBOL (charset_or_name);
1289 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1292 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1293 Retrieve the charset of the given name.
1294 Same as `find-charset' except an error is signalled if there is no such
1295 charset instead of returning nil.
1299 Lisp_Object charset = Ffind_charset (name);
1302 signal_simple_error ("No such charset", name);
1306 /* We store the charsets in hash tables with the names as the key and the
1307 actual charset object as the value. Occasionally we need to use them
1308 in a list format. These routines provide us with that. */
1309 struct charset_list_closure
1311 Lisp_Object *charset_list;
1315 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1316 void *charset_list_closure)
1318 /* This function can GC */
1319 struct charset_list_closure *chcl =
1320 (struct charset_list_closure*) charset_list_closure;
1321 Lisp_Object *charset_list = chcl->charset_list;
1323 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1327 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1328 Return a list of the names of all defined charsets.
1332 Lisp_Object charset_list = Qnil;
1333 struct gcpro gcpro1;
1334 struct charset_list_closure charset_list_closure;
1336 GCPRO1 (charset_list);
1337 charset_list_closure.charset_list = &charset_list;
1338 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1339 &charset_list_closure);
1342 return charset_list;
1345 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1346 Return the name of charset CHARSET.
1350 return XCHARSET_NAME (Fget_charset (charset));
1353 /* #### SJT Should generic properties be allowed? */
1354 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1355 Define a new character set.
1356 This function is for use with Mule support.
1357 NAME is a symbol, the name by which the character set is normally referred.
1358 DOC-STRING is a string describing the character set.
1359 PROPS is a property list, describing the specific nature of the
1360 character set. Recognized properties are:
1362 'short-name Short version of the charset name (ex: Latin-1)
1363 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1364 'registry A regular expression matching the font registry field for
1366 'dimension Number of octets used to index a character in this charset.
1367 Either 1 or 2. Defaults to 1.
1368 If UTF-2000 feature is enabled, 3 or 4 are also available.
1369 'columns Number of columns used to display a character in this charset.
1370 Only used in TTY mode. (Under X, the actual width of a
1371 character can be derived from the font used to display the
1372 characters.) If unspecified, defaults to the dimension
1373 (this is almost always the correct value).
1374 'chars Number of characters in each dimension (94 or 96).
1375 Defaults to 94. Note that if the dimension is 2, the
1376 character set thus described is 94x94 or 96x96.
1377 If UTF-2000 feature is enabled, 128 or 256 are also available.
1378 'final Final byte of ISO 2022 escape sequence. Must be
1379 supplied. Each combination of (DIMENSION, CHARS) defines a
1380 separate namespace for final bytes. Note that ISO
1381 2022 restricts the final byte to the range
1382 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1383 dimension == 2. Note also that final bytes in the range
1384 0x30 - 0x3F are reserved for user-defined (not official)
1386 'graphic 0 (use left half of font on output) or 1 (use right half
1387 of font on output). Defaults to 0. For example, for
1388 a font whose registry is ISO8859-1, the left half
1389 (octets 0x20 - 0x7F) is the `ascii' character set, while
1390 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1391 character set. With 'graphic set to 0, the octets
1392 will have their high bit cleared; with it set to 1,
1393 the octets will have their high bit set.
1394 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1396 'ccl-program A compiled CCL program used to convert a character in
1397 this charset into an index into the font. This is in
1398 addition to the 'graphic property. The CCL program
1399 is passed the octets of the character, with the high
1400 bit cleared and set depending upon whether the value
1401 of the 'graphic property is 0 or 1.
1402 'mother [UTF-2000 only] Base coded-charset.
1403 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1404 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1405 'code-offset [UTF-2000 only] Offset for a code-point of a base
1407 'conversion [UTF-2000 only] Conversion for a code-point of a base
1408 coded-charset (94x60 or 94x94x60).
1410 (name, doc_string, props))
1412 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1413 int direction = CHARSET_LEFT_TO_RIGHT;
1414 Lisp_Object registry = Qnil;
1415 Lisp_Object charset;
1416 Lisp_Object ccl_program = Qnil;
1417 Lisp_Object short_name = Qnil, long_name = Qnil;
1418 Lisp_Object mother = Qnil;
1419 int min_code = 0, max_code = 0, code_offset = 0;
1420 int byte_offset = -1;
1423 CHECK_SYMBOL (name);
1424 if (!NILP (doc_string))
1425 CHECK_STRING (doc_string);
1427 charset = Ffind_charset (name);
1428 if (!NILP (charset))
1429 signal_simple_error ("Cannot redefine existing charset", name);
1432 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1434 if (EQ (keyword, Qshort_name))
1436 CHECK_STRING (value);
1440 if (EQ (keyword, Qlong_name))
1442 CHECK_STRING (value);
1446 else if (EQ (keyword, Qdimension))
1449 dimension = XINT (value);
1450 if (dimension < 1 ||
1457 signal_simple_error ("Invalid value for 'dimension", value);
1460 else if (EQ (keyword, Qchars))
1463 chars = XINT (value);
1464 if (chars != 94 && chars != 96
1466 && chars != 128 && chars != 256
1469 signal_simple_error ("Invalid value for 'chars", value);
1472 else if (EQ (keyword, Qcolumns))
1475 columns = XINT (value);
1476 if (columns != 1 && columns != 2)
1477 signal_simple_error ("Invalid value for 'columns", value);
1480 else if (EQ (keyword, Qgraphic))
1483 graphic = XINT (value);
1491 signal_simple_error ("Invalid value for 'graphic", value);
1494 else if (EQ (keyword, Qregistry))
1496 CHECK_STRING (value);
1500 else if (EQ (keyword, Qdirection))
1502 if (EQ (value, Ql2r))
1503 direction = CHARSET_LEFT_TO_RIGHT;
1504 else if (EQ (value, Qr2l))
1505 direction = CHARSET_RIGHT_TO_LEFT;
1507 signal_simple_error ("Invalid value for 'direction", value);
1510 else if (EQ (keyword, Qfinal))
1512 CHECK_CHAR_COERCE_INT (value);
1513 final = XCHAR (value);
1514 if (final < '0' || final > '~')
1515 signal_simple_error ("Invalid value for 'final", value);
1519 else if (EQ (keyword, Qmother))
1521 mother = Fget_charset (value);
1524 else if (EQ (keyword, Qmin_code))
1527 min_code = XUINT (value);
1530 else if (EQ (keyword, Qmax_code))
1533 max_code = XUINT (value);
1536 else if (EQ (keyword, Qcode_offset))
1539 code_offset = XUINT (value);
1542 else if (EQ (keyword, Qconversion))
1544 if (EQ (value, Q94x60))
1545 conversion = CONVERSION_94x60;
1546 else if (EQ (value, Q94x94x60))
1547 conversion = CONVERSION_94x94x60;
1549 signal_simple_error ("Unrecognized conversion", value);
1553 else if (EQ (keyword, Qccl_program))
1555 struct ccl_program test_ccl;
1557 if (setup_ccl_program (&test_ccl, value) < 0)
1558 signal_simple_error ("Invalid value for 'ccl-program", value);
1559 ccl_program = value;
1563 signal_simple_error ("Unrecognized property", keyword);
1569 error ("'final must be specified");
1571 if (dimension == 2 && final > 0x5F)
1573 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1576 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1577 CHARSET_LEFT_TO_RIGHT)) ||
1578 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1579 CHARSET_RIGHT_TO_LEFT)))
1581 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1583 id = get_unallocated_leading_byte (dimension);
1585 if (NILP (doc_string))
1586 doc_string = build_string ("");
1588 if (NILP (registry))
1589 registry = build_string ("");
1591 if (NILP (short_name))
1592 XSETSTRING (short_name, XSYMBOL (name)->name);
1594 if (NILP (long_name))
1595 long_name = doc_string;
1598 columns = dimension;
1600 if (byte_offset < 0)
1604 else if (chars == 96)
1610 charset = make_charset (id, name, chars, dimension, columns, graphic,
1611 final, direction, short_name, long_name,
1612 doc_string, registry,
1613 Qnil, min_code, max_code, code_offset, byte_offset,
1614 mother, conversion);
1615 if (!NILP (ccl_program))
1616 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1620 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1622 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1623 NEW-NAME is the name of the new charset. Return the new charset.
1625 (charset, new_name))
1627 Lisp_Object new_charset = Qnil;
1628 int id, chars, dimension, columns, graphic, final;
1630 Lisp_Object registry, doc_string, short_name, long_name;
1633 charset = Fget_charset (charset);
1634 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1635 signal_simple_error ("Charset already has reverse-direction charset",
1638 CHECK_SYMBOL (new_name);
1639 if (!NILP (Ffind_charset (new_name)))
1640 signal_simple_error ("Cannot redefine existing charset", new_name);
1642 cs = XCHARSET (charset);
1644 chars = CHARSET_CHARS (cs);
1645 dimension = CHARSET_DIMENSION (cs);
1646 columns = CHARSET_COLUMNS (cs);
1647 id = get_unallocated_leading_byte (dimension);
1649 graphic = CHARSET_GRAPHIC (cs);
1650 final = CHARSET_FINAL (cs);
1651 direction = CHARSET_RIGHT_TO_LEFT;
1652 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1653 direction = CHARSET_LEFT_TO_RIGHT;
1654 doc_string = CHARSET_DOC_STRING (cs);
1655 short_name = CHARSET_SHORT_NAME (cs);
1656 long_name = CHARSET_LONG_NAME (cs);
1657 registry = CHARSET_REGISTRY (cs);
1659 new_charset = make_charset (id, new_name, chars, dimension, columns,
1660 graphic, final, direction, short_name, long_name,
1661 doc_string, registry,
1663 CHARSET_DECODING_TABLE(cs),
1664 CHARSET_MIN_CODE(cs),
1665 CHARSET_MAX_CODE(cs),
1666 CHARSET_CODE_OFFSET(cs),
1667 CHARSET_BYTE_OFFSET(cs),
1669 CHARSET_CONVERSION (cs)
1671 Qnil, 0, 0, 0, 0, Qnil, 0
1675 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1676 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1681 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1682 Define symbol ALIAS as an alias for CHARSET.
1686 CHECK_SYMBOL (alias);
1687 charset = Fget_charset (charset);
1688 return Fputhash (alias, charset, Vcharset_hash_table);
1691 /* #### Reverse direction charsets not yet implemented. */
1693 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1695 Return the reverse-direction charset parallel to CHARSET, if any.
1696 This is the charset with the same properties (in particular, the same
1697 dimension, number of characters per dimension, and final byte) as
1698 CHARSET but whose characters are displayed in the opposite direction.
1702 charset = Fget_charset (charset);
1703 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1707 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1708 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1709 If DIRECTION is omitted, both directions will be checked (left-to-right
1710 will be returned if character sets exist for both directions).
1712 (dimension, chars, final, direction))
1714 int dm, ch, fi, di = -1;
1715 Lisp_Object obj = Qnil;
1717 CHECK_INT (dimension);
1718 dm = XINT (dimension);
1719 if (dm < 1 || dm > 2)
1720 signal_simple_error ("Invalid value for DIMENSION", dimension);
1724 if (ch != 94 && ch != 96)
1725 signal_simple_error ("Invalid value for CHARS", chars);
1727 CHECK_CHAR_COERCE_INT (final);
1729 if (fi < '0' || fi > '~')
1730 signal_simple_error ("Invalid value for FINAL", final);
1732 if (EQ (direction, Ql2r))
1733 di = CHARSET_LEFT_TO_RIGHT;
1734 else if (EQ (direction, Qr2l))
1735 di = CHARSET_RIGHT_TO_LEFT;
1736 else if (!NILP (direction))
1737 signal_simple_error ("Invalid value for DIRECTION", direction);
1739 if (dm == 2 && fi > 0x5F)
1741 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1745 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1747 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1750 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1753 return XCHARSET_NAME (obj);
1757 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1758 Return short name of CHARSET.
1762 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1765 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1766 Return long name of CHARSET.
1770 return XCHARSET_LONG_NAME (Fget_charset (charset));
1773 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1774 Return description of CHARSET.
1778 return XCHARSET_DOC_STRING (Fget_charset (charset));
1781 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1782 Return dimension of CHARSET.
1786 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1789 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1790 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1791 Recognized properties are those listed in `make-charset', as well as
1792 'name and 'doc-string.
1798 charset = Fget_charset (charset);
1799 cs = XCHARSET (charset);
1801 CHECK_SYMBOL (prop);
1802 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1803 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1804 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1805 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1806 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1807 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1808 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1809 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1810 Qnil : make_char (CHARSET_FINAL (cs));
1811 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1812 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1813 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1814 if (EQ (prop, Qdirection))
1815 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1816 if (EQ (prop, Qreverse_direction_charset))
1818 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1819 /* #### Is this translation OK? If so, error checking sufficient? */
1820 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1823 if (EQ (prop, Qmother))
1824 return CHARSET_MOTHER (cs);
1825 if (EQ (prop, Qmin_code))
1826 return make_int (CHARSET_MIN_CODE (cs));
1827 if (EQ (prop, Qmax_code))
1828 return make_int (CHARSET_MAX_CODE (cs));
1830 signal_simple_error ("Unrecognized charset property name", prop);
1831 return Qnil; /* not reached */
1834 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1835 Return charset identification number of CHARSET.
1839 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1842 /* #### We need to figure out which properties we really want to
1845 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1846 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1848 (charset, ccl_program))
1850 struct ccl_program test_ccl;
1852 charset = Fget_charset (charset);
1853 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1854 signal_simple_error ("Invalid ccl-program", ccl_program);
1855 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1860 invalidate_charset_font_caches (Lisp_Object charset)
1862 /* Invalidate font cache entries for charset on all devices. */
1863 Lisp_Object devcons, concons, hash_table;
1864 DEVICE_LOOP_NO_BREAK (devcons, concons)
1866 struct device *d = XDEVICE (XCAR (devcons));
1867 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1868 if (!UNBOUNDP (hash_table))
1869 Fclrhash (hash_table);
1873 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1874 Set the 'registry property of CHARSET to REGISTRY.
1876 (charset, registry))
1878 charset = Fget_charset (charset);
1879 CHECK_STRING (registry);
1880 XCHARSET_REGISTRY (charset) = registry;
1881 invalidate_charset_font_caches (charset);
1882 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1887 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1888 Return mapping-table of CHARSET.
1892 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1895 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1896 Set mapping-table of CHARSET to TABLE.
1900 struct Lisp_Charset *cs;
1904 charset = Fget_charset (charset);
1905 cs = XCHARSET (charset);
1909 CHARSET_DECODING_TABLE(cs) = Qnil;
1912 else if (VECTORP (table))
1914 int ccs_len = CHARSET_BYTE_SIZE (cs);
1915 int ret = decoding_table_check_elements (table,
1916 CHARSET_DIMENSION (cs),
1921 signal_simple_error ("Too big table", table);
1923 signal_simple_error ("Invalid element is found", table);
1925 signal_simple_error ("Something wrong", table);
1927 CHARSET_DECODING_TABLE(cs) = Qnil;
1930 signal_error (Qwrong_type_argument,
1931 list2 (build_translated_string ("vector-or-nil-p"),
1934 byte_offset = CHARSET_BYTE_OFFSET (cs);
1935 switch (CHARSET_DIMENSION (cs))
1938 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1940 Lisp_Object c = XVECTOR_DATA(table)[i];
1943 Fput_char_attribute (c, XCHARSET_NAME (charset),
1944 make_int (i + byte_offset));
1948 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1950 Lisp_Object v = XVECTOR_DATA(table)[i];
1956 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1958 Lisp_Object c = XVECTOR_DATA(v)[j];
1962 (c, XCHARSET_NAME (charset),
1963 make_int ( ( (i + byte_offset) << 8 )
1969 Fput_char_attribute (v, XCHARSET_NAME (charset),
1970 make_int (i + byte_offset));
1979 /************************************************************************/
1980 /* Lisp primitives for working with characters */
1981 /************************************************************************/
1984 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
1985 Make a character from CHARSET and code-point CODE.
1986 If DEFINED_ONLY is non-nil, builtin character is not returned.
1987 If corresponding character is not found, nil is returned.
1989 (charset, code, defined_only))
1993 charset = Fget_charset (charset);
1996 if (XCHARSET_GRAPHIC (charset) == 1)
1998 if (NILP (defined_only))
1999 c = DECODE_CHAR (charset, c);
2001 c = decode_defined_char (charset, c);
2002 return c >= 0 ? make_char (c) : Qnil;
2005 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2006 Make a builtin character from CHARSET and code-point CODE.
2012 charset = Fget_charset (charset);
2014 if (EQ (charset, Vcharset_latin_viscii))
2016 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2022 (ret = Fget_char_attribute (chr,
2023 Vcharset_latin_viscii_lower,
2026 charset = Vcharset_latin_viscii_lower;
2030 (ret = Fget_char_attribute (chr,
2031 Vcharset_latin_viscii_upper,
2034 charset = Vcharset_latin_viscii_upper;
2041 if (XCHARSET_GRAPHIC (charset) == 1)
2044 c = decode_builtin_char (charset, c);
2045 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
2049 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2050 Make a character from CHARSET and octets ARG1 and ARG2.
2051 ARG2 is required only for characters from two-dimensional charsets.
2052 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2053 character s with caron.
2055 (charset, arg1, arg2))
2059 int lowlim, highlim;
2061 charset = Fget_charset (charset);
2062 cs = XCHARSET (charset);
2064 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2065 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2067 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2069 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2070 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2073 /* It is useful (and safe, according to Olivier Galibert) to strip
2074 the 8th bit off ARG1 and ARG2 because it allows programmers to
2075 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2076 Latin 2 code of the character. */
2084 if (a1 < lowlim || a1 > highlim)
2085 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2087 if (CHARSET_DIMENSION (cs) == 1)
2091 ("Charset is of dimension one; second octet must be nil", arg2);
2092 return make_char (MAKE_CHAR (charset, a1, 0));
2101 a2 = XINT (arg2) & 0x7f;
2103 if (a2 < lowlim || a2 > highlim)
2104 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2106 return make_char (MAKE_CHAR (charset, a1, a2));
2109 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2110 Return the character set of CHARACTER.
2114 CHECK_CHAR_COERCE_INT (character);
2116 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2119 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2120 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2121 N defaults to 0 if omitted.
2125 Lisp_Object charset;
2128 CHECK_CHAR_COERCE_INT (character);
2130 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2132 if (NILP (n) || EQ (n, Qzero))
2133 return make_int (octet0);
2134 else if (EQ (n, make_int (1)))
2135 return make_int (octet1);
2137 signal_simple_error ("Octet number must be 0 or 1", n);
2141 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2142 Return code-point of CHARACTER in specified CHARSET.
2144 (character, charset, defined_only))
2148 CHECK_CHAR_COERCE_INT (character);
2149 charset = Fget_charset (charset);
2150 code_point = charset_code_point (charset, XCHAR (character),
2151 !NILP (defined_only));
2152 if (code_point >= 0)
2153 return make_int (code_point);
2159 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2160 Return list of charset and one or two position-codes of CHARACTER.
2164 /* This function can GC */
2165 struct gcpro gcpro1, gcpro2;
2166 Lisp_Object charset = Qnil;
2167 Lisp_Object rc = Qnil;
2175 GCPRO2 (charset, rc);
2176 CHECK_CHAR_COERCE_INT (character);
2179 code_point = ENCODE_CHAR (XCHAR (character), charset);
2180 dimension = XCHARSET_DIMENSION (charset);
2181 while (dimension > 0)
2183 rc = Fcons (make_int (code_point & 255), rc);
2187 rc = Fcons (XCHARSET_NAME (charset), rc);
2189 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2191 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2193 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2197 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2206 #ifdef ENABLE_COMPOSITE_CHARS
2207 /************************************************************************/
2208 /* composite character functions */
2209 /************************************************************************/
2212 lookup_composite_char (Bufbyte *str, int len)
2214 Lisp_Object lispstr = make_string (str, len);
2215 Lisp_Object ch = Fgethash (lispstr,
2216 Vcomposite_char_string2char_hash_table,
2222 if (composite_char_row_next >= 128)
2223 signal_simple_error ("No more composite chars available", lispstr);
2224 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2225 composite_char_col_next);
2226 Fputhash (make_char (emch), lispstr,
2227 Vcomposite_char_char2string_hash_table);
2228 Fputhash (lispstr, make_char (emch),
2229 Vcomposite_char_string2char_hash_table);
2230 composite_char_col_next++;
2231 if (composite_char_col_next >= 128)
2233 composite_char_col_next = 32;
2234 composite_char_row_next++;
2243 composite_char_string (Emchar ch)
2245 Lisp_Object str = Fgethash (make_char (ch),
2246 Vcomposite_char_char2string_hash_table,
2248 assert (!UNBOUNDP (str));
2252 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2253 Convert a string into a single composite character.
2254 The character is the result of overstriking all the characters in
2259 CHECK_STRING (string);
2260 return make_char (lookup_composite_char (XSTRING_DATA (string),
2261 XSTRING_LENGTH (string)));
2264 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2265 Return a string of the characters comprising a composite character.
2273 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2274 signal_simple_error ("Must be composite char", ch);
2275 return composite_char_string (emch);
2277 #endif /* ENABLE_COMPOSITE_CHARS */
2280 /************************************************************************/
2281 /* initialization */
2282 /************************************************************************/
2285 syms_of_mule_charset (void)
2287 INIT_LRECORD_IMPLEMENTATION (charset);
2289 DEFSUBR (Fcharsetp);
2290 DEFSUBR (Ffind_charset);
2291 DEFSUBR (Fget_charset);
2292 DEFSUBR (Fcharset_list);
2293 DEFSUBR (Fcharset_name);
2294 DEFSUBR (Fmake_charset);
2295 DEFSUBR (Fmake_reverse_direction_charset);
2296 /* DEFSUBR (Freverse_direction_charset); */
2297 DEFSUBR (Fdefine_charset_alias);
2298 DEFSUBR (Fcharset_from_attributes);
2299 DEFSUBR (Fcharset_short_name);
2300 DEFSUBR (Fcharset_long_name);
2301 DEFSUBR (Fcharset_description);
2302 DEFSUBR (Fcharset_dimension);
2303 DEFSUBR (Fcharset_property);
2304 DEFSUBR (Fcharset_id);
2305 DEFSUBR (Fset_charset_ccl_program);
2306 DEFSUBR (Fset_charset_registry);
2308 DEFSUBR (Fcharset_mapping_table);
2309 DEFSUBR (Fset_charset_mapping_table);
2313 DEFSUBR (Fdecode_char);
2314 DEFSUBR (Fdecode_builtin_char);
2315 DEFSUBR (Fencode_char);
2317 DEFSUBR (Fmake_char);
2318 DEFSUBR (Fchar_charset);
2319 DEFSUBR (Fchar_octet);
2320 DEFSUBR (Fsplit_char);
2322 #ifdef ENABLE_COMPOSITE_CHARS
2323 DEFSUBR (Fmake_composite_char);
2324 DEFSUBR (Fcomposite_char_string);
2327 defsymbol (&Qcharsetp, "charsetp");
2328 defsymbol (&Qregistry, "registry");
2329 defsymbol (&Qfinal, "final");
2330 defsymbol (&Qgraphic, "graphic");
2331 defsymbol (&Qdirection, "direction");
2332 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2333 defsymbol (&Qshort_name, "short-name");
2334 defsymbol (&Qlong_name, "long-name");
2336 defsymbol (&Qmother, "mother");
2337 defsymbol (&Qmin_code, "min-code");
2338 defsymbol (&Qmax_code, "max-code");
2339 defsymbol (&Qcode_offset, "code-offset");
2340 defsymbol (&Qconversion, "conversion");
2341 defsymbol (&Q94x60, "94x60");
2342 defsymbol (&Q94x94x60, "94x94x60");
2345 defsymbol (&Ql2r, "l2r");
2346 defsymbol (&Qr2l, "r2l");
2348 /* Charsets, compatible with FSF 20.3
2349 Naming convention is Script-Charset[-Edition] */
2350 defsymbol (&Qascii, "ascii");
2351 defsymbol (&Qcontrol_1, "control-1");
2352 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2353 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2354 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2355 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2356 defsymbol (&Qthai_tis620, "thai-tis620");
2357 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2358 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2359 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2360 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2361 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2362 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2363 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2364 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2365 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2366 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
2367 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2368 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2369 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2370 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2371 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2373 defsymbol (&Qmap_ucs, "=ucs");
2374 defsymbol (&Qucs, "ucs");
2375 defsymbol (&Qucs_bmp, "ucs-bmp");
2376 defsymbol (&Qucs_smp, "ucs-smp");
2377 defsymbol (&Qucs_sip, "ucs-sip");
2378 defsymbol (&Qlatin_viscii, "latin-viscii");
2379 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2380 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2381 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2382 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2383 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2384 defsymbol (&Qmap_jis_x0208, "=jis-x0208");
2385 defsymbol (&Qmap_jis_x0208_1990, "=jis-x0208-1990");
2386 defsymbol (&Qchinese_big5, "chinese-big5");
2387 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2389 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2390 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2392 defsymbol (&Qcomposite, "composite");
2396 vars_of_mule_charset (void)
2403 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2404 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2406 /* Table of charsets indexed by leading byte. */
2407 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2408 chlook->charset_by_leading_byte[i] = Qnil;
2411 /* Table of charsets indexed by type/final-byte. */
2412 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2413 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2414 chlook->charset_by_attributes[i][j] = Qnil;
2416 /* Table of charsets indexed by type/final-byte/direction. */
2417 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2418 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2419 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2420 chlook->charset_by_attributes[i][j][k] = Qnil;
2424 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2426 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2427 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2431 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2432 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2433 Leading-code of private TYPE9N charset of column-width 1.
2435 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2439 Vdefault_coded_charset_priority_list = Qnil;
2440 DEFVAR_LISP ("default-coded-charset-priority-list",
2441 &Vdefault_coded_charset_priority_list /*
2442 Default order of preferred coded-character-sets.
2448 complex_vars_of_mule_charset (void)
2450 staticpro (&Vcharset_hash_table);
2451 Vcharset_hash_table =
2452 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2454 /* Predefined character sets. We store them into variables for
2458 staticpro (&Vcharset_ucs);
2460 make_charset (LEADING_BYTE_UCS, Qmap_ucs, 256, 4,
2461 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2462 build_string ("UCS"),
2463 build_string ("UCS"),
2464 build_string ("ISO/IEC 10646"),
2466 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2467 staticpro (&Vcharset_ucs_bmp);
2469 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2470 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2471 build_string ("BMP"),
2472 build_string ("UCS-BMP"),
2473 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2475 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2476 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2477 staticpro (&Vcharset_ucs_smp);
2479 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2480 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2481 build_string ("SMP"),
2482 build_string ("UCS-SMP"),
2483 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2484 build_string ("UCS00-1"),
2485 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2486 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2487 staticpro (&Vcharset_ucs_sip);
2489 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2490 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2491 build_string ("SIP"),
2492 build_string ("UCS-SIP"),
2493 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2494 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2495 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2496 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2498 # define MIN_CHAR_THAI 0
2499 # define MAX_CHAR_THAI 0
2500 /* # define MIN_CHAR_HEBREW 0 */
2501 /* # define MAX_CHAR_HEBREW 0 */
2502 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2503 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2505 staticpro (&Vcharset_ascii);
2507 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2508 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2509 build_string ("ASCII"),
2510 build_string ("ASCII)"),
2511 build_string ("ASCII (ISO646 IRV)"),
2512 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2513 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2514 staticpro (&Vcharset_control_1);
2515 Vcharset_control_1 =
2516 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2517 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2518 build_string ("C1"),
2519 build_string ("Control characters"),
2520 build_string ("Control characters 128-191"),
2522 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2523 staticpro (&Vcharset_latin_iso8859_1);
2524 Vcharset_latin_iso8859_1 =
2525 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2526 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2527 build_string ("Latin-1"),
2528 build_string ("ISO8859-1 (Latin-1)"),
2529 build_string ("ISO8859-1 (Latin-1)"),
2530 build_string ("iso8859-1"),
2531 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2532 staticpro (&Vcharset_latin_iso8859_2);
2533 Vcharset_latin_iso8859_2 =
2534 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2535 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2536 build_string ("Latin-2"),
2537 build_string ("ISO8859-2 (Latin-2)"),
2538 build_string ("ISO8859-2 (Latin-2)"),
2539 build_string ("iso8859-2"),
2540 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2541 staticpro (&Vcharset_latin_iso8859_3);
2542 Vcharset_latin_iso8859_3 =
2543 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2544 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2545 build_string ("Latin-3"),
2546 build_string ("ISO8859-3 (Latin-3)"),
2547 build_string ("ISO8859-3 (Latin-3)"),
2548 build_string ("iso8859-3"),
2549 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2550 staticpro (&Vcharset_latin_iso8859_4);
2551 Vcharset_latin_iso8859_4 =
2552 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2553 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2554 build_string ("Latin-4"),
2555 build_string ("ISO8859-4 (Latin-4)"),
2556 build_string ("ISO8859-4 (Latin-4)"),
2557 build_string ("iso8859-4"),
2558 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2559 staticpro (&Vcharset_thai_tis620);
2560 Vcharset_thai_tis620 =
2561 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2562 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2563 build_string ("TIS620"),
2564 build_string ("TIS620 (Thai)"),
2565 build_string ("TIS620.2529 (Thai)"),
2566 build_string ("tis620"),
2567 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2568 staticpro (&Vcharset_greek_iso8859_7);
2569 Vcharset_greek_iso8859_7 =
2570 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2571 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2572 build_string ("ISO8859-7"),
2573 build_string ("ISO8859-7 (Greek)"),
2574 build_string ("ISO8859-7 (Greek)"),
2575 build_string ("iso8859-7"),
2576 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2577 staticpro (&Vcharset_arabic_iso8859_6);
2578 Vcharset_arabic_iso8859_6 =
2579 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2580 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2581 build_string ("ISO8859-6"),
2582 build_string ("ISO8859-6 (Arabic)"),
2583 build_string ("ISO8859-6 (Arabic)"),
2584 build_string ("iso8859-6"),
2585 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2586 staticpro (&Vcharset_hebrew_iso8859_8);
2587 Vcharset_hebrew_iso8859_8 =
2588 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2589 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2590 build_string ("ISO8859-8"),
2591 build_string ("ISO8859-8 (Hebrew)"),
2592 build_string ("ISO8859-8 (Hebrew)"),
2593 build_string ("iso8859-8"),
2595 0 /* MIN_CHAR_HEBREW */,
2596 0 /* MAX_CHAR_HEBREW */, 0, 32,
2597 Qnil, CONVERSION_IDENTICAL);
2598 staticpro (&Vcharset_katakana_jisx0201);
2599 Vcharset_katakana_jisx0201 =
2600 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2601 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2602 build_string ("JISX0201 Kana"),
2603 build_string ("JISX0201.1976 (Japanese Kana)"),
2604 build_string ("JISX0201.1976 Japanese Kana"),
2605 build_string ("jisx0201\\.1976"),
2606 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2607 staticpro (&Vcharset_latin_jisx0201);
2608 Vcharset_latin_jisx0201 =
2609 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2610 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2611 build_string ("JISX0201 Roman"),
2612 build_string ("JISX0201.1976 (Japanese Roman)"),
2613 build_string ("JISX0201.1976 Japanese Roman"),
2614 build_string ("jisx0201\\.1976"),
2615 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2616 staticpro (&Vcharset_cyrillic_iso8859_5);
2617 Vcharset_cyrillic_iso8859_5 =
2618 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2619 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2620 build_string ("ISO8859-5"),
2621 build_string ("ISO8859-5 (Cyrillic)"),
2622 build_string ("ISO8859-5 (Cyrillic)"),
2623 build_string ("iso8859-5"),
2624 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2625 staticpro (&Vcharset_latin_iso8859_9);
2626 Vcharset_latin_iso8859_9 =
2627 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2628 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2629 build_string ("Latin-5"),
2630 build_string ("ISO8859-9 (Latin-5)"),
2631 build_string ("ISO8859-9 (Latin-5)"),
2632 build_string ("iso8859-9"),
2633 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2635 staticpro (&Vcharset_jis_x0208);
2636 Vcharset_jis_x0208 =
2637 make_charset (LEADING_BYTE_JIS_X0208,
2638 Qmap_jis_x0208, 94, 2,
2639 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2640 build_string ("JIS X0208"),
2641 build_string ("JIS X0208 Common"),
2642 build_string ("JIS X0208 Common part"),
2643 build_string ("jisx0208\\.1990"),
2645 MIN_CHAR_JIS_X0208_1990,
2646 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2647 Qnil, CONVERSION_94x94);
2649 staticpro (&Vcharset_japanese_jisx0208_1978);
2650 Vcharset_japanese_jisx0208_1978 =
2651 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2652 Qjapanese_jisx0208_1978, 94, 2,
2653 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2654 build_string ("JIS X0208:1978"),
2655 build_string ("JIS X0208:1978 (Japanese)"),
2657 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2658 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2665 CONVERSION_IDENTICAL);
2666 staticpro (&Vcharset_chinese_gb2312);
2667 Vcharset_chinese_gb2312 =
2668 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
2669 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2670 build_string ("GB2312"),
2671 build_string ("GB2312)"),
2672 build_string ("GB2312 Chinese simplified"),
2673 build_string ("gb2312"),
2674 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2675 staticpro (&Vcharset_chinese_gb12345);
2676 Vcharset_chinese_gb12345 =
2677 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
2678 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2679 build_string ("G1"),
2680 build_string ("GB 12345)"),
2681 build_string ("GB 12345-1990"),
2682 build_string ("GB12345\\(\\.1990\\)?-0"),
2683 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2684 staticpro (&Vcharset_japanese_jisx0208);
2685 Vcharset_japanese_jisx0208 =
2686 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
2687 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2688 build_string ("JISX0208"),
2689 build_string ("JIS X0208:1983 (Japanese)"),
2690 build_string ("JIS X0208:1983 Japanese Kanji"),
2691 build_string ("jisx0208\\.1983"),
2698 CONVERSION_IDENTICAL);
2700 staticpro (&Vcharset_japanese_jisx0208_1990);
2701 Vcharset_japanese_jisx0208_1990 =
2702 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2703 Qmap_jis_x0208_1990, 94, 2,
2704 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2705 build_string ("JISX0208-1990"),
2706 build_string ("JIS X0208:1990 (Japanese)"),
2707 build_string ("JIS X0208:1990 Japanese Kanji"),
2708 build_string ("jisx0208\\.1990"),
2710 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
2711 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
2712 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
2713 Vcharset_jis_x0208 /* Qnil */,
2714 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
2716 staticpro (&Vcharset_korean_ksc5601);
2717 Vcharset_korean_ksc5601 =
2718 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
2719 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2720 build_string ("KSC5601"),
2721 build_string ("KSC5601 (Korean"),
2722 build_string ("KSC5601 Korean Hangul and Hanja"),
2723 build_string ("ksc5601"),
2724 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2725 staticpro (&Vcharset_japanese_jisx0212);
2726 Vcharset_japanese_jisx0212 =
2727 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
2728 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2729 build_string ("JISX0212"),
2730 build_string ("JISX0212 (Japanese)"),
2731 build_string ("JISX0212 Japanese Supplement"),
2732 build_string ("jisx0212"),
2733 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2735 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2736 staticpro (&Vcharset_chinese_cns11643_1);
2737 Vcharset_chinese_cns11643_1 =
2738 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
2739 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2740 build_string ("CNS11643-1"),
2741 build_string ("CNS11643-1 (Chinese traditional)"),
2743 ("CNS 11643 Plane 1 Chinese traditional"),
2744 build_string (CHINESE_CNS_PLANE_RE("1")),
2745 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2746 staticpro (&Vcharset_chinese_cns11643_2);
2747 Vcharset_chinese_cns11643_2 =
2748 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
2749 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2750 build_string ("CNS11643-2"),
2751 build_string ("CNS11643-2 (Chinese traditional)"),
2753 ("CNS 11643 Plane 2 Chinese traditional"),
2754 build_string (CHINESE_CNS_PLANE_RE("2")),
2755 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2757 staticpro (&Vcharset_latin_tcvn5712);
2758 Vcharset_latin_tcvn5712 =
2759 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2760 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2761 build_string ("TCVN 5712"),
2762 build_string ("TCVN 5712 (VSCII-2)"),
2763 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
2764 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2765 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2766 staticpro (&Vcharset_latin_viscii_lower);
2767 Vcharset_latin_viscii_lower =
2768 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2769 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2770 build_string ("VISCII lower"),
2771 build_string ("VISCII lower (Vietnamese)"),
2772 build_string ("VISCII lower (Vietnamese)"),
2773 build_string ("MULEVISCII-LOWER"),
2774 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2775 staticpro (&Vcharset_latin_viscii_upper);
2776 Vcharset_latin_viscii_upper =
2777 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2778 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2779 build_string ("VISCII upper"),
2780 build_string ("VISCII upper (Vietnamese)"),
2781 build_string ("VISCII upper (Vietnamese)"),
2782 build_string ("MULEVISCII-UPPER"),
2783 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2784 staticpro (&Vcharset_latin_viscii);
2785 Vcharset_latin_viscii =
2786 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2787 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2788 build_string ("VISCII"),
2789 build_string ("VISCII 1.1 (Vietnamese)"),
2790 build_string ("VISCII 1.1 (Vietnamese)"),
2791 build_string ("VISCII1\\.1"),
2792 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2793 staticpro (&Vcharset_chinese_big5);
2794 Vcharset_chinese_big5 =
2795 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
2796 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2797 build_string ("Big5"),
2798 build_string ("Big5"),
2799 build_string ("Big5 Chinese traditional"),
2800 build_string ("big5-0"),
2802 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2803 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
2805 staticpro (&Vcharset_ethiopic_ucs);
2806 Vcharset_ethiopic_ucs =
2807 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
2808 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2809 build_string ("Ethiopic (UCS)"),
2810 build_string ("Ethiopic (UCS)"),
2811 build_string ("Ethiopic of UCS"),
2812 build_string ("Ethiopic-Unicode"),
2813 Qnil, 0x1200, 0x137F, 0, 0,
2814 Qnil, CONVERSION_IDENTICAL);
2816 staticpro (&Vcharset_chinese_big5_1);
2817 Vcharset_chinese_big5_1 =
2818 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
2819 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
2820 build_string ("Big5"),
2821 build_string ("Big5 (Level-1)"),
2823 ("Big5 Level-1 Chinese traditional"),
2824 build_string ("big5"),
2825 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2826 staticpro (&Vcharset_chinese_big5_2);
2827 Vcharset_chinese_big5_2 =
2828 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
2829 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
2830 build_string ("Big5"),
2831 build_string ("Big5 (Level-2)"),
2833 ("Big5 Level-2 Chinese traditional"),
2834 build_string ("big5"),
2835 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2837 #ifdef ENABLE_COMPOSITE_CHARS
2838 /* #### For simplicity, we put composite chars into a 96x96 charset.
2839 This is going to lead to problems because you can run out of
2840 room, esp. as we don't yet recycle numbers. */
2841 staticpro (&Vcharset_composite);
2842 Vcharset_composite =
2843 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
2844 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2845 build_string ("Composite"),
2846 build_string ("Composite characters"),
2847 build_string ("Composite characters"),
2850 /* #### not dumped properly */
2851 composite_char_row_next = 32;
2852 composite_char_col_next = 32;
2854 Vcomposite_char_string2char_hash_table =
2855 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2856 Vcomposite_char_char2string_hash_table =
2857 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2858 staticpro (&Vcomposite_char_string2char_hash_table);
2859 staticpro (&Vcomposite_char_char2string_hash_table);
2860 #endif /* ENABLE_COMPOSITE_CHARS */