1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002,2003 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_latin_viscii;
72 Lisp_Object Vcharset_latin_tcvn5712;
73 Lisp_Object Vcharset_latin_viscii_lower;
74 Lisp_Object Vcharset_latin_viscii_upper;
75 Lisp_Object Vcharset_jis_x0208;
76 Lisp_Object Vcharset_chinese_big5;
77 Lisp_Object Vcharset_ethiopic_ucs;
79 Lisp_Object Vcharset_chinese_big5_1;
80 Lisp_Object Vcharset_chinese_big5_2;
82 #ifdef ENABLE_COMPOSITE_CHARS
83 Lisp_Object Vcharset_composite;
85 /* Hash tables for composite chars. One maps string representing
86 composed chars to their equivalent chars; one goes the
88 Lisp_Object Vcomposite_char_char2string_hash_table;
89 Lisp_Object Vcomposite_char_string2char_hash_table;
91 static int composite_char_row_next;
92 static int composite_char_col_next;
94 #endif /* ENABLE_COMPOSITE_CHARS */
96 struct charset_lookup *chlook;
98 static const struct lrecord_description charset_lookup_description_1[] = {
99 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
101 NUM_LEADING_BYTES+4*128
108 static const struct struct_description charset_lookup_description = {
109 sizeof (struct charset_lookup),
110 charset_lookup_description_1
114 /* Table of number of bytes in the string representation of a character
115 indexed by the first byte of that representation.
117 rep_bytes_by_first_byte(c) is more efficient than the equivalent
118 canonical computation:
120 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
122 const Bytecount rep_bytes_by_first_byte[0xA0] =
123 { /* 0x00 - 0x7f are for straight ASCII */
124 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
125 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
126 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
127 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
128 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132 /* 0x80 - 0x8f are for Dimension-1 official charsets */
134 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
136 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
138 /* 0x90 - 0x9d are for Dimension-2 official charsets */
139 /* 0x9e is for Dimension-1 private charsets */
140 /* 0x9f is for Dimension-2 private charsets */
141 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
147 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
149 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
153 if (XVECTOR_LENGTH (v) > ccs_len)
156 for (i = 0; i < XVECTOR_LENGTH (v); i++)
158 Lisp_Object c = XVECTOR_DATA(v)[i];
160 if (!NILP (c) && !CHARP (c))
164 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
176 put_char_ccs_code_point (Lisp_Object character,
177 Lisp_Object ccs, Lisp_Object value)
179 if (!EQ (XCHARSET_NAME (ccs), Qmap_ucs)
181 || (XCHAR (character) != XINT (value)))
183 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
187 { /* obsolete representation: value must be a list of bytes */
188 Lisp_Object ret = Fcar (value);
192 signal_simple_error ("Invalid value for coded-charset", value);
193 code_point = XINT (ret);
194 if (XCHARSET_GRAPHIC (ccs) == 1)
202 signal_simple_error ("Invalid value for coded-charset",
206 signal_simple_error ("Invalid value for coded-charset",
209 if (XCHARSET_GRAPHIC (ccs) == 1)
211 code_point = (code_point << 8) | j;
214 value = make_int (code_point);
216 else if (INTP (value))
218 code_point = XINT (value);
219 if (XCHARSET_GRAPHIC (ccs) == 1)
221 code_point &= 0x7F7F7F7F;
222 value = make_int (code_point);
226 signal_simple_error ("Invalid value for coded-charset", value);
230 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
233 decoding_table_remove_char (ccs, XINT (cpos));
236 decoding_table_put_char (ccs, code_point, character);
242 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
244 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
245 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
247 if (VECTORP (decoding_table))
249 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
253 decoding_table_remove_char (ccs, XINT (cpos));
256 if (CHAR_TABLEP (encoding_table))
258 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
266 int leading_code_private_11;
269 Lisp_Object Qcharsetp;
271 /* Qdoc_string, Qdimension, Qchars defined in general.c */
272 Lisp_Object Qregistry, Qfinal, Qgraphic;
273 Lisp_Object Qdirection;
274 Lisp_Object Qreverse_direction_charset;
275 Lisp_Object Qleading_byte;
276 Lisp_Object Qshort_name, Qlong_name;
279 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
280 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60;
314 Qvietnamese_viscii_lower,
315 Qvietnamese_viscii_upper,
325 Lisp_Object Ql2r, Qr2l;
327 Lisp_Object Vcharset_hash_table;
329 /* Composite characters are characters constructed by overstriking two
330 or more regular characters.
332 1) The old Mule implementation involves storing composite characters
333 in a buffer as a tag followed by all of the actual characters
334 used to make up the composite character. I think this is a bad
335 idea; it greatly complicates code that wants to handle strings
336 one character at a time because it has to deal with the possibility
337 of great big ungainly characters. It's much more reasonable to
338 simply store an index into a table of composite characters.
340 2) The current implementation only allows for 16,384 separate
341 composite characters over the lifetime of the XEmacs process.
342 This could become a potential problem if the user
343 edited lots of different files that use composite characters.
344 Due to FSF bogosity, increasing the number of allowable
345 composite characters under Mule would decrease the number
346 of possible faces that can exist. Mule already has shrunk
347 this to 2048, and further shrinkage would become uncomfortable.
348 No such problems exist in XEmacs.
350 Composite characters could be represented as 0x80 C1 C2 C3,
351 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
352 for slightly under 2^20 (one million) composite characters
353 over the XEmacs process lifetime, and you only need to
354 increase the size of a Mule character from 19 to 21 bits.
355 Or you could use 0x80 C1 C2 C3 C4, allowing for about
356 85 million (slightly over 2^26) composite characters. */
359 /************************************************************************/
360 /* Basic Emchar functions */
361 /************************************************************************/
363 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
364 string in STR. Returns the number of bytes stored.
365 Do not call this directly. Use the macro set_charptr_emchar() instead.
369 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
384 else if ( c <= 0x7ff )
386 *p++ = (c >> 6) | 0xc0;
387 *p++ = (c & 0x3f) | 0x80;
389 else if ( c <= 0xffff )
391 *p++ = (c >> 12) | 0xe0;
392 *p++ = ((c >> 6) & 0x3f) | 0x80;
393 *p++ = (c & 0x3f) | 0x80;
395 else if ( c <= 0x1fffff )
397 *p++ = (c >> 18) | 0xf0;
398 *p++ = ((c >> 12) & 0x3f) | 0x80;
399 *p++ = ((c >> 6) & 0x3f) | 0x80;
400 *p++ = (c & 0x3f) | 0x80;
402 else if ( c <= 0x3ffffff )
404 *p++ = (c >> 24) | 0xf8;
405 *p++ = ((c >> 18) & 0x3f) | 0x80;
406 *p++ = ((c >> 12) & 0x3f) | 0x80;
407 *p++ = ((c >> 6) & 0x3f) | 0x80;
408 *p++ = (c & 0x3f) | 0x80;
412 *p++ = (c >> 30) | 0xfc;
413 *p++ = ((c >> 24) & 0x3f) | 0x80;
414 *p++ = ((c >> 18) & 0x3f) | 0x80;
415 *p++ = ((c >> 12) & 0x3f) | 0x80;
416 *p++ = ((c >> 6) & 0x3f) | 0x80;
417 *p++ = (c & 0x3f) | 0x80;
420 BREAKUP_CHAR (c, charset, c1, c2);
421 lb = CHAR_LEADING_BYTE (c);
422 if (LEADING_BYTE_PRIVATE_P (lb))
423 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
425 if (EQ (charset, Vcharset_control_1))
434 /* Return the first character from a Mule-encoded string in STR,
435 assuming it's non-ASCII. Do not call this directly.
436 Use the macro charptr_emchar() instead. */
439 non_ascii_charptr_emchar (const Bufbyte *str)
452 else if ( b >= 0xf8 )
457 else if ( b >= 0xf0 )
462 else if ( b >= 0xe0 )
467 else if ( b >= 0xc0 )
477 for( ; len > 0; len-- )
480 ch = ( ch << 6 ) | ( b & 0x3f );
484 Bufbyte i0 = *str, i1, i2 = 0;
487 if (i0 == LEADING_BYTE_CONTROL_1)
488 return (Emchar) (*++str - 0x20);
490 if (LEADING_BYTE_PREFIX_P (i0))
495 charset = CHARSET_BY_LEADING_BYTE (i0);
496 if (XCHARSET_DIMENSION (charset) == 2)
499 return MAKE_CHAR (charset, i1, i2);
503 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
504 Do not call this directly. Use the macro valid_char_p() instead. */
508 non_ascii_valid_char_p (Emchar ch)
512 /* Must have only lowest 19 bits set */
516 f1 = CHAR_FIELD1 (ch);
517 f2 = CHAR_FIELD2 (ch);
518 f3 = CHAR_FIELD3 (ch);
524 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
525 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
526 f2 > MAX_CHAR_FIELD2_PRIVATE)
531 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
532 f2 <= MAX_CHAR_FIELD2_PRIVATE))
536 NOTE: This takes advantage of the fact that
537 FIELD2_TO_OFFICIAL_LEADING_BYTE and
538 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
540 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
541 if (EQ (charset, Qnil))
543 return (XCHARSET_CHARS (charset) == 96);
549 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
550 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
551 f1 > MAX_CHAR_FIELD1_PRIVATE)
553 if (f2 < 0x20 || f3 < 0x20)
556 #ifdef ENABLE_COMPOSITE_CHARS
557 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
559 if (UNBOUNDP (Fgethash (make_int (ch),
560 Vcomposite_char_char2string_hash_table,
565 #endif /* ENABLE_COMPOSITE_CHARS */
567 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
568 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
571 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
573 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
576 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
578 if (EQ (charset, Qnil))
580 return (XCHARSET_CHARS (charset) == 96);
586 /************************************************************************/
587 /* Basic string functions */
588 /************************************************************************/
590 /* Copy the character pointed to by SRC into DST. Do not call this
591 directly. Use the macro charptr_copy_char() instead.
592 Return the number of bytes copied. */
595 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
597 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
599 for (i = bytes; i; i--, dst++, src++)
605 /************************************************************************/
606 /* streams of Emchars */
607 /************************************************************************/
609 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
610 The functions below are not meant to be called directly; use
611 the macros in insdel.h. */
614 Lstream_get_emchar_1 (Lstream *stream, int ch)
616 Bufbyte str[MAX_EMCHAR_LEN];
617 Bufbyte *strptr = str;
620 str[0] = (Bufbyte) ch;
622 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
624 int c = Lstream_getc (stream);
625 bufpos_checking_assert (c >= 0);
626 *++strptr = (Bufbyte) c;
628 return charptr_emchar (str);
632 Lstream_fput_emchar (Lstream *stream, Emchar ch)
634 Bufbyte str[MAX_EMCHAR_LEN];
635 Bytecount len = set_charptr_emchar (str, ch);
636 return Lstream_write (stream, str, len);
640 Lstream_funget_emchar (Lstream *stream, Emchar ch)
642 Bufbyte str[MAX_EMCHAR_LEN];
643 Bytecount len = set_charptr_emchar (str, ch);
644 Lstream_unread (stream, str, len);
648 /************************************************************************/
650 /************************************************************************/
653 mark_charset (Lisp_Object obj)
655 Lisp_Charset *cs = XCHARSET (obj);
657 mark_object (cs->short_name);
658 mark_object (cs->long_name);
659 mark_object (cs->doc_string);
660 mark_object (cs->registry);
661 mark_object (cs->ccl_program);
663 mark_object (cs->decoding_table);
664 mark_object (cs->mother);
670 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
672 Lisp_Charset *cs = XCHARSET (obj);
676 error ("printing unreadable object #<charset %s 0x%x>",
677 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
680 write_c_string ("#<charset ", printcharfun);
681 print_internal (CHARSET_NAME (cs), printcharfun, 0);
682 write_c_string (" ", printcharfun);
683 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
684 write_c_string (" ", printcharfun);
685 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
686 write_c_string (" ", printcharfun);
687 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
688 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
690 CHARSET_DIMENSION (cs),
691 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
692 CHARSET_COLUMNS (cs),
693 CHARSET_GRAPHIC (cs),
695 write_c_string (buf, printcharfun);
696 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
697 sprintf (buf, " 0x%x>", cs->header.uid);
698 write_c_string (buf, printcharfun);
701 static const struct lrecord_description charset_description[] = {
702 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
703 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
704 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
705 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
706 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
707 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
708 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
710 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
711 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
716 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
717 mark_charset, print_charset, 0, 0, 0,
721 /* Make a new charset. */
722 /* #### SJT Should generic properties be allowed? */
724 make_charset (Charset_ID id, Lisp_Object name,
725 unsigned short chars, unsigned char dimension,
726 unsigned char columns, unsigned char graphic,
727 Bufbyte final, unsigned char direction, Lisp_Object short_name,
728 Lisp_Object long_name, Lisp_Object doc,
730 Lisp_Object decoding_table,
731 Emchar min_code, Emchar max_code,
732 Emchar code_offset, unsigned char byte_offset,
733 Lisp_Object mother, unsigned char conversion)
736 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
740 XSETCHARSET (obj, cs);
742 CHARSET_ID (cs) = id;
743 CHARSET_NAME (cs) = name;
744 CHARSET_SHORT_NAME (cs) = short_name;
745 CHARSET_LONG_NAME (cs) = long_name;
746 CHARSET_CHARS (cs) = chars;
747 CHARSET_DIMENSION (cs) = dimension;
748 CHARSET_DIRECTION (cs) = direction;
749 CHARSET_COLUMNS (cs) = columns;
750 CHARSET_GRAPHIC (cs) = graphic;
751 CHARSET_FINAL (cs) = final;
752 CHARSET_DOC_STRING (cs) = doc;
753 CHARSET_REGISTRY (cs) = reg;
754 CHARSET_CCL_PROGRAM (cs) = Qnil;
755 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
757 CHARSET_DECODING_TABLE(cs) = Qunbound;
758 CHARSET_MIN_CODE (cs) = min_code;
759 CHARSET_MAX_CODE (cs) = max_code;
760 CHARSET_CODE_OFFSET (cs) = code_offset;
761 CHARSET_BYTE_OFFSET (cs) = byte_offset;
762 CHARSET_MOTHER (cs) = mother;
763 CHARSET_CONVERSION (cs) = conversion;
767 if (id == LEADING_BYTE_ASCII)
768 CHARSET_REP_BYTES (cs) = 1;
770 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
772 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
777 /* some charsets do not have final characters. This includes
778 ASCII, Control-1, Composite, and the two faux private
780 unsigned char iso2022_type
781 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
783 if (code_offset == 0)
785 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
786 chlook->charset_by_attributes[iso2022_type][final] = obj;
790 (chlook->charset_by_attributes[iso2022_type][final][direction]));
791 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
795 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
796 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
798 /* Some charsets are "faux" and don't have names or really exist at
799 all except in the leading-byte table. */
801 Fputhash (name, obj, Vcharset_hash_table);
806 get_unallocated_leading_byte (int dimension)
811 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
814 lb = chlook->next_allocated_leading_byte++;
818 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
821 lb = chlook->next_allocated_1_byte_leading_byte++;
825 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
828 lb = chlook->next_allocated_2_byte_leading_byte++;
834 ("No more character sets free for this dimension",
835 make_int (dimension));
841 /* Number of Big5 characters which have the same code in 1st byte. */
843 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
846 decode_defined_char (Lisp_Object ccs, int code_point)
848 int dim = XCHARSET_DIMENSION (ccs);
849 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
857 = get_ccs_octet_table (decoding_table, ccs,
858 (code_point >> (dim * 8)) & 255);
860 if (CHARP (decoding_table))
861 return XCHAR (decoding_table);
864 else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
866 if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
868 if ( EQ (mother, Vcharset_ucs) )
869 return DECODE_CHAR (mother, code_point);
871 return decode_defined_char (mother, code_point);
878 decode_builtin_char (Lisp_Object charset, int code_point)
880 Lisp_Object mother = XCHARSET_MOTHER (charset);
883 if ( XCHARSET_MAX_CODE (charset) > 0 )
885 if ( CHARSETP (mother) )
887 int code = code_point;
889 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
891 int row = code_point >> 8;
892 int cell = code_point & 255;
896 else if (row < 16 + 32 + 30)
897 code = (row - (16 + 32)) * 94 + cell - 33;
898 else if (row < 18 + 32 + 30)
900 else if (row < 18 + 32 + 60)
901 code = (row - (18 + 32)) * 94 + cell - 33;
903 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
905 int plane = code_point >> 16;
906 int row = (code_point >> 8) & 255;
907 int cell = code_point & 255;
911 else if (row < 16 + 32 + 30)
913 = (plane - 33) * 94 * 60
914 + (row - (16 + 32)) * 94
916 else if (row < 18 + 32 + 30)
918 else if (row < 18 + 32 + 60)
920 = (plane - 33) * 94 * 60
921 + (row - (18 + 32)) * 94
925 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
930 = (XCHARSET_DIMENSION (charset) == 1
932 code_point - XCHARSET_BYTE_OFFSET (charset)
934 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
935 * XCHARSET_CHARS (charset)
936 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
937 + XCHARSET_CODE_OFFSET (charset);
938 if ((cid < XCHARSET_MIN_CODE (charset))
939 || (XCHARSET_MAX_CODE (charset) < cid))
944 else if ((final = XCHARSET_FINAL (charset)) >= '0')
946 if (XCHARSET_DIMENSION (charset) == 1)
948 switch (XCHARSET_CHARS (charset))
952 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
955 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
963 switch (XCHARSET_CHARS (charset))
966 return MIN_CHAR_94x94
967 + (final - '0') * 94 * 94
968 + (((code_point >> 8) & 0x7F) - 33) * 94
969 + ((code_point & 0x7F) - 33);
971 return MIN_CHAR_96x96
972 + (final - '0') * 96 * 96
973 + (((code_point >> 8) & 0x7F) - 32) * 96
974 + ((code_point & 0x7F) - 32);
986 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
988 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
991 if ( CHAR_TABLEP (encoding_table)
992 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
997 Lisp_Object mother = XCHARSET_MOTHER (charset);
998 int min = XCHARSET_MIN_CODE (charset);
999 int max = XCHARSET_MAX_CODE (charset);
1002 if ( CHARSETP (mother) )
1004 if (XCHARSET_FINAL (charset) >= '0')
1005 code = charset_code_point (mother, ch, 1);
1007 code = charset_code_point (mother, ch, defined_only);
1009 else if (defined_only)
1011 else if ( ((max == 0) && CHARSETP (mother)
1012 && (XCHARSET_FINAL (charset) == 0))
1013 || ((min <= ch) && (ch <= max)) )
1015 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1016 || ((min <= code) && (code <= max)) )
1018 int d = code - XCHARSET_CODE_OFFSET (charset);
1020 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1022 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1024 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1026 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1029 int cell = d % 94 + 33;
1035 return (row << 8) | cell;
1037 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1038 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1039 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1040 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1041 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1043 int plane = d / (94 * 60) + 33;
1044 int row = (d % (94 * 60)) / 94;
1045 int cell = d % 94 + 33;
1051 return (plane << 16) | (row << 8) | cell;
1053 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1055 ( (d / (94 * 94) + 33) << 16)
1056 | ((d / 94 % 94 + 33) << 8)
1058 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1060 ( (d / (96 * 96) + 32) << 16)
1061 | ((d / 96 % 96 + 32) << 8)
1063 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1065 ( (d / (94 * 94 * 94) + 33) << 24)
1066 | ((d / (94 * 94) % 94 + 33) << 16)
1067 | ((d / 94 % 94 + 33) << 8)
1069 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1071 ( (d / (96 * 96 * 96) + 32) << 24)
1072 | ((d / (96 * 96) % 96 + 32) << 16)
1073 | ((d / 96 % 96 + 32) << 8)
1077 printf ("Unknown CCS-conversion %d is specified!",
1078 XCHARSET_CONVERSION (charset));
1082 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1083 ( XCHARSET_MIN_CODE (charset) == 0 )
1085 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1086 (XCHARSET_CODE_OFFSET (charset)
1087 == XCHARSET_MIN_CODE (charset))
1092 if (XCHARSET_DIMENSION (charset) == 1)
1094 if (XCHARSET_CHARS (charset) == 94)
1096 if (((d = ch - (MIN_CHAR_94
1097 + (XCHARSET_FINAL (charset) - '0') * 94))
1102 else if (XCHARSET_CHARS (charset) == 96)
1104 if (((d = ch - (MIN_CHAR_96
1105 + (XCHARSET_FINAL (charset) - '0') * 96))
1113 else if (XCHARSET_DIMENSION (charset) == 2)
1115 if (XCHARSET_CHARS (charset) == 94)
1117 if (((d = ch - (MIN_CHAR_94x94
1119 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1122 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1124 else if (XCHARSET_CHARS (charset) == 96)
1126 if (((d = ch - (MIN_CHAR_96x96
1128 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1131 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1142 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1144 if (c <= MAX_CHAR_BASIC_LATIN)
1146 *charset = Vcharset_ascii;
1151 *charset = Vcharset_control_1;
1156 *charset = Vcharset_latin_iso8859_1;
1160 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1162 *charset = Vcharset_hebrew_iso8859_8;
1163 return c - MIN_CHAR_HEBREW + 0x20;
1166 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1168 *charset = Vcharset_thai_tis620;
1169 return c - MIN_CHAR_THAI + 0x20;
1172 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1173 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1175 return list2 (Vcharset_katakana_jisx0201,
1176 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1179 else if (c <= MAX_CHAR_BMP)
1181 *charset = Vcharset_ucs_bmp;
1184 else if (c <= MAX_CHAR_SMP)
1186 *charset = Vcharset_ucs_smp;
1187 return c - MIN_CHAR_SMP;
1189 else if (c <= MAX_CHAR_SIP)
1191 *charset = Vcharset_ucs_sip;
1192 return c - MIN_CHAR_SIP;
1194 else if (c < MIN_CHAR_94)
1196 *charset = Vcharset_ucs;
1199 else if (c <= MAX_CHAR_94)
1201 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1202 ((c - MIN_CHAR_94) / 94) + '0',
1203 CHARSET_LEFT_TO_RIGHT);
1204 if (!NILP (*charset))
1205 return ((c - MIN_CHAR_94) % 94) + 33;
1208 *charset = Vcharset_ucs;
1212 else if (c <= MAX_CHAR_96)
1214 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1215 ((c - MIN_CHAR_96) / 96) + '0',
1216 CHARSET_LEFT_TO_RIGHT);
1217 if (!NILP (*charset))
1218 return ((c - MIN_CHAR_96) % 96) + 32;
1221 *charset = Vcharset_ucs;
1225 else if (c <= MAX_CHAR_94x94)
1228 = CHARSET_BY_ATTRIBUTES (94, 2,
1229 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1230 CHARSET_LEFT_TO_RIGHT);
1231 if (!NILP (*charset))
1232 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1233 | (((c - MIN_CHAR_94x94) % 94) + 33);
1236 *charset = Vcharset_ucs;
1240 else if (c <= MAX_CHAR_96x96)
1243 = CHARSET_BY_ATTRIBUTES (96, 2,
1244 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1245 CHARSET_LEFT_TO_RIGHT);
1246 if (!NILP (*charset))
1247 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1248 | (((c - MIN_CHAR_96x96) % 96) + 32);
1251 *charset = Vcharset_ucs;
1257 *charset = Vcharset_ucs;
1262 Lisp_Object Vdefault_coded_charset_priority_list;
1266 /************************************************************************/
1267 /* Basic charset Lisp functions */
1268 /************************************************************************/
1270 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1271 Return non-nil if OBJECT is a charset.
1275 return CHARSETP (object) ? Qt : Qnil;
1278 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1279 Retrieve the charset of the given name.
1280 If CHARSET-OR-NAME is a charset object, it is simply returned.
1281 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1282 nil is returned. Otherwise the associated charset object is returned.
1286 if (CHARSETP (charset_or_name))
1287 return charset_or_name;
1289 CHECK_SYMBOL (charset_or_name);
1290 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1293 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1294 Retrieve the charset of the given name.
1295 Same as `find-charset' except an error is signalled if there is no such
1296 charset instead of returning nil.
1300 Lisp_Object charset = Ffind_charset (name);
1303 signal_simple_error ("No such charset", name);
1307 /* We store the charsets in hash tables with the names as the key and the
1308 actual charset object as the value. Occasionally we need to use them
1309 in a list format. These routines provide us with that. */
1310 struct charset_list_closure
1312 Lisp_Object *charset_list;
1316 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1317 void *charset_list_closure)
1319 /* This function can GC */
1320 struct charset_list_closure *chcl =
1321 (struct charset_list_closure*) charset_list_closure;
1322 Lisp_Object *charset_list = chcl->charset_list;
1324 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1328 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1329 Return a list of the names of all defined charsets.
1333 Lisp_Object charset_list = Qnil;
1334 struct gcpro gcpro1;
1335 struct charset_list_closure charset_list_closure;
1337 GCPRO1 (charset_list);
1338 charset_list_closure.charset_list = &charset_list;
1339 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1340 &charset_list_closure);
1343 return charset_list;
1346 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1347 Return the name of charset CHARSET.
1351 return XCHARSET_NAME (Fget_charset (charset));
1354 /* #### SJT Should generic properties be allowed? */
1355 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1356 Define a new character set.
1357 This function is for use with Mule support.
1358 NAME is a symbol, the name by which the character set is normally referred.
1359 DOC-STRING is a string describing the character set.
1360 PROPS is a property list, describing the specific nature of the
1361 character set. Recognized properties are:
1363 'short-name Short version of the charset name (ex: Latin-1)
1364 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1365 'registry A regular expression matching the font registry field for
1367 'dimension Number of octets used to index a character in this charset.
1368 Either 1 or 2. Defaults to 1.
1369 If UTF-2000 feature is enabled, 3 or 4 are also available.
1370 'columns Number of columns used to display a character in this charset.
1371 Only used in TTY mode. (Under X, the actual width of a
1372 character can be derived from the font used to display the
1373 characters.) If unspecified, defaults to the dimension
1374 (this is almost always the correct value).
1375 'chars Number of characters in each dimension (94 or 96).
1376 Defaults to 94. Note that if the dimension is 2, the
1377 character set thus described is 94x94 or 96x96.
1378 If UTF-2000 feature is enabled, 128 or 256 are also available.
1379 'final Final byte of ISO 2022 escape sequence. Must be
1380 supplied. Each combination of (DIMENSION, CHARS) defines a
1381 separate namespace for final bytes. Note that ISO
1382 2022 restricts the final byte to the range
1383 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1384 dimension == 2. Note also that final bytes in the range
1385 0x30 - 0x3F are reserved for user-defined (not official)
1387 'graphic 0 (use left half of font on output) or 1 (use right half
1388 of font on output). Defaults to 0. For example, for
1389 a font whose registry is ISO8859-1, the left half
1390 (octets 0x20 - 0x7F) is the `ascii' character set, while
1391 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1392 character set. With 'graphic set to 0, the octets
1393 will have their high bit cleared; with it set to 1,
1394 the octets will have their high bit set.
1395 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1397 'ccl-program A compiled CCL program used to convert a character in
1398 this charset into an index into the font. This is in
1399 addition to the 'graphic property. The CCL program
1400 is passed the octets of the character, with the high
1401 bit cleared and set depending upon whether the value
1402 of the 'graphic property is 0 or 1.
1403 'mother [UTF-2000 only] Base coded-charset.
1404 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1405 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1406 'code-offset [UTF-2000 only] Offset for a code-point of a base
1408 'conversion [UTF-2000 only] Conversion for a code-point of a base
1409 coded-charset (94x60 or 94x94x60).
1411 (name, doc_string, props))
1413 int id = 0, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1414 int direction = CHARSET_LEFT_TO_RIGHT;
1415 Lisp_Object registry = Qnil;
1416 Lisp_Object charset;
1417 Lisp_Object ccl_program = Qnil;
1418 Lisp_Object short_name = Qnil, long_name = Qnil;
1419 Lisp_Object mother = Qnil;
1420 int min_code = 0, max_code = 0, code_offset = 0;
1421 int byte_offset = -1;
1424 CHECK_SYMBOL (name);
1425 if (!NILP (doc_string))
1426 CHECK_STRING (doc_string);
1428 charset = Ffind_charset (name);
1429 if (!NILP (charset))
1430 signal_simple_error ("Cannot redefine existing charset", name);
1433 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1435 if (EQ (keyword, Qshort_name))
1437 CHECK_STRING (value);
1441 if (EQ (keyword, Qlong_name))
1443 CHECK_STRING (value);
1447 else if (EQ (keyword, Qiso_ir))
1451 id = - XINT (value);
1455 else if (EQ (keyword, Qdimension))
1458 dimension = XINT (value);
1459 if (dimension < 1 ||
1466 signal_simple_error ("Invalid value for 'dimension", value);
1469 else if (EQ (keyword, Qchars))
1472 chars = XINT (value);
1473 if (chars != 94 && chars != 96
1475 && chars != 128 && chars != 256
1478 signal_simple_error ("Invalid value for 'chars", value);
1481 else if (EQ (keyword, Qcolumns))
1484 columns = XINT (value);
1485 if (columns != 1 && columns != 2)
1486 signal_simple_error ("Invalid value for 'columns", value);
1489 else if (EQ (keyword, Qgraphic))
1492 graphic = XINT (value);
1500 signal_simple_error ("Invalid value for 'graphic", value);
1503 else if (EQ (keyword, Qregistry))
1505 CHECK_STRING (value);
1509 else if (EQ (keyword, Qdirection))
1511 if (EQ (value, Ql2r))
1512 direction = CHARSET_LEFT_TO_RIGHT;
1513 else if (EQ (value, Qr2l))
1514 direction = CHARSET_RIGHT_TO_LEFT;
1516 signal_simple_error ("Invalid value for 'direction", value);
1519 else if (EQ (keyword, Qfinal))
1521 CHECK_CHAR_COERCE_INT (value);
1522 final = XCHAR (value);
1523 if (final < '0' || final > '~')
1524 signal_simple_error ("Invalid value for 'final", value);
1528 else if (EQ (keyword, Qmother))
1530 mother = Fget_charset (value);
1533 else if (EQ (keyword, Qmin_code))
1536 min_code = XUINT (value);
1539 else if (EQ (keyword, Qmax_code))
1542 max_code = XUINT (value);
1545 else if (EQ (keyword, Qcode_offset))
1548 code_offset = XUINT (value);
1551 else if (EQ (keyword, Qconversion))
1553 if (EQ (value, Q94x60))
1554 conversion = CONVERSION_94x60;
1555 else if (EQ (value, Q94x94x60))
1556 conversion = CONVERSION_94x94x60;
1558 signal_simple_error ("Unrecognized conversion", value);
1562 else if (EQ (keyword, Qccl_program))
1564 struct ccl_program test_ccl;
1566 if (setup_ccl_program (&test_ccl, value) < 0)
1567 signal_simple_error ("Invalid value for 'ccl-program", value);
1568 ccl_program = value;
1572 signal_simple_error ("Unrecognized property", keyword);
1578 error ("'final must be specified");
1580 if (dimension == 2 && final > 0x5F)
1582 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1585 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1586 CHARSET_LEFT_TO_RIGHT)) ||
1587 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1588 CHARSET_RIGHT_TO_LEFT)))
1590 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1593 id = get_unallocated_leading_byte (dimension);
1595 if (NILP (doc_string))
1596 doc_string = build_string ("");
1598 if (NILP (registry))
1599 registry = build_string ("");
1601 if (NILP (short_name))
1602 XSETSTRING (short_name, XSYMBOL (name)->name);
1604 if (NILP (long_name))
1605 long_name = doc_string;
1608 columns = dimension;
1610 if (byte_offset < 0)
1614 else if (chars == 96)
1620 charset = make_charset (id, name, chars, dimension, columns, graphic,
1621 final, direction, short_name, long_name,
1622 doc_string, registry,
1623 Qnil, min_code, max_code, code_offset, byte_offset,
1624 mother, conversion);
1625 if (!NILP (ccl_program))
1626 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1630 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1632 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1633 NEW-NAME is the name of the new charset. Return the new charset.
1635 (charset, new_name))
1637 Lisp_Object new_charset = Qnil;
1638 int id, chars, dimension, columns, graphic, final;
1640 Lisp_Object registry, doc_string, short_name, long_name;
1643 charset = Fget_charset (charset);
1644 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1645 signal_simple_error ("Charset already has reverse-direction charset",
1648 CHECK_SYMBOL (new_name);
1649 if (!NILP (Ffind_charset (new_name)))
1650 signal_simple_error ("Cannot redefine existing charset", new_name);
1652 cs = XCHARSET (charset);
1654 chars = CHARSET_CHARS (cs);
1655 dimension = CHARSET_DIMENSION (cs);
1656 columns = CHARSET_COLUMNS (cs);
1657 id = get_unallocated_leading_byte (dimension);
1659 graphic = CHARSET_GRAPHIC (cs);
1660 final = CHARSET_FINAL (cs);
1661 direction = CHARSET_RIGHT_TO_LEFT;
1662 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1663 direction = CHARSET_LEFT_TO_RIGHT;
1664 doc_string = CHARSET_DOC_STRING (cs);
1665 short_name = CHARSET_SHORT_NAME (cs);
1666 long_name = CHARSET_LONG_NAME (cs);
1667 registry = CHARSET_REGISTRY (cs);
1669 new_charset = make_charset (id, new_name, chars, dimension, columns,
1670 graphic, final, direction, short_name, long_name,
1671 doc_string, registry,
1673 CHARSET_DECODING_TABLE(cs),
1674 CHARSET_MIN_CODE(cs),
1675 CHARSET_MAX_CODE(cs),
1676 CHARSET_CODE_OFFSET(cs),
1677 CHARSET_BYTE_OFFSET(cs),
1679 CHARSET_CONVERSION (cs)
1681 Qnil, 0, 0, 0, 0, Qnil, 0
1685 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1686 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1691 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1692 Define symbol ALIAS as an alias for CHARSET.
1696 CHECK_SYMBOL (alias);
1697 charset = Fget_charset (charset);
1698 return Fputhash (alias, charset, Vcharset_hash_table);
1701 /* #### Reverse direction charsets not yet implemented. */
1703 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1705 Return the reverse-direction charset parallel to CHARSET, if any.
1706 This is the charset with the same properties (in particular, the same
1707 dimension, number of characters per dimension, and final byte) as
1708 CHARSET but whose characters are displayed in the opposite direction.
1712 charset = Fget_charset (charset);
1713 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1717 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1718 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1719 If DIRECTION is omitted, both directions will be checked (left-to-right
1720 will be returned if character sets exist for both directions).
1722 (dimension, chars, final, direction))
1724 int dm, ch, fi, di = -1;
1725 Lisp_Object obj = Qnil;
1727 CHECK_INT (dimension);
1728 dm = XINT (dimension);
1729 if (dm < 1 || dm > 2)
1730 signal_simple_error ("Invalid value for DIMENSION", dimension);
1734 if (ch != 94 && ch != 96)
1735 signal_simple_error ("Invalid value for CHARS", chars);
1737 CHECK_CHAR_COERCE_INT (final);
1739 if (fi < '0' || fi > '~')
1740 signal_simple_error ("Invalid value for FINAL", final);
1742 if (EQ (direction, Ql2r))
1743 di = CHARSET_LEFT_TO_RIGHT;
1744 else if (EQ (direction, Qr2l))
1745 di = CHARSET_RIGHT_TO_LEFT;
1746 else if (!NILP (direction))
1747 signal_simple_error ("Invalid value for DIRECTION", direction);
1749 if (dm == 2 && fi > 0x5F)
1751 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1755 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1757 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1760 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1763 return XCHARSET_NAME (obj);
1767 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1768 Return short name of CHARSET.
1772 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1775 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1776 Return long name of CHARSET.
1780 return XCHARSET_LONG_NAME (Fget_charset (charset));
1783 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1784 Return description of CHARSET.
1788 return XCHARSET_DOC_STRING (Fget_charset (charset));
1791 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1792 Return dimension of CHARSET.
1796 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1799 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1800 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1801 Recognized properties are those listed in `make-charset', as well as
1802 'name and 'doc-string.
1808 charset = Fget_charset (charset);
1809 cs = XCHARSET (charset);
1811 CHECK_SYMBOL (prop);
1812 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1813 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1814 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1815 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1816 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1817 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1818 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1819 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1820 Qnil : make_char (CHARSET_FINAL (cs));
1821 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1822 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1823 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1824 if (EQ (prop, Qdirection))
1825 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1826 if (EQ (prop, Qreverse_direction_charset))
1828 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1829 /* #### Is this translation OK? If so, error checking sufficient? */
1830 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1833 if (EQ (prop, Qmother))
1834 return CHARSET_MOTHER (cs);
1835 if (EQ (prop, Qmin_code))
1836 return make_int (CHARSET_MIN_CODE (cs));
1837 if (EQ (prop, Qmax_code))
1838 return make_int (CHARSET_MAX_CODE (cs));
1840 signal_simple_error ("Unrecognized charset property name", prop);
1841 return Qnil; /* not reached */
1844 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1845 Return charset identification number of CHARSET.
1849 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1852 /* #### We need to figure out which properties we really want to
1855 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1856 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1858 (charset, ccl_program))
1860 struct ccl_program test_ccl;
1862 charset = Fget_charset (charset);
1863 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1864 signal_simple_error ("Invalid ccl-program", ccl_program);
1865 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1870 invalidate_charset_font_caches (Lisp_Object charset)
1872 /* Invalidate font cache entries for charset on all devices. */
1873 Lisp_Object devcons, concons, hash_table;
1874 DEVICE_LOOP_NO_BREAK (devcons, concons)
1876 struct device *d = XDEVICE (XCAR (devcons));
1877 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1878 if (!UNBOUNDP (hash_table))
1879 Fclrhash (hash_table);
1883 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1884 Set the 'registry property of CHARSET to REGISTRY.
1886 (charset, registry))
1888 charset = Fget_charset (charset);
1889 CHECK_STRING (registry);
1890 XCHARSET_REGISTRY (charset) = registry;
1891 invalidate_charset_font_caches (charset);
1892 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1897 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1898 Return mapping-table of CHARSET.
1902 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1905 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1906 Set mapping-table of CHARSET to TABLE.
1910 struct Lisp_Charset *cs;
1914 charset = Fget_charset (charset);
1915 cs = XCHARSET (charset);
1919 CHARSET_DECODING_TABLE(cs) = Qnil;
1922 else if (VECTORP (table))
1924 int ccs_len = CHARSET_BYTE_SIZE (cs);
1925 int ret = decoding_table_check_elements (table,
1926 CHARSET_DIMENSION (cs),
1931 signal_simple_error ("Too big table", table);
1933 signal_simple_error ("Invalid element is found", table);
1935 signal_simple_error ("Something wrong", table);
1937 CHARSET_DECODING_TABLE(cs) = Qnil;
1940 signal_error (Qwrong_type_argument,
1941 list2 (build_translated_string ("vector-or-nil-p"),
1944 byte_offset = CHARSET_BYTE_OFFSET (cs);
1945 switch (CHARSET_DIMENSION (cs))
1948 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1950 Lisp_Object c = XVECTOR_DATA(table)[i];
1953 Fput_char_attribute (c, XCHARSET_NAME (charset),
1954 make_int (i + byte_offset));
1958 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1960 Lisp_Object v = XVECTOR_DATA(table)[i];
1966 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1968 Lisp_Object c = XVECTOR_DATA(v)[j];
1972 (c, XCHARSET_NAME (charset),
1973 make_int ( ( (i + byte_offset) << 8 )
1979 Fput_char_attribute (v, XCHARSET_NAME (charset),
1980 make_int (i + byte_offset));
1989 /************************************************************************/
1990 /* Lisp primitives for working with characters */
1991 /************************************************************************/
1994 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
1995 Make a character from CHARSET and code-point CODE.
1996 If DEFINED_ONLY is non-nil, builtin character is not returned.
1997 If corresponding character is not found, nil is returned.
1999 (charset, code, defined_only))
2003 charset = Fget_charset (charset);
2006 if (XCHARSET_GRAPHIC (charset) == 1)
2008 if (NILP (defined_only))
2009 c = DECODE_CHAR (charset, c);
2011 c = decode_defined_char (charset, c);
2012 return c >= 0 ? make_char (c) : Qnil;
2015 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2016 Make a builtin character from CHARSET and code-point CODE.
2022 charset = Fget_charset (charset);
2024 if (EQ (charset, Vcharset_latin_viscii))
2026 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2032 (ret = Fget_char_attribute (chr,
2033 Vcharset_latin_viscii_lower,
2036 charset = Vcharset_latin_viscii_lower;
2040 (ret = Fget_char_attribute (chr,
2041 Vcharset_latin_viscii_upper,
2044 charset = Vcharset_latin_viscii_upper;
2051 if (XCHARSET_GRAPHIC (charset) == 1)
2054 c = decode_builtin_char (charset, c);
2055 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
2059 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2060 Make a character from CHARSET and octets ARG1 and ARG2.
2061 ARG2 is required only for characters from two-dimensional charsets.
2062 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2063 character s with caron.
2065 (charset, arg1, arg2))
2069 int lowlim, highlim;
2071 charset = Fget_charset (charset);
2072 cs = XCHARSET (charset);
2074 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2075 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2077 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2079 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2080 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2083 /* It is useful (and safe, according to Olivier Galibert) to strip
2084 the 8th bit off ARG1 and ARG2 because it allows programmers to
2085 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2086 Latin 2 code of the character. */
2094 if (a1 < lowlim || a1 > highlim)
2095 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2097 if (CHARSET_DIMENSION (cs) == 1)
2101 ("Charset is of dimension one; second octet must be nil", arg2);
2102 return make_char (MAKE_CHAR (charset, a1, 0));
2111 a2 = XINT (arg2) & 0x7f;
2113 if (a2 < lowlim || a2 > highlim)
2114 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2116 return make_char (MAKE_CHAR (charset, a1, a2));
2119 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2120 Return the character set of CHARACTER.
2124 CHECK_CHAR_COERCE_INT (character);
2126 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2129 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2130 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2131 N defaults to 0 if omitted.
2135 Lisp_Object charset;
2138 CHECK_CHAR_COERCE_INT (character);
2140 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2142 if (NILP (n) || EQ (n, Qzero))
2143 return make_int (octet0);
2144 else if (EQ (n, make_int (1)))
2145 return make_int (octet1);
2147 signal_simple_error ("Octet number must be 0 or 1", n);
2151 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2152 Return code-point of CHARACTER in specified CHARSET.
2154 (character, charset, defined_only))
2158 CHECK_CHAR_COERCE_INT (character);
2159 charset = Fget_charset (charset);
2160 code_point = charset_code_point (charset, XCHAR (character),
2161 !NILP (defined_only));
2162 if (code_point >= 0)
2163 return make_int (code_point);
2169 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2170 Return list of charset and one or two position-codes of CHARACTER.
2174 /* This function can GC */
2175 struct gcpro gcpro1, gcpro2;
2176 Lisp_Object charset = Qnil;
2177 Lisp_Object rc = Qnil;
2185 GCPRO2 (charset, rc);
2186 CHECK_CHAR_COERCE_INT (character);
2189 code_point = ENCODE_CHAR (XCHAR (character), charset);
2190 dimension = XCHARSET_DIMENSION (charset);
2191 while (dimension > 0)
2193 rc = Fcons (make_int (code_point & 255), rc);
2197 rc = Fcons (XCHARSET_NAME (charset), rc);
2199 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2201 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2203 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2207 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2216 #ifdef ENABLE_COMPOSITE_CHARS
2217 /************************************************************************/
2218 /* composite character functions */
2219 /************************************************************************/
2222 lookup_composite_char (Bufbyte *str, int len)
2224 Lisp_Object lispstr = make_string (str, len);
2225 Lisp_Object ch = Fgethash (lispstr,
2226 Vcomposite_char_string2char_hash_table,
2232 if (composite_char_row_next >= 128)
2233 signal_simple_error ("No more composite chars available", lispstr);
2234 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2235 composite_char_col_next);
2236 Fputhash (make_char (emch), lispstr,
2237 Vcomposite_char_char2string_hash_table);
2238 Fputhash (lispstr, make_char (emch),
2239 Vcomposite_char_string2char_hash_table);
2240 composite_char_col_next++;
2241 if (composite_char_col_next >= 128)
2243 composite_char_col_next = 32;
2244 composite_char_row_next++;
2253 composite_char_string (Emchar ch)
2255 Lisp_Object str = Fgethash (make_char (ch),
2256 Vcomposite_char_char2string_hash_table,
2258 assert (!UNBOUNDP (str));
2262 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2263 Convert a string into a single composite character.
2264 The character is the result of overstriking all the characters in
2269 CHECK_STRING (string);
2270 return make_char (lookup_composite_char (XSTRING_DATA (string),
2271 XSTRING_LENGTH (string)));
2274 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2275 Return a string of the characters comprising a composite character.
2283 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2284 signal_simple_error ("Must be composite char", ch);
2285 return composite_char_string (emch);
2287 #endif /* ENABLE_COMPOSITE_CHARS */
2290 /************************************************************************/
2291 /* initialization */
2292 /************************************************************************/
2295 syms_of_mule_charset (void)
2297 INIT_LRECORD_IMPLEMENTATION (charset);
2299 DEFSUBR (Fcharsetp);
2300 DEFSUBR (Ffind_charset);
2301 DEFSUBR (Fget_charset);
2302 DEFSUBR (Fcharset_list);
2303 DEFSUBR (Fcharset_name);
2304 DEFSUBR (Fmake_charset);
2305 DEFSUBR (Fmake_reverse_direction_charset);
2306 /* DEFSUBR (Freverse_direction_charset); */
2307 DEFSUBR (Fdefine_charset_alias);
2308 DEFSUBR (Fcharset_from_attributes);
2309 DEFSUBR (Fcharset_short_name);
2310 DEFSUBR (Fcharset_long_name);
2311 DEFSUBR (Fcharset_description);
2312 DEFSUBR (Fcharset_dimension);
2313 DEFSUBR (Fcharset_property);
2314 DEFSUBR (Fcharset_id);
2315 DEFSUBR (Fset_charset_ccl_program);
2316 DEFSUBR (Fset_charset_registry);
2318 DEFSUBR (Fcharset_mapping_table);
2319 DEFSUBR (Fset_charset_mapping_table);
2323 DEFSUBR (Fdecode_char);
2324 DEFSUBR (Fdecode_builtin_char);
2325 DEFSUBR (Fencode_char);
2327 DEFSUBR (Fmake_char);
2328 DEFSUBR (Fchar_charset);
2329 DEFSUBR (Fchar_octet);
2330 DEFSUBR (Fsplit_char);
2332 #ifdef ENABLE_COMPOSITE_CHARS
2333 DEFSUBR (Fmake_composite_char);
2334 DEFSUBR (Fcomposite_char_string);
2337 defsymbol (&Qcharsetp, "charsetp");
2338 defsymbol (&Qregistry, "registry");
2339 defsymbol (&Qfinal, "final");
2340 defsymbol (&Qgraphic, "graphic");
2341 defsymbol (&Qdirection, "direction");
2342 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2343 defsymbol (&Qshort_name, "short-name");
2344 defsymbol (&Qlong_name, "long-name");
2345 defsymbol (&Qiso_ir, "iso-ir");
2347 defsymbol (&Qmother, "mother");
2348 defsymbol (&Qmin_code, "min-code");
2349 defsymbol (&Qmax_code, "max-code");
2350 defsymbol (&Qcode_offset, "code-offset");
2351 defsymbol (&Qconversion, "conversion");
2352 defsymbol (&Q94x60, "94x60");
2353 defsymbol (&Q94x94x60, "94x94x60");
2356 defsymbol (&Ql2r, "l2r");
2357 defsymbol (&Qr2l, "r2l");
2359 /* Charsets, compatible with FSF 20.3
2360 Naming convention is Script-Charset[-Edition] */
2361 defsymbol (&Qascii, "ascii");
2362 defsymbol (&Qcontrol_1, "control-1");
2363 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2364 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2365 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2366 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2367 defsymbol (&Qthai_tis620, "thai-tis620");
2368 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2369 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2370 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2371 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2372 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2373 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2374 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2375 defsymbol (&Qmap_jis_x0208_1978, "=jis-x0208-1978");
2376 defsymbol (&Qmap_gb2312, "=gb2312");
2377 defsymbol (&Qmap_gb12345, "=gb12345");
2378 defsymbol (&Qmap_jis_x0208_1983, "=jis-x0208-1983");
2379 defsymbol (&Qmap_ks_x1001, "=ks-x1001");
2380 defsymbol (&Qmap_jis_x0212, "=jis-x0212");
2381 defsymbol (&Qmap_cns11643_1, "=cns11643-1");
2382 defsymbol (&Qmap_cns11643_2, "=cns11643-2");
2384 defsymbol (&Qmap_ucs, "=ucs");
2385 defsymbol (&Qucs, "ucs");
2386 defsymbol (&Qucs_bmp, "ucs-bmp");
2387 defsymbol (&Qucs_smp, "ucs-smp");
2388 defsymbol (&Qucs_sip, "ucs-sip");
2389 defsymbol (&Qlatin_viscii, "latin-viscii");
2390 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2391 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2392 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2393 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2394 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2395 defsymbol (&Qmap_jis_x0208, "=jis-x0208");
2396 defsymbol (&Qmap_jis_x0208_1990, "=jis-x0208-1990");
2397 defsymbol (&Qmap_big5, "=big5");
2398 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2400 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2401 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2403 defsymbol (&Qcomposite, "composite");
2407 vars_of_mule_charset (void)
2414 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2415 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2417 /* Table of charsets indexed by leading byte. */
2418 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2419 chlook->charset_by_leading_byte[i] = Qnil;
2422 /* Table of charsets indexed by type/final-byte. */
2423 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2424 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2425 chlook->charset_by_attributes[i][j] = Qnil;
2427 /* Table of charsets indexed by type/final-byte/direction. */
2428 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2429 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2430 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2431 chlook->charset_by_attributes[i][j][k] = Qnil;
2435 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2437 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2438 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2442 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2443 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2444 Leading-code of private TYPE9N charset of column-width 1.
2446 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2450 Vdefault_coded_charset_priority_list = Qnil;
2451 DEFVAR_LISP ("default-coded-charset-priority-list",
2452 &Vdefault_coded_charset_priority_list /*
2453 Default order of preferred coded-character-sets.
2459 complex_vars_of_mule_charset (void)
2461 staticpro (&Vcharset_hash_table);
2462 Vcharset_hash_table =
2463 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2465 /* Predefined character sets. We store them into variables for
2469 staticpro (&Vcharset_ucs);
2471 make_charset (LEADING_BYTE_UCS, Qmap_ucs, 256, 4,
2472 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2473 build_string ("UCS"),
2474 build_string ("UCS"),
2475 build_string ("ISO/IEC 10646"),
2477 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2478 staticpro (&Vcharset_ucs_bmp);
2480 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2481 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2482 build_string ("BMP"),
2483 build_string ("UCS-BMP"),
2484 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2486 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2487 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2488 staticpro (&Vcharset_ucs_smp);
2490 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2491 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2492 build_string ("SMP"),
2493 build_string ("UCS-SMP"),
2494 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2495 build_string ("UCS00-1"),
2496 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2497 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2498 staticpro (&Vcharset_ucs_sip);
2500 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2501 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2502 build_string ("SIP"),
2503 build_string ("UCS-SIP"),
2504 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2505 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2506 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2507 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2509 # define MIN_CHAR_THAI 0
2510 # define MAX_CHAR_THAI 0
2511 /* # define MIN_CHAR_HEBREW 0 */
2512 /* # define MAX_CHAR_HEBREW 0 */
2513 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2514 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2516 staticpro (&Vcharset_ascii);
2518 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2519 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2520 build_string ("ASCII"),
2521 build_string ("ASCII)"),
2522 build_string ("ASCII (ISO646 IRV)"),
2523 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2524 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2525 staticpro (&Vcharset_control_1);
2526 Vcharset_control_1 =
2527 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2528 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2529 build_string ("C1"),
2530 build_string ("Control characters"),
2531 build_string ("Control characters 128-191"),
2533 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2534 staticpro (&Vcharset_latin_iso8859_1);
2535 Vcharset_latin_iso8859_1 =
2536 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2537 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2538 build_string ("Latin-1"),
2539 build_string ("ISO8859-1 (Latin-1)"),
2540 build_string ("ISO8859-1 (Latin-1)"),
2541 build_string ("iso8859-1"),
2542 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2543 staticpro (&Vcharset_latin_iso8859_2);
2544 Vcharset_latin_iso8859_2 =
2545 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2546 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2547 build_string ("Latin-2"),
2548 build_string ("ISO8859-2 (Latin-2)"),
2549 build_string ("ISO8859-2 (Latin-2)"),
2550 build_string ("iso8859-2"),
2551 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2552 staticpro (&Vcharset_latin_iso8859_3);
2553 Vcharset_latin_iso8859_3 =
2554 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2555 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2556 build_string ("Latin-3"),
2557 build_string ("ISO8859-3 (Latin-3)"),
2558 build_string ("ISO8859-3 (Latin-3)"),
2559 build_string ("iso8859-3"),
2560 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2561 staticpro (&Vcharset_latin_iso8859_4);
2562 Vcharset_latin_iso8859_4 =
2563 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2564 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2565 build_string ("Latin-4"),
2566 build_string ("ISO8859-4 (Latin-4)"),
2567 build_string ("ISO8859-4 (Latin-4)"),
2568 build_string ("iso8859-4"),
2569 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2570 staticpro (&Vcharset_thai_tis620);
2571 Vcharset_thai_tis620 =
2572 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2573 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2574 build_string ("TIS620"),
2575 build_string ("TIS620 (Thai)"),
2576 build_string ("TIS620.2529 (Thai)"),
2577 build_string ("tis620"),
2578 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2579 staticpro (&Vcharset_greek_iso8859_7);
2580 Vcharset_greek_iso8859_7 =
2581 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2582 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2583 build_string ("ISO8859-7"),
2584 build_string ("ISO8859-7 (Greek)"),
2585 build_string ("ISO8859-7 (Greek)"),
2586 build_string ("iso8859-7"),
2587 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2588 staticpro (&Vcharset_arabic_iso8859_6);
2589 Vcharset_arabic_iso8859_6 =
2590 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2591 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2592 build_string ("ISO8859-6"),
2593 build_string ("ISO8859-6 (Arabic)"),
2594 build_string ("ISO8859-6 (Arabic)"),
2595 build_string ("iso8859-6"),
2596 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2597 staticpro (&Vcharset_hebrew_iso8859_8);
2598 Vcharset_hebrew_iso8859_8 =
2599 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2600 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2601 build_string ("ISO8859-8"),
2602 build_string ("ISO8859-8 (Hebrew)"),
2603 build_string ("ISO8859-8 (Hebrew)"),
2604 build_string ("iso8859-8"),
2606 0 /* MIN_CHAR_HEBREW */,
2607 0 /* MAX_CHAR_HEBREW */, 0, 32,
2608 Qnil, CONVERSION_IDENTICAL);
2609 staticpro (&Vcharset_katakana_jisx0201);
2610 Vcharset_katakana_jisx0201 =
2611 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2612 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2613 build_string ("JISX0201 Kana"),
2614 build_string ("JISX0201.1976 (Japanese Kana)"),
2615 build_string ("JISX0201.1976 Japanese Kana"),
2616 build_string ("jisx0201\\.1976"),
2617 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2618 staticpro (&Vcharset_latin_jisx0201);
2619 Vcharset_latin_jisx0201 =
2620 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2621 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2622 build_string ("JISX0201 Roman"),
2623 build_string ("JISX0201.1976 (Japanese Roman)"),
2624 build_string ("JISX0201.1976 Japanese Roman"),
2625 build_string ("jisx0201\\.1976"),
2626 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2627 staticpro (&Vcharset_cyrillic_iso8859_5);
2628 Vcharset_cyrillic_iso8859_5 =
2629 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2630 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2631 build_string ("ISO8859-5"),
2632 build_string ("ISO8859-5 (Cyrillic)"),
2633 build_string ("ISO8859-5 (Cyrillic)"),
2634 build_string ("iso8859-5"),
2635 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2636 staticpro (&Vcharset_latin_iso8859_9);
2637 Vcharset_latin_iso8859_9 =
2638 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2639 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2640 build_string ("Latin-5"),
2641 build_string ("ISO8859-9 (Latin-5)"),
2642 build_string ("ISO8859-9 (Latin-5)"),
2643 build_string ("iso8859-9"),
2644 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2646 staticpro (&Vcharset_jis_x0208);
2647 Vcharset_jis_x0208 =
2648 make_charset (LEADING_BYTE_JIS_X0208,
2649 Qmap_jis_x0208, 94, 2,
2650 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2651 build_string ("JIS X0208"),
2652 build_string ("JIS X0208 Common"),
2653 build_string ("JIS X0208 Common part"),
2654 build_string ("jisx0208\\.1990"),
2656 MIN_CHAR_JIS_X0208_1990,
2657 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2658 Qnil, CONVERSION_94x94);
2660 staticpro (&Vcharset_japanese_jisx0208_1978);
2661 Vcharset_japanese_jisx0208_1978 =
2662 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2663 Qmap_jis_x0208_1978, 94, 2,
2664 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2665 build_string ("JIS X0208:1978"),
2666 build_string ("JIS X0208:1978 (Japanese)"),
2668 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2669 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2676 CONVERSION_IDENTICAL);
2677 staticpro (&Vcharset_chinese_gb2312);
2678 Vcharset_chinese_gb2312 =
2679 make_charset (LEADING_BYTE_CHINESE_GB2312, Qmap_gb2312, 94, 2,
2680 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2681 build_string ("GB2312"),
2682 build_string ("GB2312)"),
2683 build_string ("GB2312 Chinese simplified"),
2684 build_string ("gb2312"),
2685 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2686 staticpro (&Vcharset_chinese_gb12345);
2687 Vcharset_chinese_gb12345 =
2688 make_charset (LEADING_BYTE_CHINESE_GB12345, Qmap_gb12345, 94, 2,
2689 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2690 build_string ("G1"),
2691 build_string ("GB 12345)"),
2692 build_string ("GB 12345-1990"),
2693 build_string ("GB12345\\(\\.1990\\)?-0"),
2694 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2695 staticpro (&Vcharset_japanese_jisx0208);
2696 Vcharset_japanese_jisx0208 =
2697 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qmap_jis_x0208_1983, 94, 2,
2698 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2699 build_string ("JISX0208"),
2700 build_string ("JIS X0208:1983 (Japanese)"),
2701 build_string ("JIS X0208:1983 Japanese Kanji"),
2702 build_string ("jisx0208\\.1983"),
2709 CONVERSION_IDENTICAL);
2711 staticpro (&Vcharset_japanese_jisx0208_1990);
2712 Vcharset_japanese_jisx0208_1990 =
2713 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2714 Qmap_jis_x0208_1990, 94, 2,
2715 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2716 build_string ("JISX0208-1990"),
2717 build_string ("JIS X0208:1990 (Japanese)"),
2718 build_string ("JIS X0208:1990 Japanese Kanji"),
2719 build_string ("jisx0208\\.1990"),
2721 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
2722 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
2723 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
2724 Vcharset_jis_x0208 /* Qnil */,
2725 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
2727 staticpro (&Vcharset_korean_ksc5601);
2728 Vcharset_korean_ksc5601 =
2729 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qmap_ks_x1001, 94, 2,
2730 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2731 build_string ("KSC5601"),
2732 build_string ("KSC5601 (Korean"),
2733 build_string ("KSC5601 Korean Hangul and Hanja"),
2734 build_string ("ksc5601"),
2735 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2736 staticpro (&Vcharset_japanese_jisx0212);
2737 Vcharset_japanese_jisx0212 =
2738 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qmap_jis_x0212, 94, 2,
2739 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2740 build_string ("JISX0212"),
2741 build_string ("JISX0212 (Japanese)"),
2742 build_string ("JISX0212 Japanese Supplement"),
2743 build_string ("jisx0212"),
2744 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2746 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2747 staticpro (&Vcharset_chinese_cns11643_1);
2748 Vcharset_chinese_cns11643_1 =
2749 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qmap_cns11643_1, 94, 2,
2750 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2751 build_string ("CNS11643-1"),
2752 build_string ("CNS11643-1 (Chinese traditional)"),
2754 ("CNS 11643 Plane 1 Chinese traditional"),
2755 build_string (CHINESE_CNS_PLANE_RE("1")),
2756 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2757 staticpro (&Vcharset_chinese_cns11643_2);
2758 Vcharset_chinese_cns11643_2 =
2759 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qmap_cns11643_2, 94, 2,
2760 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2761 build_string ("CNS11643-2"),
2762 build_string ("CNS11643-2 (Chinese traditional)"),
2764 ("CNS 11643 Plane 2 Chinese traditional"),
2765 build_string (CHINESE_CNS_PLANE_RE("2")),
2766 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2768 staticpro (&Vcharset_latin_tcvn5712);
2769 Vcharset_latin_tcvn5712 =
2770 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2771 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2772 build_string ("TCVN 5712"),
2773 build_string ("TCVN 5712 (VSCII-2)"),
2774 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
2775 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2776 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2777 staticpro (&Vcharset_latin_viscii_lower);
2778 Vcharset_latin_viscii_lower =
2779 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2780 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2781 build_string ("VISCII lower"),
2782 build_string ("VISCII lower (Vietnamese)"),
2783 build_string ("VISCII lower (Vietnamese)"),
2784 build_string ("MULEVISCII-LOWER"),
2785 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2786 staticpro (&Vcharset_latin_viscii_upper);
2787 Vcharset_latin_viscii_upper =
2788 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2789 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2790 build_string ("VISCII upper"),
2791 build_string ("VISCII upper (Vietnamese)"),
2792 build_string ("VISCII upper (Vietnamese)"),
2793 build_string ("MULEVISCII-UPPER"),
2794 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2795 staticpro (&Vcharset_latin_viscii);
2796 Vcharset_latin_viscii =
2797 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2798 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2799 build_string ("VISCII"),
2800 build_string ("VISCII 1.1 (Vietnamese)"),
2801 build_string ("VISCII 1.1 (Vietnamese)"),
2802 build_string ("VISCII1\\.1"),
2803 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2804 staticpro (&Vcharset_chinese_big5);
2805 Vcharset_chinese_big5 =
2806 make_charset (LEADING_BYTE_CHINESE_BIG5, Qmap_big5, 256, 2,
2807 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2808 build_string ("Big5"),
2809 build_string ("Big5"),
2810 build_string ("Big5 Chinese traditional"),
2811 build_string ("big5-0"),
2813 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2814 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
2816 staticpro (&Vcharset_ethiopic_ucs);
2817 Vcharset_ethiopic_ucs =
2818 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
2819 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2820 build_string ("Ethiopic (UCS)"),
2821 build_string ("Ethiopic (UCS)"),
2822 build_string ("Ethiopic of UCS"),
2823 build_string ("Ethiopic-Unicode"),
2824 Qnil, 0x1200, 0x137F, 0, 0,
2825 Qnil, CONVERSION_IDENTICAL);
2827 staticpro (&Vcharset_chinese_big5_1);
2828 Vcharset_chinese_big5_1 =
2829 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
2830 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
2831 build_string ("Big5"),
2832 build_string ("Big5 (Level-1)"),
2834 ("Big5 Level-1 Chinese traditional"),
2835 build_string ("big5"),
2836 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2837 staticpro (&Vcharset_chinese_big5_2);
2838 Vcharset_chinese_big5_2 =
2839 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
2840 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
2841 build_string ("Big5"),
2842 build_string ("Big5 (Level-2)"),
2844 ("Big5 Level-2 Chinese traditional"),
2845 build_string ("big5"),
2846 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2848 #ifdef ENABLE_COMPOSITE_CHARS
2849 /* #### For simplicity, we put composite chars into a 96x96 charset.
2850 This is going to lead to problems because you can run out of
2851 room, esp. as we don't yet recycle numbers. */
2852 staticpro (&Vcharset_composite);
2853 Vcharset_composite =
2854 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
2855 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2856 build_string ("Composite"),
2857 build_string ("Composite characters"),
2858 build_string ("Composite characters"),
2861 /* #### not dumped properly */
2862 composite_char_row_next = 32;
2863 composite_char_col_next = 32;
2865 Vcomposite_char_string2char_hash_table =
2866 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2867 Vcomposite_char_char2string_hash_table =
2868 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2869 staticpro (&Vcomposite_char_string2char_hash_table);
2870 staticpro (&Vcomposite_char_char2string_hash_table);
2871 #endif /* ENABLE_COMPOSITE_CHARS */