1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001,2002,2003 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_smp;
70 Lisp_Object Vcharset_ucs_sip;
71 Lisp_Object Vcharset_latin_viscii;
72 Lisp_Object Vcharset_latin_tcvn5712;
73 Lisp_Object Vcharset_latin_viscii_lower;
74 Lisp_Object Vcharset_latin_viscii_upper;
75 Lisp_Object Vcharset_jis_x0208;
76 Lisp_Object Vcharset_chinese_big5;
77 Lisp_Object Vcharset_ethiopic_ucs;
79 Lisp_Object Vcharset_chinese_big5_1;
80 Lisp_Object Vcharset_chinese_big5_2;
82 #ifdef ENABLE_COMPOSITE_CHARS
83 Lisp_Object Vcharset_composite;
85 /* Hash tables for composite chars. One maps string representing
86 composed chars to their equivalent chars; one goes the
88 Lisp_Object Vcomposite_char_char2string_hash_table;
89 Lisp_Object Vcomposite_char_string2char_hash_table;
91 static int composite_char_row_next;
92 static int composite_char_col_next;
94 #endif /* ENABLE_COMPOSITE_CHARS */
96 struct charset_lookup *chlook;
98 static const struct lrecord_description charset_lookup_description_1[] = {
99 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
101 NUM_LEADING_BYTES+4*128
108 static const struct struct_description charset_lookup_description = {
109 sizeof (struct charset_lookup),
110 charset_lookup_description_1
114 /* Table of number of bytes in the string representation of a character
115 indexed by the first byte of that representation.
117 rep_bytes_by_first_byte(c) is more efficient than the equivalent
118 canonical computation:
120 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
122 const Bytecount rep_bytes_by_first_byte[0xA0] =
123 { /* 0x00 - 0x7f are for straight ASCII */
124 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
125 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
126 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
127 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
128 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132 /* 0x80 - 0x8f are for Dimension-1 official charsets */
134 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
136 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
138 /* 0x90 - 0x9d are for Dimension-2 official charsets */
139 /* 0x9e is for Dimension-1 private charsets */
140 /* 0x9f is for Dimension-2 private charsets */
141 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
147 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
149 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
153 if (XVECTOR_LENGTH (v) > ccs_len)
156 for (i = 0; i < XVECTOR_LENGTH (v); i++)
158 Lisp_Object c = XVECTOR_DATA(v)[i];
160 if (!NILP (c) && !CHARP (c))
164 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
176 put_char_ccs_code_point (Lisp_Object character,
177 Lisp_Object ccs, Lisp_Object value)
179 if (!EQ (XCHARSET_NAME (ccs), Qmap_ucs)
181 || (XCHAR (character) != XINT (value)))
183 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
187 { /* obsolete representation: value must be a list of bytes */
188 Lisp_Object ret = Fcar (value);
192 signal_simple_error ("Invalid value for coded-charset", value);
193 code_point = XINT (ret);
194 if (XCHARSET_GRAPHIC (ccs) == 1)
202 signal_simple_error ("Invalid value for coded-charset",
206 signal_simple_error ("Invalid value for coded-charset",
209 if (XCHARSET_GRAPHIC (ccs) == 1)
211 code_point = (code_point << 8) | j;
214 value = make_int (code_point);
216 else if (INTP (value))
218 code_point = XINT (value);
219 if (XCHARSET_GRAPHIC (ccs) == 1)
221 code_point &= 0x7F7F7F7F;
222 value = make_int (code_point);
226 signal_simple_error ("Invalid value for coded-charset", value);
230 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
233 decoding_table_remove_char (ccs, XINT (cpos));
236 decoding_table_put_char (ccs, code_point, character);
242 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
244 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
245 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
247 if (VECTORP (decoding_table))
249 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
253 decoding_table_remove_char (ccs, XINT (cpos));
256 if (CHAR_TABLEP (encoding_table))
258 put_char_id_table (XCHAR_TABLE(encoding_table), character, Qunbound);
266 int leading_code_private_11;
269 Lisp_Object Qcharsetp;
271 /* Qdoc_string, Qdimension, Qchars defined in general.c */
272 Lisp_Object Qregistry, Qfinal, Qgraphic;
273 Lisp_Object Qdirection;
274 Lisp_Object Qreverse_direction_charset;
275 Lisp_Object Qleading_byte;
276 Lisp_Object Qshort_name, Qlong_name;
279 Lisp_Object Qmin_code, Qmax_code, Qcode_offset;
280 Lisp_Object Qmother, Qconversion, Q94x60, Q94x94x60;
314 Qvietnamese_viscii_lower,
315 Qvietnamese_viscii_upper,
325 Lisp_Object Ql2r, Qr2l;
327 Lisp_Object Vcharset_hash_table;
329 /* Composite characters are characters constructed by overstriking two
330 or more regular characters.
332 1) The old Mule implementation involves storing composite characters
333 in a buffer as a tag followed by all of the actual characters
334 used to make up the composite character. I think this is a bad
335 idea; it greatly complicates code that wants to handle strings
336 one character at a time because it has to deal with the possibility
337 of great big ungainly characters. It's much more reasonable to
338 simply store an index into a table of composite characters.
340 2) The current implementation only allows for 16,384 separate
341 composite characters over the lifetime of the XEmacs process.
342 This could become a potential problem if the user
343 edited lots of different files that use composite characters.
344 Due to FSF bogosity, increasing the number of allowable
345 composite characters under Mule would decrease the number
346 of possible faces that can exist. Mule already has shrunk
347 this to 2048, and further shrinkage would become uncomfortable.
348 No such problems exist in XEmacs.
350 Composite characters could be represented as 0x80 C1 C2 C3,
351 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
352 for slightly under 2^20 (one million) composite characters
353 over the XEmacs process lifetime, and you only need to
354 increase the size of a Mule character from 19 to 21 bits.
355 Or you could use 0x80 C1 C2 C3 C4, allowing for about
356 85 million (slightly over 2^26) composite characters. */
359 /************************************************************************/
360 /* Basic Emchar functions */
361 /************************************************************************/
363 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
364 string in STR. Returns the number of bytes stored.
365 Do not call this directly. Use the macro set_charptr_emchar() instead.
369 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
384 else if ( c <= 0x7ff )
386 *p++ = (c >> 6) | 0xc0;
387 *p++ = (c & 0x3f) | 0x80;
389 else if ( c <= 0xffff )
391 *p++ = (c >> 12) | 0xe0;
392 *p++ = ((c >> 6) & 0x3f) | 0x80;
393 *p++ = (c & 0x3f) | 0x80;
395 else if ( c <= 0x1fffff )
397 *p++ = (c >> 18) | 0xf0;
398 *p++ = ((c >> 12) & 0x3f) | 0x80;
399 *p++ = ((c >> 6) & 0x3f) | 0x80;
400 *p++ = (c & 0x3f) | 0x80;
402 else if ( c <= 0x3ffffff )
404 *p++ = (c >> 24) | 0xf8;
405 *p++ = ((c >> 18) & 0x3f) | 0x80;
406 *p++ = ((c >> 12) & 0x3f) | 0x80;
407 *p++ = ((c >> 6) & 0x3f) | 0x80;
408 *p++ = (c & 0x3f) | 0x80;
412 *p++ = (c >> 30) | 0xfc;
413 *p++ = ((c >> 24) & 0x3f) | 0x80;
414 *p++ = ((c >> 18) & 0x3f) | 0x80;
415 *p++ = ((c >> 12) & 0x3f) | 0x80;
416 *p++ = ((c >> 6) & 0x3f) | 0x80;
417 *p++ = (c & 0x3f) | 0x80;
420 BREAKUP_CHAR (c, charset, c1, c2);
421 lb = CHAR_LEADING_BYTE (c);
422 if (LEADING_BYTE_PRIVATE_P (lb))
423 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
425 if (EQ (charset, Vcharset_control_1))
434 /* Return the first character from a Mule-encoded string in STR,
435 assuming it's non-ASCII. Do not call this directly.
436 Use the macro charptr_emchar() instead. */
439 non_ascii_charptr_emchar (const Bufbyte *str)
452 else if ( b >= 0xf8 )
457 else if ( b >= 0xf0 )
462 else if ( b >= 0xe0 )
467 else if ( b >= 0xc0 )
477 for( ; len > 0; len-- )
480 ch = ( ch << 6 ) | ( b & 0x3f );
484 Bufbyte i0 = *str, i1, i2 = 0;
487 if (i0 == LEADING_BYTE_CONTROL_1)
488 return (Emchar) (*++str - 0x20);
490 if (LEADING_BYTE_PREFIX_P (i0))
495 charset = CHARSET_BY_LEADING_BYTE (i0);
496 if (XCHARSET_DIMENSION (charset) == 2)
499 return MAKE_CHAR (charset, i1, i2);
503 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
504 Do not call this directly. Use the macro valid_char_p() instead. */
508 non_ascii_valid_char_p (Emchar ch)
512 /* Must have only lowest 19 bits set */
516 f1 = CHAR_FIELD1 (ch);
517 f2 = CHAR_FIELD2 (ch);
518 f3 = CHAR_FIELD3 (ch);
524 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
525 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
526 f2 > MAX_CHAR_FIELD2_PRIVATE)
531 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
532 f2 <= MAX_CHAR_FIELD2_PRIVATE))
536 NOTE: This takes advantage of the fact that
537 FIELD2_TO_OFFICIAL_LEADING_BYTE and
538 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
540 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
541 if (EQ (charset, Qnil))
543 return (XCHARSET_CHARS (charset) == 96);
549 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
550 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
551 f1 > MAX_CHAR_FIELD1_PRIVATE)
553 if (f2 < 0x20 || f3 < 0x20)
556 #ifdef ENABLE_COMPOSITE_CHARS
557 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
559 if (UNBOUNDP (Fgethash (make_int (ch),
560 Vcomposite_char_char2string_hash_table,
565 #endif /* ENABLE_COMPOSITE_CHARS */
567 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
568 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
571 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
573 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
576 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
578 if (EQ (charset, Qnil))
580 return (XCHARSET_CHARS (charset) == 96);
586 /************************************************************************/
587 /* Basic string functions */
588 /************************************************************************/
590 /* Copy the character pointed to by SRC into DST. Do not call this
591 directly. Use the macro charptr_copy_char() instead.
592 Return the number of bytes copied. */
595 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
597 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
599 for (i = bytes; i; i--, dst++, src++)
605 /************************************************************************/
606 /* streams of Emchars */
607 /************************************************************************/
609 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
610 The functions below are not meant to be called directly; use
611 the macros in insdel.h. */
614 Lstream_get_emchar_1 (Lstream *stream, int ch)
616 Bufbyte str[MAX_EMCHAR_LEN];
617 Bufbyte *strptr = str;
620 str[0] = (Bufbyte) ch;
622 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
624 int c = Lstream_getc (stream);
625 bufpos_checking_assert (c >= 0);
626 *++strptr = (Bufbyte) c;
628 return charptr_emchar (str);
632 Lstream_fput_emchar (Lstream *stream, Emchar ch)
634 Bufbyte str[MAX_EMCHAR_LEN];
635 Bytecount len = set_charptr_emchar (str, ch);
636 return Lstream_write (stream, str, len);
640 Lstream_funget_emchar (Lstream *stream, Emchar ch)
642 Bufbyte str[MAX_EMCHAR_LEN];
643 Bytecount len = set_charptr_emchar (str, ch);
644 Lstream_unread (stream, str, len);
648 /************************************************************************/
650 /************************************************************************/
653 mark_charset (Lisp_Object obj)
655 Lisp_Charset *cs = XCHARSET (obj);
657 mark_object (cs->short_name);
658 mark_object (cs->long_name);
659 mark_object (cs->doc_string);
660 mark_object (cs->registry);
661 mark_object (cs->ccl_program);
663 mark_object (cs->decoding_table);
664 mark_object (cs->mother);
670 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
672 Lisp_Charset *cs = XCHARSET (obj);
676 error ("printing unreadable object #<charset %s 0x%x>",
677 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
680 write_c_string ("#<charset ", printcharfun);
681 print_internal (CHARSET_NAME (cs), printcharfun, 0);
682 write_c_string (" ", printcharfun);
683 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
684 write_c_string (" ", printcharfun);
685 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
686 write_c_string (" ", printcharfun);
687 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
688 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
690 CHARSET_DIMENSION (cs),
691 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
692 CHARSET_COLUMNS (cs),
693 CHARSET_GRAPHIC (cs),
695 write_c_string (buf, printcharfun);
696 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
697 sprintf (buf, " 0x%x>", cs->header.uid);
698 write_c_string (buf, printcharfun);
701 static const struct lrecord_description charset_description[] = {
702 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
703 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
704 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
705 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
706 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
707 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
708 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
710 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
711 { XD_LISP_OBJECT, offsetof (Lisp_Charset, mother) },
716 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
717 mark_charset, print_charset, 0, 0, 0,
721 /* Make a new charset. */
722 /* #### SJT Should generic properties be allowed? */
724 make_charset (Charset_ID id, Lisp_Object name,
725 unsigned short chars, unsigned char dimension,
726 unsigned char columns, unsigned char graphic,
727 Bufbyte final, unsigned char direction, Lisp_Object short_name,
728 Lisp_Object long_name, Lisp_Object doc,
730 Lisp_Object decoding_table,
731 Emchar min_code, Emchar max_code,
732 Emchar code_offset, unsigned char byte_offset,
733 Lisp_Object mother, unsigned char conversion)
736 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
740 XSETCHARSET (obj, cs);
742 CHARSET_ID (cs) = id;
743 CHARSET_NAME (cs) = name;
744 CHARSET_SHORT_NAME (cs) = short_name;
745 CHARSET_LONG_NAME (cs) = long_name;
746 CHARSET_CHARS (cs) = chars;
747 CHARSET_DIMENSION (cs) = dimension;
748 CHARSET_DIRECTION (cs) = direction;
749 CHARSET_COLUMNS (cs) = columns;
750 CHARSET_GRAPHIC (cs) = graphic;
751 CHARSET_FINAL (cs) = final;
752 CHARSET_DOC_STRING (cs) = doc;
753 CHARSET_REGISTRY (cs) = reg;
754 CHARSET_CCL_PROGRAM (cs) = Qnil;
755 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
757 CHARSET_DECODING_TABLE(cs) = Qunbound;
758 CHARSET_MIN_CODE (cs) = min_code;
759 CHARSET_MAX_CODE (cs) = max_code;
760 CHARSET_CODE_OFFSET (cs) = code_offset;
761 CHARSET_BYTE_OFFSET (cs) = byte_offset;
762 CHARSET_MOTHER (cs) = mother;
763 CHARSET_CONVERSION (cs) = conversion;
767 if (id == LEADING_BYTE_ASCII)
768 CHARSET_REP_BYTES (cs) = 1;
770 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
772 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
777 /* some charsets do not have final characters. This includes
778 ASCII, Control-1, Composite, and the two faux private
780 unsigned char iso2022_type
781 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
783 if (code_offset == 0)
785 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
786 chlook->charset_by_attributes[iso2022_type][final] = obj;
790 (chlook->charset_by_attributes[iso2022_type][final][direction]));
791 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
795 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
796 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
798 /* Some charsets are "faux" and don't have names or really exist at
799 all except in the leading-byte table. */
801 Fputhash (name, obj, Vcharset_hash_table);
806 get_unallocated_leading_byte (int dimension)
811 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
814 lb = chlook->next_allocated_leading_byte++;
818 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
821 lb = chlook->next_allocated_1_byte_leading_byte++;
825 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
828 lb = chlook->next_allocated_2_byte_leading_byte++;
834 ("No more character sets free for this dimension",
835 make_int (dimension));
841 /* Number of Big5 characters which have the same code in 1st byte. */
843 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
846 decode_defined_char (Lisp_Object ccs, int code_point)
848 int dim = XCHARSET_DIMENSION (ccs);
849 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
857 = get_ccs_octet_table (decoding_table, ccs,
858 (code_point >> (dim * 8)) & 255);
860 if (CHARP (decoding_table))
861 return XCHAR (decoding_table);
864 else if ( CHARSETP (mother = XCHARSET_MOTHER (ccs)) )
866 if ( XCHARSET_CONVERSION (ccs) == CONVERSION_IDENTICAL )
868 if ( EQ (mother, Vcharset_ucs) )
869 return DECODE_CHAR (mother, code_point);
871 return decode_defined_char (mother, code_point);
878 decode_builtin_char (Lisp_Object charset, int code_point)
880 Lisp_Object mother = XCHARSET_MOTHER (charset);
883 if ( XCHARSET_MAX_CODE (charset) > 0 )
885 if ( CHARSETP (mother) )
887 int code = code_point;
889 if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
891 int row = code_point >> 8;
892 int cell = code_point & 255;
896 else if (row < 16 + 32 + 30)
897 code = (row - (16 + 32)) * 94 + cell - 33;
898 else if (row < 18 + 32 + 30)
900 else if (row < 18 + 32 + 60)
901 code = (row - (18 + 32)) * 94 + cell - 33;
903 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
905 int plane = code_point >> 16;
906 int row = (code_point >> 8) & 255;
907 int cell = code_point & 255;
911 else if (row < 16 + 32 + 30)
913 = (plane - 33) * 94 * 60
914 + (row - (16 + 32)) * 94
916 else if (row < 18 + 32 + 30)
918 else if (row < 18 + 32 + 60)
920 = (plane - 33) * 94 * 60
921 + (row - (18 + 32)) * 94
925 decode_builtin_char (mother, code + XCHARSET_CODE_OFFSET(charset));
930 = (XCHARSET_DIMENSION (charset) == 1
932 code_point - XCHARSET_BYTE_OFFSET (charset)
934 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
935 * XCHARSET_CHARS (charset)
936 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
937 + XCHARSET_CODE_OFFSET (charset);
938 if ((cid < XCHARSET_MIN_CODE (charset))
939 || (XCHARSET_MAX_CODE (charset) < cid))
944 else if ((final = XCHARSET_FINAL (charset)) >= '0')
946 if (XCHARSET_DIMENSION (charset) == 1)
948 switch (XCHARSET_CHARS (charset))
952 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
955 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
963 switch (XCHARSET_CHARS (charset))
966 return MIN_CHAR_94x94
967 + (final - '0') * 94 * 94
968 + (((code_point >> 8) & 0x7F) - 33) * 94
969 + ((code_point & 0x7F) - 33);
971 return MIN_CHAR_96x96
972 + (final - '0') * 96 * 96
973 + (((code_point >> 8) & 0x7F) - 32) * 96
974 + ((code_point & 0x7F) - 32);
986 charset_code_point (Lisp_Object charset, Emchar ch, int defined_only)
988 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (charset);
991 if ( CHAR_TABLEP (encoding_table)
992 && INTP (ret = get_char_id_table (XCHAR_TABLE(encoding_table),
997 Lisp_Object mother = XCHARSET_MOTHER (charset);
998 int min = XCHARSET_MIN_CODE (charset);
999 int max = XCHARSET_MAX_CODE (charset);
1002 if ( CHARSETP (mother) )
1004 if (XCHARSET_FINAL (charset) >= '0')
1005 code = charset_code_point (mother, ch, 1);
1007 code = charset_code_point (mother, ch, defined_only);
1009 else if (defined_only)
1011 else if ( ((max == 0) && CHARSETP (mother)
1012 && (XCHARSET_FINAL (charset) == 0))
1013 || ((min <= ch) && (ch <= max)) )
1015 if ( ((max == 0) && CHARSETP (mother) && (code >= 0))
1016 || ((min <= code) && (code <= max)) )
1018 int d = code - XCHARSET_CODE_OFFSET (charset);
1020 if ( XCHARSET_CONVERSION (charset) == CONVERSION_IDENTICAL )
1022 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94 )
1024 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96 )
1026 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x60 )
1029 int cell = d % 94 + 33;
1035 return (row << 8) | cell;
1037 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94 )
1038 return ((d / 94 + 33) << 8) | (d % 94 + 33);
1039 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96 )
1040 return ((d / 96 + 32) << 8) | (d % 96 + 32);
1041 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x60 )
1043 int plane = d / (94 * 60) + 33;
1044 int row = (d % (94 * 60)) / 94;
1045 int cell = d % 94 + 33;
1051 return (plane << 16) | (row << 8) | cell;
1053 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94 )
1055 ( (d / (94 * 94) + 33) << 16)
1056 | ((d / 94 % 94 + 33) << 8)
1058 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96 )
1060 ( (d / (96 * 96) + 32) << 16)
1061 | ((d / 96 % 96 + 32) << 8)
1063 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_94x94x94x94 )
1065 ( (d / (94 * 94 * 94) + 33) << 24)
1066 | ((d / (94 * 94) % 94 + 33) << 16)
1067 | ((d / 94 % 94 + 33) << 8)
1069 else if ( XCHARSET_CONVERSION (charset) == CONVERSION_96x96x96x96 )
1071 ( (d / (96 * 96 * 96) + 32) << 24)
1072 | ((d / (96 * 96) % 96 + 32) << 16)
1073 | ((d / 96 % 96 + 32) << 8)
1077 printf ("Unknown CCS-conversion %d is specified!",
1078 XCHARSET_CONVERSION (charset));
1082 else if (defined_only)
1084 else if ( ( XCHARSET_FINAL (charset) >= '0' ) &&
1085 ( XCHARSET_MIN_CODE (charset) == 0 )
1087 (XCHARSET_CODE_OFFSET (charset) == 0) ||
1088 (XCHARSET_CODE_OFFSET (charset)
1089 == XCHARSET_MIN_CODE (charset))
1094 if (XCHARSET_DIMENSION (charset) == 1)
1096 if (XCHARSET_CHARS (charset) == 94)
1098 if (((d = ch - (MIN_CHAR_94
1099 + (XCHARSET_FINAL (charset) - '0') * 94))
1104 else if (XCHARSET_CHARS (charset) == 96)
1106 if (((d = ch - (MIN_CHAR_96
1107 + (XCHARSET_FINAL (charset) - '0') * 96))
1115 else if (XCHARSET_DIMENSION (charset) == 2)
1117 if (XCHARSET_CHARS (charset) == 94)
1119 if (((d = ch - (MIN_CHAR_94x94
1121 (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1124 return (((d / 94) + 33) << 8) | (d % 94 + 33);
1126 else if (XCHARSET_CHARS (charset) == 96)
1128 if (((d = ch - (MIN_CHAR_96x96
1130 (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1133 return (((d / 96) + 32) << 8) | (d % 96 + 32);
1144 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
1146 if (c <= MAX_CHAR_BASIC_LATIN)
1148 *charset = Vcharset_ascii;
1153 *charset = Vcharset_control_1;
1158 *charset = Vcharset_latin_iso8859_1;
1162 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1164 *charset = Vcharset_hebrew_iso8859_8;
1165 return c - MIN_CHAR_HEBREW + 0x20;
1168 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1170 *charset = Vcharset_thai_tis620;
1171 return c - MIN_CHAR_THAI + 0x20;
1174 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1175 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1177 return list2 (Vcharset_katakana_jisx0201,
1178 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1181 else if (c <= MAX_CHAR_BMP)
1183 *charset = Vcharset_ucs_bmp;
1186 else if (c <= MAX_CHAR_SMP)
1188 *charset = Vcharset_ucs_smp;
1189 return c - MIN_CHAR_SMP;
1191 else if (c <= MAX_CHAR_SIP)
1193 *charset = Vcharset_ucs_sip;
1194 return c - MIN_CHAR_SIP;
1196 else if (c < MIN_CHAR_94)
1198 *charset = Vcharset_ucs;
1201 else if (c <= MAX_CHAR_94)
1203 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
1204 ((c - MIN_CHAR_94) / 94) + '0',
1205 CHARSET_LEFT_TO_RIGHT);
1206 if (!NILP (*charset))
1207 return ((c - MIN_CHAR_94) % 94) + 33;
1210 *charset = Vcharset_ucs;
1214 else if (c <= MAX_CHAR_96)
1216 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
1217 ((c - MIN_CHAR_96) / 96) + '0',
1218 CHARSET_LEFT_TO_RIGHT);
1219 if (!NILP (*charset))
1220 return ((c - MIN_CHAR_96) % 96) + 32;
1223 *charset = Vcharset_ucs;
1227 else if (c <= MAX_CHAR_94x94)
1230 = CHARSET_BY_ATTRIBUTES (94, 2,
1231 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1232 CHARSET_LEFT_TO_RIGHT);
1233 if (!NILP (*charset))
1234 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
1235 | (((c - MIN_CHAR_94x94) % 94) + 33);
1238 *charset = Vcharset_ucs;
1242 else if (c <= MAX_CHAR_96x96)
1245 = CHARSET_BY_ATTRIBUTES (96, 2,
1246 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1247 CHARSET_LEFT_TO_RIGHT);
1248 if (!NILP (*charset))
1249 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
1250 | (((c - MIN_CHAR_96x96) % 96) + 32);
1253 *charset = Vcharset_ucs;
1259 *charset = Vcharset_ucs;
1264 Lisp_Object Vdefault_coded_charset_priority_list;
1268 /************************************************************************/
1269 /* Basic charset Lisp functions */
1270 /************************************************************************/
1272 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1273 Return non-nil if OBJECT is a charset.
1277 return CHARSETP (object) ? Qt : Qnil;
1280 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1281 Retrieve the charset of the given name.
1282 If CHARSET-OR-NAME is a charset object, it is simply returned.
1283 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1284 nil is returned. Otherwise the associated charset object is returned.
1288 if (CHARSETP (charset_or_name))
1289 return charset_or_name;
1291 CHECK_SYMBOL (charset_or_name);
1292 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1295 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1296 Retrieve the charset of the given name.
1297 Same as `find-charset' except an error is signalled if there is no such
1298 charset instead of returning nil.
1302 Lisp_Object charset = Ffind_charset (name);
1305 signal_simple_error ("No such charset", name);
1309 /* We store the charsets in hash tables with the names as the key and the
1310 actual charset object as the value. Occasionally we need to use them
1311 in a list format. These routines provide us with that. */
1312 struct charset_list_closure
1314 Lisp_Object *charset_list;
1318 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1319 void *charset_list_closure)
1321 /* This function can GC */
1322 struct charset_list_closure *chcl =
1323 (struct charset_list_closure*) charset_list_closure;
1324 Lisp_Object *charset_list = chcl->charset_list;
1326 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
1330 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1331 Return a list of the names of all defined charsets.
1335 Lisp_Object charset_list = Qnil;
1336 struct gcpro gcpro1;
1337 struct charset_list_closure charset_list_closure;
1339 GCPRO1 (charset_list);
1340 charset_list_closure.charset_list = &charset_list;
1341 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1342 &charset_list_closure);
1345 return charset_list;
1348 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1349 Return the name of charset CHARSET.
1353 return XCHARSET_NAME (Fget_charset (charset));
1356 /* #### SJT Should generic properties be allowed? */
1357 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1358 Define a new character set.
1359 This function is for use with Mule support.
1360 NAME is a symbol, the name by which the character set is normally referred.
1361 DOC-STRING is a string describing the character set.
1362 PROPS is a property list, describing the specific nature of the
1363 character set. Recognized properties are:
1365 'short-name Short version of the charset name (ex: Latin-1)
1366 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1367 'registry A regular expression matching the font registry field for
1369 'dimension Number of octets used to index a character in this charset.
1370 Either 1 or 2. Defaults to 1.
1371 If UTF-2000 feature is enabled, 3 or 4 are also available.
1372 'columns Number of columns used to display a character in this charset.
1373 Only used in TTY mode. (Under X, the actual width of a
1374 character can be derived from the font used to display the
1375 characters.) If unspecified, defaults to the dimension
1376 (this is almost always the correct value).
1377 'chars Number of characters in each dimension (94 or 96).
1378 Defaults to 94. Note that if the dimension is 2, the
1379 character set thus described is 94x94 or 96x96.
1380 If UTF-2000 feature is enabled, 128 or 256 are also available.
1381 'final Final byte of ISO 2022 escape sequence. Must be
1382 supplied. Each combination of (DIMENSION, CHARS) defines a
1383 separate namespace for final bytes. Note that ISO
1384 2022 restricts the final byte to the range
1385 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1386 dimension == 2. Note also that final bytes in the range
1387 0x30 - 0x3F are reserved for user-defined (not official)
1389 'graphic 0 (use left half of font on output) or 1 (use right half
1390 of font on output). Defaults to 0. For example, for
1391 a font whose registry is ISO8859-1, the left half
1392 (octets 0x20 - 0x7F) is the `ascii' character set, while
1393 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1394 character set. With 'graphic set to 0, the octets
1395 will have their high bit cleared; with it set to 1,
1396 the octets will have their high bit set.
1397 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1399 'ccl-program A compiled CCL program used to convert a character in
1400 this charset into an index into the font. This is in
1401 addition to the 'graphic property. The CCL program
1402 is passed the octets of the character, with the high
1403 bit cleared and set depending upon whether the value
1404 of the 'graphic property is 0 or 1.
1405 'mother [UTF-2000 only] Base coded-charset.
1406 'code-min [UTF-2000 only] Minimum code-point of a base coded-charset.
1407 'code-max [UTF-2000 only] Maximum code-point of a base coded-charset.
1408 'code-offset [UTF-2000 only] Offset for a code-point of a base
1410 'conversion [UTF-2000 only] Conversion for a code-point of a base
1411 coded-charset (94x60 or 94x94x60).
1413 (name, doc_string, props))
1415 int id = 0, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1416 int direction = CHARSET_LEFT_TO_RIGHT;
1417 Lisp_Object registry = Qnil;
1418 Lisp_Object charset;
1419 Lisp_Object ccl_program = Qnil;
1420 Lisp_Object short_name = Qnil, long_name = Qnil;
1421 Lisp_Object mother = Qnil;
1422 int min_code = 0, max_code = 0, code_offset = 0;
1423 int byte_offset = -1;
1426 CHECK_SYMBOL (name);
1427 if (!NILP (doc_string))
1428 CHECK_STRING (doc_string);
1430 charset = Ffind_charset (name);
1431 if (!NILP (charset))
1432 signal_simple_error ("Cannot redefine existing charset", name);
1435 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
1437 if (EQ (keyword, Qshort_name))
1439 CHECK_STRING (value);
1443 if (EQ (keyword, Qlong_name))
1445 CHECK_STRING (value);
1449 else if (EQ (keyword, Qiso_ir))
1453 id = - XINT (value);
1457 else if (EQ (keyword, Qdimension))
1460 dimension = XINT (value);
1461 if (dimension < 1 ||
1468 signal_simple_error ("Invalid value for 'dimension", value);
1471 else if (EQ (keyword, Qchars))
1474 chars = XINT (value);
1475 if (chars != 94 && chars != 96
1477 && chars != 128 && chars != 256
1480 signal_simple_error ("Invalid value for 'chars", value);
1483 else if (EQ (keyword, Qcolumns))
1486 columns = XINT (value);
1487 if (columns != 1 && columns != 2)
1488 signal_simple_error ("Invalid value for 'columns", value);
1491 else if (EQ (keyword, Qgraphic))
1494 graphic = XINT (value);
1502 signal_simple_error ("Invalid value for 'graphic", value);
1505 else if (EQ (keyword, Qregistry))
1507 CHECK_STRING (value);
1511 else if (EQ (keyword, Qdirection))
1513 if (EQ (value, Ql2r))
1514 direction = CHARSET_LEFT_TO_RIGHT;
1515 else if (EQ (value, Qr2l))
1516 direction = CHARSET_RIGHT_TO_LEFT;
1518 signal_simple_error ("Invalid value for 'direction", value);
1521 else if (EQ (keyword, Qfinal))
1523 CHECK_CHAR_COERCE_INT (value);
1524 final = XCHAR (value);
1525 if (final < '0' || final > '~')
1526 signal_simple_error ("Invalid value for 'final", value);
1530 else if (EQ (keyword, Qmother))
1532 mother = Fget_charset (value);
1535 else if (EQ (keyword, Qmin_code))
1538 min_code = XUINT (value);
1541 else if (EQ (keyword, Qmax_code))
1544 max_code = XUINT (value);
1547 else if (EQ (keyword, Qcode_offset))
1550 code_offset = XUINT (value);
1553 else if (EQ (keyword, Qconversion))
1555 if (EQ (value, Q94x60))
1556 conversion = CONVERSION_94x60;
1557 else if (EQ (value, Q94x94x60))
1558 conversion = CONVERSION_94x94x60;
1560 signal_simple_error ("Unrecognized conversion", value);
1564 else if (EQ (keyword, Qccl_program))
1566 struct ccl_program test_ccl;
1568 if (setup_ccl_program (&test_ccl, value) < 0)
1569 signal_simple_error ("Invalid value for 'ccl-program", value);
1570 ccl_program = value;
1574 signal_simple_error ("Unrecognized property", keyword);
1580 error ("'final must be specified");
1582 if (dimension == 2 && final > 0x5F)
1584 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1587 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1588 CHARSET_LEFT_TO_RIGHT)) ||
1589 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
1590 CHARSET_RIGHT_TO_LEFT)))
1592 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1595 id = get_unallocated_leading_byte (dimension);
1597 if (NILP (doc_string))
1598 doc_string = build_string ("");
1600 if (NILP (registry))
1601 registry = build_string ("");
1603 if (NILP (short_name))
1604 XSETSTRING (short_name, XSYMBOL (name)->name);
1606 if (NILP (long_name))
1607 long_name = doc_string;
1610 columns = dimension;
1612 if (byte_offset < 0)
1616 else if (chars == 96)
1622 charset = make_charset (id, name, chars, dimension, columns, graphic,
1623 final, direction, short_name, long_name,
1624 doc_string, registry,
1625 Qnil, min_code, max_code, code_offset, byte_offset,
1626 mother, conversion);
1627 if (!NILP (ccl_program))
1628 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1632 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1634 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1635 NEW-NAME is the name of the new charset. Return the new charset.
1637 (charset, new_name))
1639 Lisp_Object new_charset = Qnil;
1640 int id, chars, dimension, columns, graphic, final;
1642 Lisp_Object registry, doc_string, short_name, long_name;
1645 charset = Fget_charset (charset);
1646 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1647 signal_simple_error ("Charset already has reverse-direction charset",
1650 CHECK_SYMBOL (new_name);
1651 if (!NILP (Ffind_charset (new_name)))
1652 signal_simple_error ("Cannot redefine existing charset", new_name);
1654 cs = XCHARSET (charset);
1656 chars = CHARSET_CHARS (cs);
1657 dimension = CHARSET_DIMENSION (cs);
1658 columns = CHARSET_COLUMNS (cs);
1659 id = get_unallocated_leading_byte (dimension);
1661 graphic = CHARSET_GRAPHIC (cs);
1662 final = CHARSET_FINAL (cs);
1663 direction = CHARSET_RIGHT_TO_LEFT;
1664 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1665 direction = CHARSET_LEFT_TO_RIGHT;
1666 doc_string = CHARSET_DOC_STRING (cs);
1667 short_name = CHARSET_SHORT_NAME (cs);
1668 long_name = CHARSET_LONG_NAME (cs);
1669 registry = CHARSET_REGISTRY (cs);
1671 new_charset = make_charset (id, new_name, chars, dimension, columns,
1672 graphic, final, direction, short_name, long_name,
1673 doc_string, registry,
1675 CHARSET_DECODING_TABLE(cs),
1676 CHARSET_MIN_CODE(cs),
1677 CHARSET_MAX_CODE(cs),
1678 CHARSET_CODE_OFFSET(cs),
1679 CHARSET_BYTE_OFFSET(cs),
1681 CHARSET_CONVERSION (cs)
1683 Qnil, 0, 0, 0, 0, Qnil, 0
1687 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1688 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1693 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1694 Define symbol ALIAS as an alias for CHARSET.
1698 CHECK_SYMBOL (alias);
1699 charset = Fget_charset (charset);
1700 return Fputhash (alias, charset, Vcharset_hash_table);
1703 /* #### Reverse direction charsets not yet implemented. */
1705 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1707 Return the reverse-direction charset parallel to CHARSET, if any.
1708 This is the charset with the same properties (in particular, the same
1709 dimension, number of characters per dimension, and final byte) as
1710 CHARSET but whose characters are displayed in the opposite direction.
1714 charset = Fget_charset (charset);
1715 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1719 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1720 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1721 If DIRECTION is omitted, both directions will be checked (left-to-right
1722 will be returned if character sets exist for both directions).
1724 (dimension, chars, final, direction))
1726 int dm, ch, fi, di = -1;
1727 Lisp_Object obj = Qnil;
1729 CHECK_INT (dimension);
1730 dm = XINT (dimension);
1731 if (dm < 1 || dm > 2)
1732 signal_simple_error ("Invalid value for DIMENSION", dimension);
1736 if (ch != 94 && ch != 96)
1737 signal_simple_error ("Invalid value for CHARS", chars);
1739 CHECK_CHAR_COERCE_INT (final);
1741 if (fi < '0' || fi > '~')
1742 signal_simple_error ("Invalid value for FINAL", final);
1744 if (EQ (direction, Ql2r))
1745 di = CHARSET_LEFT_TO_RIGHT;
1746 else if (EQ (direction, Qr2l))
1747 di = CHARSET_RIGHT_TO_LEFT;
1748 else if (!NILP (direction))
1749 signal_simple_error ("Invalid value for DIRECTION", direction);
1751 if (dm == 2 && fi > 0x5F)
1753 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1757 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
1759 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
1762 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
1765 return XCHARSET_NAME (obj);
1769 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1770 Return short name of CHARSET.
1774 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1777 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1778 Return long name of CHARSET.
1782 return XCHARSET_LONG_NAME (Fget_charset (charset));
1785 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1786 Return description of CHARSET.
1790 return XCHARSET_DOC_STRING (Fget_charset (charset));
1793 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1794 Return dimension of CHARSET.
1798 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1801 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1802 Return property PROP of CHARSET, a charset object or symbol naming a charset.
1803 Recognized properties are those listed in `make-charset', as well as
1804 'name and 'doc-string.
1810 charset = Fget_charset (charset);
1811 cs = XCHARSET (charset);
1813 CHECK_SYMBOL (prop);
1814 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1815 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1816 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1817 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1818 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1819 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1820 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1821 if (EQ (prop, Qfinal)) return CHARSET_FINAL (cs) == 0 ?
1822 Qnil : make_char (CHARSET_FINAL (cs));
1823 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1824 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1825 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1826 if (EQ (prop, Qdirection))
1827 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1828 if (EQ (prop, Qreverse_direction_charset))
1830 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1831 /* #### Is this translation OK? If so, error checking sufficient? */
1832 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
1835 if (EQ (prop, Qmother))
1836 return CHARSET_MOTHER (cs);
1837 if (EQ (prop, Qmin_code))
1838 return make_int (CHARSET_MIN_CODE (cs));
1839 if (EQ (prop, Qmax_code))
1840 return make_int (CHARSET_MAX_CODE (cs));
1842 signal_simple_error ("Unrecognized charset property name", prop);
1843 return Qnil; /* not reached */
1846 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1847 Return charset identification number of CHARSET.
1851 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1854 /* #### We need to figure out which properties we really want to
1857 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1858 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1860 (charset, ccl_program))
1862 struct ccl_program test_ccl;
1864 charset = Fget_charset (charset);
1865 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
1866 signal_simple_error ("Invalid ccl-program", ccl_program);
1867 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1872 invalidate_charset_font_caches (Lisp_Object charset)
1874 /* Invalidate font cache entries for charset on all devices. */
1875 Lisp_Object devcons, concons, hash_table;
1876 DEVICE_LOOP_NO_BREAK (devcons, concons)
1878 struct device *d = XDEVICE (XCAR (devcons));
1879 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1880 if (!UNBOUNDP (hash_table))
1881 Fclrhash (hash_table);
1885 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1886 Set the 'registry property of CHARSET to REGISTRY.
1888 (charset, registry))
1890 charset = Fget_charset (charset);
1891 CHECK_STRING (registry);
1892 XCHARSET_REGISTRY (charset) = registry;
1893 invalidate_charset_font_caches (charset);
1894 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1899 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1900 Return mapping-table of CHARSET.
1904 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1907 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1908 Set mapping-table of CHARSET to TABLE.
1912 struct Lisp_Charset *cs;
1916 charset = Fget_charset (charset);
1917 cs = XCHARSET (charset);
1921 CHARSET_DECODING_TABLE(cs) = Qnil;
1924 else if (VECTORP (table))
1926 int ccs_len = CHARSET_BYTE_SIZE (cs);
1927 int ret = decoding_table_check_elements (table,
1928 CHARSET_DIMENSION (cs),
1933 signal_simple_error ("Too big table", table);
1935 signal_simple_error ("Invalid element is found", table);
1937 signal_simple_error ("Something wrong", table);
1939 CHARSET_DECODING_TABLE(cs) = Qnil;
1942 signal_error (Qwrong_type_argument,
1943 list2 (build_translated_string ("vector-or-nil-p"),
1946 byte_offset = CHARSET_BYTE_OFFSET (cs);
1947 switch (CHARSET_DIMENSION (cs))
1950 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1952 Lisp_Object c = XVECTOR_DATA(table)[i];
1955 Fput_char_attribute (c, XCHARSET_NAME (charset),
1956 make_int (i + byte_offset));
1960 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1962 Lisp_Object v = XVECTOR_DATA(table)[i];
1968 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1970 Lisp_Object c = XVECTOR_DATA(v)[j];
1974 (c, XCHARSET_NAME (charset),
1975 make_int ( ( (i + byte_offset) << 8 )
1981 Fput_char_attribute (v, XCHARSET_NAME (charset),
1982 make_int (i + byte_offset));
1991 /************************************************************************/
1992 /* Lisp primitives for working with characters */
1993 /************************************************************************/
1996 DEFUN ("decode-char", Fdecode_char, 2, 3, 0, /*
1997 Make a character from CHARSET and code-point CODE.
1998 If DEFINED_ONLY is non-nil, builtin character is not returned.
1999 If corresponding character is not found, nil is returned.
2001 (charset, code, defined_only))
2005 charset = Fget_charset (charset);
2008 if (XCHARSET_GRAPHIC (charset) == 1)
2010 if (NILP (defined_only))
2011 c = DECODE_CHAR (charset, c);
2013 c = decode_defined_char (charset, c);
2014 return c >= 0 ? make_char (c) : Qnil;
2017 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
2018 Make a builtin character from CHARSET and code-point CODE.
2024 charset = Fget_charset (charset);
2026 if (EQ (charset, Vcharset_latin_viscii))
2028 Lisp_Object chr = Fdecode_char (charset, code, Qnil);
2034 (ret = Fget_char_attribute (chr,
2035 Vcharset_latin_viscii_lower,
2038 charset = Vcharset_latin_viscii_lower;
2042 (ret = Fget_char_attribute (chr,
2043 Vcharset_latin_viscii_upper,
2046 charset = Vcharset_latin_viscii_upper;
2053 if (XCHARSET_GRAPHIC (charset) == 1)
2056 c = decode_builtin_char (charset, c);
2057 return c >= 0 ? make_char (c) : Fdecode_char (charset, code, Qnil);
2061 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2062 Make a character from CHARSET and octets ARG1 and ARG2.
2063 ARG2 is required only for characters from two-dimensional charsets.
2064 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2065 character s with caron.
2067 (charset, arg1, arg2))
2071 int lowlim, highlim;
2073 charset = Fget_charset (charset);
2074 cs = XCHARSET (charset);
2076 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2077 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2079 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2081 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2082 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2085 /* It is useful (and safe, according to Olivier Galibert) to strip
2086 the 8th bit off ARG1 and ARG2 because it allows programmers to
2087 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2088 Latin 2 code of the character. */
2096 if (a1 < lowlim || a1 > highlim)
2097 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2099 if (CHARSET_DIMENSION (cs) == 1)
2103 ("Charset is of dimension one; second octet must be nil", arg2);
2104 return make_char (MAKE_CHAR (charset, a1, 0));
2113 a2 = XINT (arg2) & 0x7f;
2115 if (a2 < lowlim || a2 > highlim)
2116 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2118 return make_char (MAKE_CHAR (charset, a1, a2));
2121 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2122 Return the character set of CHARACTER.
2126 CHECK_CHAR_COERCE_INT (character);
2128 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
2131 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
2132 Return the octet numbered N (should be 0 or 1) of CHARACTER.
2133 N defaults to 0 if omitted.
2137 Lisp_Object charset;
2140 CHECK_CHAR_COERCE_INT (character);
2142 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
2144 if (NILP (n) || EQ (n, Qzero))
2145 return make_int (octet0);
2146 else if (EQ (n, make_int (1)))
2147 return make_int (octet1);
2149 signal_simple_error ("Octet number must be 0 or 1", n);
2153 DEFUN ("encode-char", Fencode_char, 2, 3, 0, /*
2154 Return code-point of CHARACTER in specified CHARSET.
2156 (character, charset, defined_only))
2160 CHECK_CHAR_COERCE_INT (character);
2161 charset = Fget_charset (charset);
2162 code_point = charset_code_point (charset, XCHAR (character),
2163 !NILP (defined_only));
2164 if (code_point >= 0)
2165 return make_int (code_point);
2171 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2172 Return list of charset and one or two position-codes of CHARACTER.
2176 /* This function can GC */
2177 struct gcpro gcpro1, gcpro2;
2178 Lisp_Object charset = Qnil;
2179 Lisp_Object rc = Qnil;
2187 GCPRO2 (charset, rc);
2188 CHECK_CHAR_COERCE_INT (character);
2191 code_point = ENCODE_CHAR (XCHAR (character), charset);
2192 dimension = XCHARSET_DIMENSION (charset);
2193 while (dimension > 0)
2195 rc = Fcons (make_int (code_point & 255), rc);
2199 rc = Fcons (XCHARSET_NAME (charset), rc);
2201 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2203 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2205 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2209 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2218 #ifdef ENABLE_COMPOSITE_CHARS
2219 /************************************************************************/
2220 /* composite character functions */
2221 /************************************************************************/
2224 lookup_composite_char (Bufbyte *str, int len)
2226 Lisp_Object lispstr = make_string (str, len);
2227 Lisp_Object ch = Fgethash (lispstr,
2228 Vcomposite_char_string2char_hash_table,
2234 if (composite_char_row_next >= 128)
2235 signal_simple_error ("No more composite chars available", lispstr);
2236 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2237 composite_char_col_next);
2238 Fputhash (make_char (emch), lispstr,
2239 Vcomposite_char_char2string_hash_table);
2240 Fputhash (lispstr, make_char (emch),
2241 Vcomposite_char_string2char_hash_table);
2242 composite_char_col_next++;
2243 if (composite_char_col_next >= 128)
2245 composite_char_col_next = 32;
2246 composite_char_row_next++;
2255 composite_char_string (Emchar ch)
2257 Lisp_Object str = Fgethash (make_char (ch),
2258 Vcomposite_char_char2string_hash_table,
2260 assert (!UNBOUNDP (str));
2264 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2265 Convert a string into a single composite character.
2266 The character is the result of overstriking all the characters in
2271 CHECK_STRING (string);
2272 return make_char (lookup_composite_char (XSTRING_DATA (string),
2273 XSTRING_LENGTH (string)));
2276 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2277 Return a string of the characters comprising a composite character.
2285 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2286 signal_simple_error ("Must be composite char", ch);
2287 return composite_char_string (emch);
2289 #endif /* ENABLE_COMPOSITE_CHARS */
2292 /************************************************************************/
2293 /* initialization */
2294 /************************************************************************/
2297 syms_of_mule_charset (void)
2299 INIT_LRECORD_IMPLEMENTATION (charset);
2301 DEFSUBR (Fcharsetp);
2302 DEFSUBR (Ffind_charset);
2303 DEFSUBR (Fget_charset);
2304 DEFSUBR (Fcharset_list);
2305 DEFSUBR (Fcharset_name);
2306 DEFSUBR (Fmake_charset);
2307 DEFSUBR (Fmake_reverse_direction_charset);
2308 /* DEFSUBR (Freverse_direction_charset); */
2309 DEFSUBR (Fdefine_charset_alias);
2310 DEFSUBR (Fcharset_from_attributes);
2311 DEFSUBR (Fcharset_short_name);
2312 DEFSUBR (Fcharset_long_name);
2313 DEFSUBR (Fcharset_description);
2314 DEFSUBR (Fcharset_dimension);
2315 DEFSUBR (Fcharset_property);
2316 DEFSUBR (Fcharset_id);
2317 DEFSUBR (Fset_charset_ccl_program);
2318 DEFSUBR (Fset_charset_registry);
2320 DEFSUBR (Fcharset_mapping_table);
2321 DEFSUBR (Fset_charset_mapping_table);
2325 DEFSUBR (Fdecode_char);
2326 DEFSUBR (Fdecode_builtin_char);
2327 DEFSUBR (Fencode_char);
2329 DEFSUBR (Fmake_char);
2330 DEFSUBR (Fchar_charset);
2331 DEFSUBR (Fchar_octet);
2332 DEFSUBR (Fsplit_char);
2334 #ifdef ENABLE_COMPOSITE_CHARS
2335 DEFSUBR (Fmake_composite_char);
2336 DEFSUBR (Fcomposite_char_string);
2339 defsymbol (&Qcharsetp, "charsetp");
2340 defsymbol (&Qregistry, "registry");
2341 defsymbol (&Qfinal, "final");
2342 defsymbol (&Qgraphic, "graphic");
2343 defsymbol (&Qdirection, "direction");
2344 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2345 defsymbol (&Qshort_name, "short-name");
2346 defsymbol (&Qlong_name, "long-name");
2347 defsymbol (&Qiso_ir, "iso-ir");
2349 defsymbol (&Qmother, "mother");
2350 defsymbol (&Qmin_code, "min-code");
2351 defsymbol (&Qmax_code, "max-code");
2352 defsymbol (&Qcode_offset, "code-offset");
2353 defsymbol (&Qconversion, "conversion");
2354 defsymbol (&Q94x60, "94x60");
2355 defsymbol (&Q94x94x60, "94x94x60");
2358 defsymbol (&Ql2r, "l2r");
2359 defsymbol (&Qr2l, "r2l");
2361 /* Charsets, compatible with FSF 20.3
2362 Naming convention is Script-Charset[-Edition] */
2363 defsymbol (&Qascii, "ascii");
2364 defsymbol (&Qcontrol_1, "control-1");
2365 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2366 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2367 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2368 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2369 defsymbol (&Qthai_tis620, "thai-tis620");
2370 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2371 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2372 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2373 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2374 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2375 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2376 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2377 defsymbol (&Qmap_jis_x0208_1978, "=jis-x0208-1978");
2378 defsymbol (&Qmap_gb2312, "=gb2312");
2379 defsymbol (&Qmap_gb12345, "=gb12345");
2380 defsymbol (&Qmap_jis_x0208_1983, "=jis-x0208-1983");
2381 defsymbol (&Qmap_ks_x1001, "=ks-x1001");
2382 defsymbol (&Qmap_jis_x0212, "=jis-x0212");
2383 defsymbol (&Qmap_cns11643_1, "=cns11643-1");
2384 defsymbol (&Qmap_cns11643_2, "=cns11643-2");
2386 defsymbol (&Qmap_ucs, "=ucs");
2387 defsymbol (&Qucs, "ucs");
2388 defsymbol (&Qucs_bmp, "ucs-bmp");
2389 defsymbol (&Qucs_smp, "ucs-smp");
2390 defsymbol (&Qucs_sip, "ucs-sip");
2391 defsymbol (&Qlatin_viscii, "latin-viscii");
2392 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
2393 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2394 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2395 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2396 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2397 defsymbol (&Qmap_jis_x0208, "=jis-x0208");
2398 defsymbol (&Qmap_jis_x0208_1990, "=jis-x0208-1990");
2399 defsymbol (&Qmap_big5, "=big5");
2400 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2402 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2403 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2405 defsymbol (&Qcomposite, "composite");
2409 vars_of_mule_charset (void)
2416 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
2417 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
2419 /* Table of charsets indexed by leading byte. */
2420 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
2421 chlook->charset_by_leading_byte[i] = Qnil;
2424 /* Table of charsets indexed by type/final-byte. */
2425 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2426 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2427 chlook->charset_by_attributes[i][j] = Qnil;
2429 /* Table of charsets indexed by type/final-byte/direction. */
2430 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
2431 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
2432 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
2433 chlook->charset_by_attributes[i][j][k] = Qnil;
2437 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2439 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2440 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2444 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2445 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2446 Leading-code of private TYPE9N charset of column-width 1.
2448 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2452 Vdefault_coded_charset_priority_list = Qnil;
2453 DEFVAR_LISP ("default-coded-charset-priority-list",
2454 &Vdefault_coded_charset_priority_list /*
2455 Default order of preferred coded-character-sets.
2461 complex_vars_of_mule_charset (void)
2463 staticpro (&Vcharset_hash_table);
2464 Vcharset_hash_table =
2465 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2467 /* Predefined character sets. We store them into variables for
2471 staticpro (&Vcharset_ucs);
2473 make_charset (LEADING_BYTE_UCS, Qmap_ucs, 256, 4,
2474 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2475 build_string ("UCS"),
2476 build_string ("UCS"),
2477 build_string ("ISO/IEC 10646"),
2479 Qnil, 0, 0x7FFFFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2480 staticpro (&Vcharset_ucs_bmp);
2482 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
2483 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2484 build_string ("BMP"),
2485 build_string ("UCS-BMP"),
2486 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2488 ("\\(ISO10646.*-[01]\\|UCS00-0\\|UNICODE[23]?-0\\)"),
2489 Qnil, 0, 0xFFFF, 0, 0, Qnil, CONVERSION_IDENTICAL);
2490 staticpro (&Vcharset_ucs_smp);
2492 make_charset (LEADING_BYTE_UCS_SMP, Qucs_smp, 256, 2,
2493 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2494 build_string ("SMP"),
2495 build_string ("UCS-SMP"),
2496 build_string ("ISO/IEC 10646 Group 0 Plane 1 (SMP)"),
2497 build_string ("UCS00-1"),
2498 Qnil, MIN_CHAR_SMP, MAX_CHAR_SMP,
2499 MIN_CHAR_SMP, 0, Qnil, CONVERSION_IDENTICAL);
2500 staticpro (&Vcharset_ucs_sip);
2502 make_charset (LEADING_BYTE_UCS_SIP, Qucs_sip, 256, 2,
2503 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2504 build_string ("SIP"),
2505 build_string ("UCS-SIP"),
2506 build_string ("ISO/IEC 10646 Group 0 Plane 2 (SIP)"),
2507 build_string ("\\(ISO10646.*-2\\|UCS00-2\\)"),
2508 Qnil, MIN_CHAR_SIP, MAX_CHAR_SIP,
2509 MIN_CHAR_SIP, 0, Qnil, CONVERSION_IDENTICAL);
2511 # define MIN_CHAR_THAI 0
2512 # define MAX_CHAR_THAI 0
2513 /* # define MIN_CHAR_HEBREW 0 */
2514 /* # define MAX_CHAR_HEBREW 0 */
2515 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2516 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2518 staticpro (&Vcharset_ascii);
2520 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
2521 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2522 build_string ("ASCII"),
2523 build_string ("ASCII)"),
2524 build_string ("ASCII (ISO646 IRV)"),
2525 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2526 Qnil, 0, 0x7F, 0, 0, Qnil, CONVERSION_IDENTICAL);
2527 staticpro (&Vcharset_control_1);
2528 Vcharset_control_1 =
2529 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
2530 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
2531 build_string ("C1"),
2532 build_string ("Control characters"),
2533 build_string ("Control characters 128-191"),
2535 Qnil, 0x80, 0x9F, 0x80, 0, Qnil, CONVERSION_IDENTICAL);
2536 staticpro (&Vcharset_latin_iso8859_1);
2537 Vcharset_latin_iso8859_1 =
2538 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
2539 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
2540 build_string ("Latin-1"),
2541 build_string ("ISO8859-1 (Latin-1)"),
2542 build_string ("ISO8859-1 (Latin-1)"),
2543 build_string ("iso8859-1"),
2544 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2545 staticpro (&Vcharset_latin_iso8859_2);
2546 Vcharset_latin_iso8859_2 =
2547 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
2548 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
2549 build_string ("Latin-2"),
2550 build_string ("ISO8859-2 (Latin-2)"),
2551 build_string ("ISO8859-2 (Latin-2)"),
2552 build_string ("iso8859-2"),
2553 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2554 staticpro (&Vcharset_latin_iso8859_3);
2555 Vcharset_latin_iso8859_3 =
2556 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
2557 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
2558 build_string ("Latin-3"),
2559 build_string ("ISO8859-3 (Latin-3)"),
2560 build_string ("ISO8859-3 (Latin-3)"),
2561 build_string ("iso8859-3"),
2562 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2563 staticpro (&Vcharset_latin_iso8859_4);
2564 Vcharset_latin_iso8859_4 =
2565 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
2566 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
2567 build_string ("Latin-4"),
2568 build_string ("ISO8859-4 (Latin-4)"),
2569 build_string ("ISO8859-4 (Latin-4)"),
2570 build_string ("iso8859-4"),
2571 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2572 staticpro (&Vcharset_thai_tis620);
2573 Vcharset_thai_tis620 =
2574 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
2575 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
2576 build_string ("TIS620"),
2577 build_string ("TIS620 (Thai)"),
2578 build_string ("TIS620.2529 (Thai)"),
2579 build_string ("tis620"),
2580 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2581 staticpro (&Vcharset_greek_iso8859_7);
2582 Vcharset_greek_iso8859_7 =
2583 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
2584 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
2585 build_string ("ISO8859-7"),
2586 build_string ("ISO8859-7 (Greek)"),
2587 build_string ("ISO8859-7 (Greek)"),
2588 build_string ("iso8859-7"),
2589 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2590 staticpro (&Vcharset_arabic_iso8859_6);
2591 Vcharset_arabic_iso8859_6 =
2592 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
2593 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
2594 build_string ("ISO8859-6"),
2595 build_string ("ISO8859-6 (Arabic)"),
2596 build_string ("ISO8859-6 (Arabic)"),
2597 build_string ("iso8859-6"),
2598 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2599 staticpro (&Vcharset_hebrew_iso8859_8);
2600 Vcharset_hebrew_iso8859_8 =
2601 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
2602 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
2603 build_string ("ISO8859-8"),
2604 build_string ("ISO8859-8 (Hebrew)"),
2605 build_string ("ISO8859-8 (Hebrew)"),
2606 build_string ("iso8859-8"),
2608 0 /* MIN_CHAR_HEBREW */,
2609 0 /* MAX_CHAR_HEBREW */, 0, 32,
2610 Qnil, CONVERSION_IDENTICAL);
2611 staticpro (&Vcharset_katakana_jisx0201);
2612 Vcharset_katakana_jisx0201 =
2613 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
2614 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
2615 build_string ("JISX0201 Kana"),
2616 build_string ("JISX0201.1976 (Japanese Kana)"),
2617 build_string ("JISX0201.1976 Japanese Kana"),
2618 build_string ("jisx0201\\.1976"),
2619 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2620 staticpro (&Vcharset_latin_jisx0201);
2621 Vcharset_latin_jisx0201 =
2622 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
2623 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
2624 build_string ("JISX0201 Roman"),
2625 build_string ("JISX0201.1976 (Japanese Roman)"),
2626 build_string ("JISX0201.1976 Japanese Roman"),
2627 build_string ("jisx0201\\.1976"),
2628 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2629 staticpro (&Vcharset_cyrillic_iso8859_5);
2630 Vcharset_cyrillic_iso8859_5 =
2631 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
2632 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
2633 build_string ("ISO8859-5"),
2634 build_string ("ISO8859-5 (Cyrillic)"),
2635 build_string ("ISO8859-5 (Cyrillic)"),
2636 build_string ("iso8859-5"),
2637 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2638 staticpro (&Vcharset_latin_iso8859_9);
2639 Vcharset_latin_iso8859_9 =
2640 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
2641 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
2642 build_string ("Latin-5"),
2643 build_string ("ISO8859-9 (Latin-5)"),
2644 build_string ("ISO8859-9 (Latin-5)"),
2645 build_string ("iso8859-9"),
2646 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2648 staticpro (&Vcharset_jis_x0208);
2649 Vcharset_jis_x0208 =
2650 make_charset (LEADING_BYTE_JIS_X0208,
2651 Qmap_jis_x0208, 94, 2,
2652 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2653 build_string ("JIS X0208"),
2654 build_string ("JIS X0208 Common"),
2655 build_string ("JIS X0208 Common part"),
2656 build_string ("jisx0208\\.1990"),
2658 MIN_CHAR_JIS_X0208_1990,
2659 MAX_CHAR_JIS_X0208_1990, MIN_CHAR_JIS_X0208_1990, 33,
2660 Qnil, CONVERSION_94x94);
2662 staticpro (&Vcharset_japanese_jisx0208_1978);
2663 Vcharset_japanese_jisx0208_1978 =
2664 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
2665 Qmap_jis_x0208_1978, 94, 2,
2666 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
2667 build_string ("JIS X0208:1978"),
2668 build_string ("JIS X0208:1978 (Japanese)"),
2670 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2671 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2678 CONVERSION_IDENTICAL);
2679 staticpro (&Vcharset_chinese_gb2312);
2680 Vcharset_chinese_gb2312 =
2681 make_charset (LEADING_BYTE_CHINESE_GB2312, Qmap_gb2312, 94, 2,
2682 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
2683 build_string ("GB2312"),
2684 build_string ("GB2312)"),
2685 build_string ("GB2312 Chinese simplified"),
2686 build_string ("gb2312"),
2687 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2688 staticpro (&Vcharset_chinese_gb12345);
2689 Vcharset_chinese_gb12345 =
2690 make_charset (LEADING_BYTE_CHINESE_GB12345, Qmap_gb12345, 94, 2,
2691 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2692 build_string ("G1"),
2693 build_string ("GB 12345)"),
2694 build_string ("GB 12345-1990"),
2695 build_string ("GB12345\\(\\.1990\\)?-0"),
2696 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2697 staticpro (&Vcharset_japanese_jisx0208);
2698 Vcharset_japanese_jisx0208 =
2699 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qmap_jis_x0208_1983, 94, 2,
2700 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
2701 build_string ("JISX0208"),
2702 build_string ("JIS X0208:1983 (Japanese)"),
2703 build_string ("JIS X0208:1983 Japanese Kanji"),
2704 build_string ("jisx0208\\.1983"),
2711 CONVERSION_IDENTICAL);
2713 staticpro (&Vcharset_japanese_jisx0208_1990);
2714 Vcharset_japanese_jisx0208_1990 =
2715 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
2716 Qmap_jis_x0208_1990, 94, 2,
2717 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2718 build_string ("JISX0208-1990"),
2719 build_string ("JIS X0208:1990 (Japanese)"),
2720 build_string ("JIS X0208:1990 Japanese Kanji"),
2721 build_string ("jisx0208\\.1990"),
2723 0x2121 /* MIN_CHAR_JIS_X0208_1990 */,
2724 0x7426 /* MAX_CHAR_JIS_X0208_1990 */,
2725 0 /* MIN_CHAR_JIS_X0208_1990 */, 33,
2726 Vcharset_jis_x0208 /* Qnil */,
2727 CONVERSION_IDENTICAL /* CONVERSION_94x94 */);
2729 staticpro (&Vcharset_korean_ksc5601);
2730 Vcharset_korean_ksc5601 =
2731 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qmap_ks_x1001, 94, 2,
2732 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
2733 build_string ("KSC5601"),
2734 build_string ("KSC5601 (Korean"),
2735 build_string ("KSC5601 Korean Hangul and Hanja"),
2736 build_string ("ksc5601"),
2737 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2738 staticpro (&Vcharset_japanese_jisx0212);
2739 Vcharset_japanese_jisx0212 =
2740 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qmap_jis_x0212, 94, 2,
2741 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
2742 build_string ("JISX0212"),
2743 build_string ("JISX0212 (Japanese)"),
2744 build_string ("JISX0212 Japanese Supplement"),
2745 build_string ("jisx0212"),
2746 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2748 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2749 staticpro (&Vcharset_chinese_cns11643_1);
2750 Vcharset_chinese_cns11643_1 =
2751 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qmap_cns11643_1, 94, 2,
2752 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
2753 build_string ("CNS11643-1"),
2754 build_string ("CNS11643-1 (Chinese traditional)"),
2756 ("CNS 11643 Plane 1 Chinese traditional"),
2757 build_string (CHINESE_CNS_PLANE_RE("1")),
2758 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2759 staticpro (&Vcharset_chinese_cns11643_2);
2760 Vcharset_chinese_cns11643_2 =
2761 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qmap_cns11643_2, 94, 2,
2762 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
2763 build_string ("CNS11643-2"),
2764 build_string ("CNS11643-2 (Chinese traditional)"),
2766 ("CNS 11643 Plane 2 Chinese traditional"),
2767 build_string (CHINESE_CNS_PLANE_RE("2")),
2768 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2770 staticpro (&Vcharset_latin_tcvn5712);
2771 Vcharset_latin_tcvn5712 =
2772 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
2773 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
2774 build_string ("TCVN 5712"),
2775 build_string ("TCVN 5712 (VSCII-2)"),
2776 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
2777 build_string ("tcvn5712\\(\\.1993\\)?-1"),
2778 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2779 staticpro (&Vcharset_latin_viscii_lower);
2780 Vcharset_latin_viscii_lower =
2781 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
2782 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
2783 build_string ("VISCII lower"),
2784 build_string ("VISCII lower (Vietnamese)"),
2785 build_string ("VISCII lower (Vietnamese)"),
2786 build_string ("MULEVISCII-LOWER"),
2787 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2788 staticpro (&Vcharset_latin_viscii_upper);
2789 Vcharset_latin_viscii_upper =
2790 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
2791 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
2792 build_string ("VISCII upper"),
2793 build_string ("VISCII upper (Vietnamese)"),
2794 build_string ("VISCII upper (Vietnamese)"),
2795 build_string ("MULEVISCII-UPPER"),
2796 Qnil, 0, 0, 0, 32, Qnil, CONVERSION_IDENTICAL);
2797 staticpro (&Vcharset_latin_viscii);
2798 Vcharset_latin_viscii =
2799 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
2800 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
2801 build_string ("VISCII"),
2802 build_string ("VISCII 1.1 (Vietnamese)"),
2803 build_string ("VISCII 1.1 (Vietnamese)"),
2804 build_string ("VISCII1\\.1"),
2805 Qnil, 0, 0, 0, 0, Qnil, CONVERSION_IDENTICAL);
2806 staticpro (&Vcharset_chinese_big5);
2807 Vcharset_chinese_big5 =
2808 make_charset (LEADING_BYTE_CHINESE_BIG5, Qmap_big5, 256, 2,
2809 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2810 build_string ("Big5"),
2811 build_string ("Big5"),
2812 build_string ("Big5 Chinese traditional"),
2813 build_string ("big5-0"),
2815 MIN_CHAR_BIG5_CDP, MAX_CHAR_BIG5_CDP,
2816 MIN_CHAR_BIG5_CDP, 0, Qnil, CONVERSION_IDENTICAL);
2818 staticpro (&Vcharset_ethiopic_ucs);
2819 Vcharset_ethiopic_ucs =
2820 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
2821 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
2822 build_string ("Ethiopic (UCS)"),
2823 build_string ("Ethiopic (UCS)"),
2824 build_string ("Ethiopic of UCS"),
2825 build_string ("Ethiopic-Unicode"),
2826 Qnil, 0x1200, 0x137F, 0, 0,
2827 Qnil, CONVERSION_IDENTICAL);
2829 staticpro (&Vcharset_chinese_big5_1);
2830 Vcharset_chinese_big5_1 =
2831 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
2832 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
2833 build_string ("Big5"),
2834 build_string ("Big5 (Level-1)"),
2836 ("Big5 Level-1 Chinese traditional"),
2837 build_string ("big5"),
2838 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2839 staticpro (&Vcharset_chinese_big5_2);
2840 Vcharset_chinese_big5_2 =
2841 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
2842 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
2843 build_string ("Big5"),
2844 build_string ("Big5 (Level-2)"),
2846 ("Big5 Level-2 Chinese traditional"),
2847 build_string ("big5"),
2848 Qnil, 0, 0, 0, 33, Qnil, CONVERSION_IDENTICAL);
2850 #ifdef ENABLE_COMPOSITE_CHARS
2851 /* #### For simplicity, we put composite chars into a 96x96 charset.
2852 This is going to lead to problems because you can run out of
2853 room, esp. as we don't yet recycle numbers. */
2854 staticpro (&Vcharset_composite);
2855 Vcharset_composite =
2856 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
2857 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
2858 build_string ("Composite"),
2859 build_string ("Composite characters"),
2860 build_string ("Composite characters"),
2863 /* #### not dumped properly */
2864 composite_char_row_next = 32;
2865 composite_char_col_next = 32;
2867 Vcomposite_char_string2char_hash_table =
2868 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2869 Vcomposite_char_char2string_hash_table =
2870 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2871 staticpro (&Vcomposite_char_string2char_hash_table);
2872 staticpro (&Vcomposite_char_char2string_hash_table);
2873 #endif /* ENABLE_COMPOSITE_CHARS */