1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii;
63 Lisp_Object Vcharset_latin_viscii_lower;
64 Lisp_Object Vcharset_latin_viscii_upper;
65 Lisp_Object Vcharset_ethiopic_ucs;
66 Lisp_Object Vcharset_hiragana_jisx0208;
67 Lisp_Object Vcharset_katakana_jisx0208;
69 Lisp_Object Vcharset_chinese_big5_1;
70 Lisp_Object Vcharset_chinese_big5_2;
72 #ifdef ENABLE_COMPOSITE_CHARS
73 Lisp_Object Vcharset_composite;
75 /* Hash tables for composite chars. One maps string representing
76 composed chars to their equivalent chars; one goes the
78 Lisp_Object Vcomposite_char_char2string_hash_table;
79 Lisp_Object Vcomposite_char_string2char_hash_table;
81 static int composite_char_row_next;
82 static int composite_char_col_next;
84 #endif /* ENABLE_COMPOSITE_CHARS */
86 /* Table of charsets indexed by leading byte. */
87 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
89 /* Table of charsets indexed by type/final-byte/direction. */
91 Lisp_Object charset_by_attributes[4][128];
93 Lisp_Object charset_by_attributes[4][128][2];
97 /* Table of number of bytes in the string representation of a character
98 indexed by the first byte of that representation.
100 rep_bytes_by_first_byte(c) is more efficient than the equivalent
101 canonical computation:
103 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
105 Bytecount rep_bytes_by_first_byte[0xA0] =
106 { /* 0x00 - 0x7f are for straight ASCII */
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
115 /* 0x80 - 0x8f are for Dimension-1 official charsets */
117 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
119 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
121 /* 0x90 - 0x9d are for Dimension-2 official charsets */
122 /* 0x9e is for Dimension-1 private charsets */
123 /* 0x9f is for Dimension-2 private charsets */
124 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
131 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
133 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
136 for (i = 0; i < 256; i++)
138 markobj (cte->property[i]);
144 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
146 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
147 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
150 for (i = 0; i < 256; i++)
151 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
153 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
155 if (!char_byte_table_equal (cte1->property[i],
156 cte2->property[i], depth + 1))
163 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
169 char_byte_table_hash (Lisp_Object obj, int depth)
171 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
173 return internal_array_hash (cte->property, 256, depth);
176 static const struct lrecord_description char_byte_table_description[] = {
177 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
181 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
182 mark_char_byte_table,
183 internal_object_printer,
184 0, char_byte_table_equal,
185 char_byte_table_hash,
186 char_byte_table_description,
187 struct Lisp_Char_Byte_Table);
190 make_char_byte_table (Lisp_Object initval)
194 struct Lisp_Char_Byte_Table *cte =
195 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
196 &lrecord_char_byte_table);
198 for (i = 0; i < 256; i++)
199 cte->property[i] = initval;
201 XSETCHAR_BYTE_TABLE (obj, cte);
206 copy_char_byte_table (Lisp_Object entry)
208 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
211 struct Lisp_Char_Byte_Table *ctenew =
212 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
213 &lrecord_char_byte_table);
215 for (i = 0; i < 256; i++)
217 Lisp_Object new = cte->property[i];
218 if (CHAR_BYTE_TABLE_P (new))
219 ctenew->property[i] = copy_char_byte_table (new);
221 ctenew->property[i] = new;
224 XSETCHAR_BYTE_TABLE (obj, ctenew);
230 mark_char_code_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
232 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
238 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
240 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
241 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
243 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
247 char_code_table_hash (Lisp_Object obj, int depth)
249 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
251 return char_code_table_hash (cte->table, depth + 1);
254 static const struct lrecord_description char_code_table_description[] = {
255 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
259 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
260 mark_char_code_table,
261 internal_object_printer,
262 0, char_code_table_equal,
263 char_code_table_hash,
264 char_code_table_description,
265 struct Lisp_Char_Code_Table);
268 make_char_code_table (Lisp_Object initval)
271 struct Lisp_Char_Code_Table *cte =
272 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
273 &lrecord_char_code_table);
275 cte->table = make_char_byte_table (initval);
277 XSETCHAR_CODE_TABLE (obj, cte);
282 copy_char_code_table (Lisp_Object entry)
284 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
286 struct Lisp_Char_Code_Table *ctenew =
287 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
288 &lrecord_char_code_table);
290 ctenew->table = copy_char_byte_table (cte->table);
291 XSETCHAR_CODE_TABLE (obj, ctenew);
297 get_char_code_table (Emchar ch, Lisp_Object table)
299 unsigned int code = ch;
300 struct Lisp_Char_Byte_Table* cpt
301 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
302 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
304 if (CHAR_BYTE_TABLE_P (ret))
305 cpt = XCHAR_BYTE_TABLE (ret);
309 ret = cpt->property [(unsigned char) (code >> 16)];
310 if (CHAR_BYTE_TABLE_P (ret))
311 cpt = XCHAR_BYTE_TABLE (ret);
315 ret = cpt->property [(unsigned char) (code >> 8)];
316 if (CHAR_BYTE_TABLE_P (ret))
317 cpt = XCHAR_BYTE_TABLE (ret);
321 return cpt->property [(unsigned char) code];
325 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
327 unsigned int code = ch;
328 struct Lisp_Char_Byte_Table* cpt1
329 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
330 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
332 if (CHAR_BYTE_TABLE_P (ret))
334 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
336 ret = cpt2->property[(unsigned char)(code >> 16)];
337 if (CHAR_BYTE_TABLE_P (ret))
339 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
341 ret = cpt3->property[(unsigned char)(code >> 8)];
342 if (CHAR_BYTE_TABLE_P (ret))
344 struct Lisp_Char_Byte_Table* cpt4
345 = XCHAR_BYTE_TABLE (ret);
347 cpt4->property[(unsigned char)code] = value;
349 else if (!EQ (ret, value))
351 Lisp_Object cpt4 = make_char_byte_table (ret);
353 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
354 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
357 else if (!EQ (ret, value))
359 Lisp_Object cpt3 = make_char_byte_table (ret);
360 Lisp_Object cpt4 = make_char_byte_table (ret);
362 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
363 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
365 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
368 else if (!EQ (ret, value))
370 Lisp_Object cpt2 = make_char_byte_table (ret);
371 Lisp_Object cpt3 = make_char_byte_table (ret);
372 Lisp_Object cpt4 = make_char_byte_table (ret);
374 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
375 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
376 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
377 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
382 Lisp_Object Vcharacter_attribute_table;
383 Lisp_Object Vcharacter_composition_table;
384 Lisp_Object Vcharacter_variant_table;
386 Lisp_Object Q_decomposition;
389 Lisp_Object QnoBreak;
390 Lisp_Object Qfraction;
400 to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
406 else if (EQ (v, Qcompat))
408 else if (EQ (v, QnoBreak))
410 else if (EQ (v, Qfraction))
412 else if (EQ (v, Qsuper))
414 else if (EQ (v, Qsub))
416 else if (EQ (v, Qcircle))
418 else if (EQ (v, Qsquare))
420 else if (EQ (v, Qwide))
422 else if (EQ (v, Qnarrow))
424 else if (EQ (v, Qfont))
427 signal_simple_error (err_msg, err_arg);
430 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
431 Return character corresponding with list.
435 Lisp_Object table = Vcharacter_composition_table;
436 Lisp_Object rest = list;
440 Lisp_Object v = Fcar (rest);
442 Emchar c = to_char_code (v, "Invalid value for composition", list);
444 ret = get_char_code_table (c, table);
449 if (!CHAR_CODE_TABLE_P (ret))
454 else if (!CONSP (rest))
456 else if (CHAR_CODE_TABLE_P (ret))
459 signal_simple_error ("Invalid table is found with", list);
461 signal_simple_error ("Invalid value for composition", list);
464 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
465 Return variants of CHARACTER.
469 CHECK_CHAR (character);
470 return Fcopy_list (get_char_code_table (XCHAR (character),
471 Vcharacter_variant_table));
474 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
475 Return the alist of attributes of CHARACTER.
479 CHECK_CHAR (character);
480 return Fcopy_alist (get_char_code_table (XCHAR (character),
481 Vcharacter_attribute_table));
484 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
485 Return the value of CHARACTER's ATTRIBUTE.
487 (character, attribute))
490 = get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
496 if (!NILP (ccs = Ffind_charset (attribute)))
499 return Fcdr (Fassq (attribute, ret));
503 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
506 Emchar char_code = XCHAR (character);
508 = get_char_code_table (char_code, Vcharacter_attribute_table);
511 cell = Fassq (attribute, ret);
515 ret = Fcons (Fcons (attribute, value), ret);
517 else if (!EQ (Fcdr (cell), value))
519 Fsetcdr (cell, value);
521 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
525 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
526 Store CHARACTER's ATTRIBUTE with VALUE.
528 (character, attribute, value))
532 CHECK_CHAR (character);
533 ccs = Ffind_charset (attribute);
537 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
542 /* ad-hoc method for `ascii' */
543 if ((XCHARSET_CHARS (ccs) == 94) &&
544 (XCHARSET_BYTE_OFFSET (ccs) != 33))
545 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
547 ccs_len = XCHARSET_CHARS (ccs);
550 signal_simple_error ("Invalid value for coded-charset",
554 rest = Fget_char_attribute (character, attribute);
561 Lisp_Object ei = Fcar (rest);
563 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
564 nv = XVECTOR_DATA(v)[i];
571 XVECTOR_DATA(v)[i] = Qnil;
572 v = XCHARSET_DECODING_TABLE (ccs);
577 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
580 if (XCHARSET_GRAPHIC (ccs) == 1)
581 value = Fcopy_list (value);
586 Lisp_Object ei = Fcar (rest);
589 signal_simple_error ("Invalid value for coded-charset", value);
591 if ((i < 0) || (255 < i))
592 signal_simple_error ("Invalid value for coded-charset", value);
593 if (XCHARSET_GRAPHIC (ccs) == 1)
596 Fsetcar (rest, make_int (i));
598 i -= XCHARSET_BYTE_OFFSET (ccs);
599 nv = XVECTOR_DATA(v)[i];
605 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
612 XVECTOR_DATA(v)[i] = character;
614 else if (EQ (attribute, Q_decomposition))
616 Lisp_Object rest = value;
617 Lisp_Object table = Vcharacter_composition_table;
620 signal_simple_error ("Invalid value for ->decomposition",
625 Lisp_Object v = Fcar (rest);
628 = to_char_code (v, "Invalid value for ->decomposition", value);
633 put_char_code_table (c, character, table);
638 ntable = get_char_code_table (c, table);
639 if (!CHAR_CODE_TABLE_P (ntable))
641 ntable = make_char_code_table (Qnil);
642 put_char_code_table (c, ntable, table);
648 else if (EQ (attribute, Q_ucs))
654 signal_simple_error ("Invalid value for ->ucs", value);
658 ret = get_char_code_table (c, Vcharacter_variant_table);
659 if (NILP (Fmemq (character, ret)))
661 put_char_code_table (c, Fcons (character, ret),
662 Vcharacter_variant_table);
665 return put_char_attribute (character, attribute, value);
670 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
671 Store character's ATTRIBUTES.
675 Lisp_Object rest = attributes;
676 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
677 Lisp_Object character;
683 Lisp_Object cell = Fcar (rest);
687 signal_simple_error ("Invalid argument", attributes);
688 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
689 && XCHARSET_FINAL (ccs))
693 if (XCHARSET_DIMENSION (ccs) == 1)
695 Lisp_Object eb1 = Fcar (Fcdr (cell));
699 signal_simple_error ("Invalid argument", attributes);
701 switch (XCHARSET_CHARS (ccs))
705 + (XCHARSET_FINAL (ccs) - '0') * 94 + (b1 - 33);
709 + (XCHARSET_FINAL (ccs) - '0') * 96 + (b1 - 32);
715 else if (XCHARSET_DIMENSION (ccs) == 2)
717 Lisp_Object eb1 = Fcar (Fcdr (cell));
718 Lisp_Object eb2 = Fcar (Fcdr (Fcdr (cell)));
722 signal_simple_error ("Invalid argument", attributes);
725 signal_simple_error ("Invalid argument", attributes);
727 switch (XCHARSET_CHARS (ccs))
730 code = MIN_CHAR_94x94
731 + (XCHARSET_FINAL (ccs) - '0') * 94 * 94
732 + (b1 - 33) * 94 + (b2 - 33);
735 code = MIN_CHAR_96x96
736 + (XCHARSET_FINAL (ccs) - '0') * 96 * 96
737 + (b1 - 32) * 96 + (b2 - 32);
748 character = make_char (code);
749 goto setup_attributes;
755 else if (!INTP (code))
756 signal_simple_error ("Invalid argument", attributes);
758 character = make_char (XINT (code));
764 Lisp_Object cell = Fcar (rest);
767 signal_simple_error ("Invalid argument", attributes);
768 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
772 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
775 Lisp_Object Vutf_2000_version;
779 int leading_code_private_11;
782 Lisp_Object Qcharsetp;
784 /* Qdoc_string, Qdimension, Qchars defined in general.c */
785 Lisp_Object Qregistry, Qfinal, Qgraphic;
786 Lisp_Object Qdirection;
787 Lisp_Object Qreverse_direction_charset;
788 Lisp_Object Qleading_byte;
789 Lisp_Object Qshort_name, Qlong_name;
805 Qjapanese_jisx0208_1978,
817 Qvietnamese_viscii_lower,
818 Qvietnamese_viscii_upper,
827 Lisp_Object Ql2r, Qr2l;
829 Lisp_Object Vcharset_hash_table;
832 static Charset_ID next_allocated_leading_byte;
834 static Charset_ID next_allocated_1_byte_leading_byte;
835 static Charset_ID next_allocated_2_byte_leading_byte;
838 /* Composite characters are characters constructed by overstriking two
839 or more regular characters.
841 1) The old Mule implementation involves storing composite characters
842 in a buffer as a tag followed by all of the actual characters
843 used to make up the composite character. I think this is a bad
844 idea; it greatly complicates code that wants to handle strings
845 one character at a time because it has to deal with the possibility
846 of great big ungainly characters. It's much more reasonable to
847 simply store an index into a table of composite characters.
849 2) The current implementation only allows for 16,384 separate
850 composite characters over the lifetime of the XEmacs process.
851 This could become a potential problem if the user
852 edited lots of different files that use composite characters.
853 Due to FSF bogosity, increasing the number of allowable
854 composite characters under Mule would decrease the number
855 of possible faces that can exist. Mule already has shrunk
856 this to 2048, and further shrinkage would become uncomfortable.
857 No such problems exist in XEmacs.
859 Composite characters could be represented as 0x80 C1 C2 C3,
860 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
861 for slightly under 2^20 (one million) composite characters
862 over the XEmacs process lifetime, and you only need to
863 increase the size of a Mule character from 19 to 21 bits.
864 Or you could use 0x80 C1 C2 C3 C4, allowing for about
865 85 million (slightly over 2^26) composite characters. */
868 /************************************************************************/
869 /* Basic Emchar functions */
870 /************************************************************************/
872 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
873 string in STR. Returns the number of bytes stored.
874 Do not call this directly. Use the macro set_charptr_emchar() instead.
878 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
893 else if ( c <= 0x7ff )
895 *p++ = (c >> 6) | 0xc0;
896 *p++ = (c & 0x3f) | 0x80;
898 else if ( c <= 0xffff )
900 *p++ = (c >> 12) | 0xe0;
901 *p++ = ((c >> 6) & 0x3f) | 0x80;
902 *p++ = (c & 0x3f) | 0x80;
904 else if ( c <= 0x1fffff )
906 *p++ = (c >> 18) | 0xf0;
907 *p++ = ((c >> 12) & 0x3f) | 0x80;
908 *p++ = ((c >> 6) & 0x3f) | 0x80;
909 *p++ = (c & 0x3f) | 0x80;
911 else if ( c <= 0x3ffffff )
913 *p++ = (c >> 24) | 0xf8;
914 *p++ = ((c >> 18) & 0x3f) | 0x80;
915 *p++ = ((c >> 12) & 0x3f) | 0x80;
916 *p++ = ((c >> 6) & 0x3f) | 0x80;
917 *p++ = (c & 0x3f) | 0x80;
921 *p++ = (c >> 30) | 0xfc;
922 *p++ = ((c >> 24) & 0x3f) | 0x80;
923 *p++ = ((c >> 18) & 0x3f) | 0x80;
924 *p++ = ((c >> 12) & 0x3f) | 0x80;
925 *p++ = ((c >> 6) & 0x3f) | 0x80;
926 *p++ = (c & 0x3f) | 0x80;
929 BREAKUP_CHAR (c, charset, c1, c2);
930 lb = CHAR_LEADING_BYTE (c);
931 if (LEADING_BYTE_PRIVATE_P (lb))
932 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
934 if (EQ (charset, Vcharset_control_1))
943 /* Return the first character from a Mule-encoded string in STR,
944 assuming it's non-ASCII. Do not call this directly.
945 Use the macro charptr_emchar() instead. */
948 non_ascii_charptr_emchar (CONST Bufbyte *str)
961 else if ( b >= 0xf8 )
966 else if ( b >= 0xf0 )
971 else if ( b >= 0xe0 )
976 else if ( b >= 0xc0 )
986 for( ; len > 0; len-- )
989 ch = ( ch << 6 ) | ( b & 0x3f );
993 Bufbyte i0 = *str, i1, i2 = 0;
996 if (i0 == LEADING_BYTE_CONTROL_1)
997 return (Emchar) (*++str - 0x20);
999 if (LEADING_BYTE_PREFIX_P (i0))
1004 charset = CHARSET_BY_LEADING_BYTE (i0);
1005 if (XCHARSET_DIMENSION (charset) == 2)
1008 return MAKE_CHAR (charset, i1, i2);
1012 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1013 Do not call this directly. Use the macro valid_char_p() instead. */
1017 non_ascii_valid_char_p (Emchar ch)
1021 /* Must have only lowest 19 bits set */
1025 f1 = CHAR_FIELD1 (ch);
1026 f2 = CHAR_FIELD2 (ch);
1027 f3 = CHAR_FIELD3 (ch);
1031 Lisp_Object charset;
1033 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1034 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1035 f2 > MAX_CHAR_FIELD2_PRIVATE)
1040 if (f3 != 0x20 && f3 != 0x7F)
1044 NOTE: This takes advantage of the fact that
1045 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1046 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1048 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1049 return (XCHARSET_CHARS (charset) == 96);
1053 Lisp_Object charset;
1055 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1056 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1057 f1 > MAX_CHAR_FIELD1_PRIVATE)
1059 if (f2 < 0x20 || f3 < 0x20)
1062 #ifdef ENABLE_COMPOSITE_CHARS
1063 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1065 if (UNBOUNDP (Fgethash (make_int (ch),
1066 Vcomposite_char_char2string_hash_table,
1071 #endif /* ENABLE_COMPOSITE_CHARS */
1073 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
1076 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1078 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1081 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1083 return (XCHARSET_CHARS (charset) == 96);
1089 /************************************************************************/
1090 /* Basic string functions */
1091 /************************************************************************/
1093 /* Copy the character pointed to by PTR into STR, assuming it's
1094 non-ASCII. Do not call this directly. Use the macro
1095 charptr_copy_char() instead. */
1098 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1100 Bufbyte *strptr = str;
1102 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1104 /* Notice fallthrough. */
1106 case 6: *++strptr = *ptr++;
1107 case 5: *++strptr = *ptr++;
1109 case 4: *++strptr = *ptr++;
1110 case 3: *++strptr = *ptr++;
1111 case 2: *++strptr = *ptr;
1116 return strptr + 1 - str;
1120 /************************************************************************/
1121 /* streams of Emchars */
1122 /************************************************************************/
1124 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1125 The functions below are not meant to be called directly; use
1126 the macros in insdel.h. */
1129 Lstream_get_emchar_1 (Lstream *stream, int ch)
1131 Bufbyte str[MAX_EMCHAR_LEN];
1132 Bufbyte *strptr = str;
1134 str[0] = (Bufbyte) ch;
1135 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1137 /* Notice fallthrough. */
1140 ch = Lstream_getc (stream);
1142 *++strptr = (Bufbyte) ch;
1144 ch = Lstream_getc (stream);
1146 *++strptr = (Bufbyte) ch;
1149 ch = Lstream_getc (stream);
1151 *++strptr = (Bufbyte) ch;
1153 ch = Lstream_getc (stream);
1155 *++strptr = (Bufbyte) ch;
1157 ch = Lstream_getc (stream);
1159 *++strptr = (Bufbyte) ch;
1164 return charptr_emchar (str);
1168 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1170 Bufbyte str[MAX_EMCHAR_LEN];
1171 Bytecount len = set_charptr_emchar (str, ch);
1172 return Lstream_write (stream, str, len);
1176 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1178 Bufbyte str[MAX_EMCHAR_LEN];
1179 Bytecount len = set_charptr_emchar (str, ch);
1180 Lstream_unread (stream, str, len);
1184 /************************************************************************/
1185 /* charset object */
1186 /************************************************************************/
1189 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
1191 struct Lisp_Charset *cs = XCHARSET (obj);
1193 markobj (cs->short_name);
1194 markobj (cs->long_name);
1195 markobj (cs->doc_string);
1196 markobj (cs->registry);
1197 markobj (cs->ccl_program);
1199 markobj (cs->decoding_table);
1205 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1207 struct Lisp_Charset *cs = XCHARSET (obj);
1211 error ("printing unreadable object #<charset %s 0x%x>",
1212 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1215 write_c_string ("#<charset ", printcharfun);
1216 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1217 write_c_string (" ", printcharfun);
1218 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1219 write_c_string (" ", printcharfun);
1220 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1221 write_c_string (" ", printcharfun);
1222 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1223 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
1224 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
1225 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
1226 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
1228 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1229 CHARSET_COLUMNS (cs),
1230 CHARSET_GRAPHIC (cs),
1231 CHARSET_FINAL (cs));
1232 write_c_string (buf, printcharfun);
1233 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1234 sprintf (buf, " 0x%x>", cs->header.uid);
1235 write_c_string (buf, printcharfun);
1238 static const struct lrecord_description charset_description[] = {
1239 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1241 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
1246 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1247 mark_charset, print_charset, 0, 0, 0,
1248 charset_description,
1249 struct Lisp_Charset);
1251 /* Make a new charset. */
1254 make_charset (Charset_ID id, Lisp_Object name,
1255 unsigned char type, unsigned char columns, unsigned char graphic,
1256 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1257 Lisp_Object long_name, Lisp_Object doc,
1259 Lisp_Object decoding_table,
1260 Emchar ucs_min, Emchar ucs_max,
1261 Emchar code_offset, unsigned char byte_offset)
1264 struct Lisp_Charset *cs =
1265 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1266 XSETCHARSET (obj, cs);
1268 CHARSET_ID (cs) = id;
1269 CHARSET_NAME (cs) = name;
1270 CHARSET_SHORT_NAME (cs) = short_name;
1271 CHARSET_LONG_NAME (cs) = long_name;
1272 CHARSET_DIRECTION (cs) = direction;
1273 CHARSET_TYPE (cs) = type;
1274 CHARSET_COLUMNS (cs) = columns;
1275 CHARSET_GRAPHIC (cs) = graphic;
1276 CHARSET_FINAL (cs) = final;
1277 CHARSET_DOC_STRING (cs) = doc;
1278 CHARSET_REGISTRY (cs) = reg;
1279 CHARSET_CCL_PROGRAM (cs) = Qnil;
1280 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1282 CHARSET_DECODING_TABLE(cs) = Qnil;
1283 CHARSET_UCS_MIN(cs) = ucs_min;
1284 CHARSET_UCS_MAX(cs) = ucs_max;
1285 CHARSET_CODE_OFFSET(cs) = code_offset;
1286 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1289 switch (CHARSET_TYPE (cs))
1291 case CHARSET_TYPE_94:
1292 CHARSET_DIMENSION (cs) = 1;
1293 CHARSET_CHARS (cs) = 94;
1295 case CHARSET_TYPE_96:
1296 CHARSET_DIMENSION (cs) = 1;
1297 CHARSET_CHARS (cs) = 96;
1299 case CHARSET_TYPE_94X94:
1300 CHARSET_DIMENSION (cs) = 2;
1301 CHARSET_CHARS (cs) = 94;
1303 case CHARSET_TYPE_96X96:
1304 CHARSET_DIMENSION (cs) = 2;
1305 CHARSET_CHARS (cs) = 96;
1308 case CHARSET_TYPE_128:
1309 CHARSET_DIMENSION (cs) = 1;
1310 CHARSET_CHARS (cs) = 128;
1312 case CHARSET_TYPE_128X128:
1313 CHARSET_DIMENSION (cs) = 2;
1314 CHARSET_CHARS (cs) = 128;
1316 case CHARSET_TYPE_256:
1317 CHARSET_DIMENSION (cs) = 1;
1318 CHARSET_CHARS (cs) = 256;
1320 case CHARSET_TYPE_256X256:
1321 CHARSET_DIMENSION (cs) = 2;
1322 CHARSET_CHARS (cs) = 256;
1328 if (id == LEADING_BYTE_ASCII)
1329 CHARSET_REP_BYTES (cs) = 1;
1331 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1333 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1338 /* some charsets do not have final characters. This includes
1339 ASCII, Control-1, Composite, and the two faux private
1342 if (code_offset == 0)
1344 assert (NILP (charset_by_attributes[type][final]));
1345 charset_by_attributes[type][final] = obj;
1348 assert (NILP (charset_by_attributes[type][final][direction]));
1349 charset_by_attributes[type][final][direction] = obj;
1353 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1354 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1357 /* official leading byte */
1358 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
1361 /* Some charsets are "faux" and don't have names or really exist at
1362 all except in the leading-byte table. */
1364 Fputhash (name, obj, Vcharset_hash_table);
1369 get_unallocated_leading_byte (int dimension)
1374 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1377 lb = next_allocated_leading_byte++;
1381 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1384 lb = next_allocated_1_byte_leading_byte++;
1388 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1391 lb = next_allocated_2_byte_leading_byte++;
1397 ("No more character sets free for this dimension",
1398 make_int (dimension));
1405 range_charset_code_point (Lisp_Object charset, Emchar ch)
1409 if ((XCHARSET_UCS_MIN (charset) <= ch)
1410 && (ch <= XCHARSET_UCS_MAX (charset)))
1412 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1414 if (XCHARSET_DIMENSION (charset) == 1)
1415 return list1 (make_int (d + XCHARSET_BYTE_OFFSET (charset)));
1416 else if (XCHARSET_DIMENSION (charset) == 2)
1417 return list2 (make_int (d / XCHARSET_CHARS (charset)
1418 + XCHARSET_BYTE_OFFSET (charset)),
1419 make_int (d % XCHARSET_CHARS (charset)
1420 + XCHARSET_BYTE_OFFSET (charset)));
1421 else if (XCHARSET_DIMENSION (charset) == 3)
1422 return list3 (make_int (d / (XCHARSET_CHARS (charset)
1423 * XCHARSET_CHARS (charset))
1424 + XCHARSET_BYTE_OFFSET (charset)),
1425 make_int (d / XCHARSET_CHARS (charset)
1426 % XCHARSET_CHARS (charset)
1427 + XCHARSET_BYTE_OFFSET (charset)),
1428 make_int (d % XCHARSET_CHARS (charset)
1429 + XCHARSET_BYTE_OFFSET (charset)));
1430 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1431 return list4 (make_int (d / (XCHARSET_CHARS (charset)
1432 * XCHARSET_CHARS (charset)
1433 * XCHARSET_CHARS (charset))
1434 + XCHARSET_BYTE_OFFSET (charset)),
1435 make_int (d / (XCHARSET_CHARS (charset)
1436 * XCHARSET_CHARS (charset))
1437 % XCHARSET_CHARS (charset)
1438 + XCHARSET_BYTE_OFFSET (charset)),
1439 make_int (d / XCHARSET_CHARS (charset)
1440 % XCHARSET_CHARS (charset)
1441 + XCHARSET_BYTE_OFFSET (charset)),
1442 make_int (d % XCHARSET_CHARS (charset)
1443 + XCHARSET_BYTE_OFFSET (charset)));
1445 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1447 if (XCHARSET_DIMENSION (charset) == 1)
1449 if (XCHARSET_CHARS (charset) == 94)
1451 if (((d = ch - (MIN_CHAR_94
1452 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1454 return list1 (make_int (d + 33));
1456 else if (XCHARSET_CHARS (charset) == 96)
1458 if (((d = ch - (MIN_CHAR_96
1459 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1461 return list1 (make_int (d + 32));
1466 else if (XCHARSET_DIMENSION (charset) == 2)
1468 if (XCHARSET_CHARS (charset) == 94)
1470 if (((d = ch - (MIN_CHAR_94x94
1471 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1474 return list2 (make_int ((d / 94) + 33),
1475 make_int (d % 94 + 33));
1477 else if (XCHARSET_CHARS (charset) == 96)
1479 if (((d = ch - (MIN_CHAR_96x96
1480 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1483 return list2 (make_int ((d / 96) + 32),
1484 make_int (d % 96 + 32));
1492 split_builtin_char (Emchar c)
1494 if (c < MIN_CHAR_OBS_94x94)
1496 if (c <= MAX_CHAR_BASIC_LATIN)
1498 return list2 (Vcharset_ascii, make_int (c));
1502 return list2 (Vcharset_control_1, make_int (c & 0x7F));
1506 return list2 (Vcharset_latin_iso8859_1, make_int (c & 0x7F));
1508 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1510 return list2 (Vcharset_greek_iso8859_7,
1511 make_int (c - MIN_CHAR_GREEK + 0x20));
1513 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1515 return list2 (Vcharset_cyrillic_iso8859_5,
1516 make_int (c - MIN_CHAR_CYRILLIC + 0x20));
1518 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1520 return list2 (Vcharset_hebrew_iso8859_8,
1521 make_int (c - MIN_CHAR_HEBREW + 0x20));
1523 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1525 return list2 (Vcharset_thai_tis620,
1526 make_int (c - MIN_CHAR_THAI + 0x20));
1528 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1529 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1531 return list2 (Vcharset_katakana_jisx0201,
1532 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1536 return list3 (Vcharset_ucs_bmp,
1537 make_int (c >> 8), make_int (c & 0xff));
1540 else if (c <= MAX_CHAR_OBS_94x94)
1542 return list3 (CHARSET_BY_ATTRIBUTES
1543 (CHARSET_TYPE_94X94,
1544 ((c - MIN_CHAR_OBS_94x94) / (94 * 94)) + '@',
1545 CHARSET_LEFT_TO_RIGHT),
1546 make_int ((((c - MIN_CHAR_OBS_94x94) / 94) % 94) + 33),
1547 make_int (((c - MIN_CHAR_OBS_94x94) % 94) + 33));
1549 else if (c <= MAX_CHAR_94)
1551 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1552 ((c - MIN_CHAR_94) / 94) + '0',
1553 CHARSET_LEFT_TO_RIGHT),
1554 make_int (((c - MIN_CHAR_94) % 94) + 33));
1556 else if (c <= MAX_CHAR_96)
1558 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1559 ((c - MIN_CHAR_96) / 96) + '0',
1560 CHARSET_LEFT_TO_RIGHT),
1561 make_int (((c - MIN_CHAR_96) % 96) + 32));
1563 else if (c <= MAX_CHAR_94x94)
1565 return list3 (CHARSET_BY_ATTRIBUTES
1566 (CHARSET_TYPE_94X94,
1567 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1568 CHARSET_LEFT_TO_RIGHT),
1569 make_int ((((c - MIN_CHAR_94x94) / 94) % 94) + 33),
1570 make_int (((c - MIN_CHAR_94x94) % 94) + 33));
1572 else if (c <= MAX_CHAR_96x96)
1574 return list3 (CHARSET_BY_ATTRIBUTES
1575 (CHARSET_TYPE_96X96,
1576 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1577 CHARSET_LEFT_TO_RIGHT),
1578 make_int ((((c - MIN_CHAR_96x96) / 96) % 96) + 32),
1579 make_int (((c - MIN_CHAR_96x96) % 96) + 32));
1588 charset_code_point (Lisp_Object charset, Emchar ch)
1590 Lisp_Object cdef = get_char_code_table (ch, Vcharacter_attribute_table);
1592 if (!EQ (cdef, Qnil))
1594 Lisp_Object field = Fassq (charset, cdef);
1596 if (!EQ (field, Qnil))
1597 return Fcdr (field);
1599 return range_charset_code_point (charset, ch);
1602 Lisp_Object Vdefault_coded_charset_priority_list;
1606 /************************************************************************/
1607 /* Basic charset Lisp functions */
1608 /************************************************************************/
1610 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1611 Return non-nil if OBJECT is a charset.
1615 return CHARSETP (object) ? Qt : Qnil;
1618 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1619 Retrieve the charset of the given name.
1620 If CHARSET-OR-NAME is a charset object, it is simply returned.
1621 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1622 nil is returned. Otherwise the associated charset object is returned.
1626 if (CHARSETP (charset_or_name))
1627 return charset_or_name;
1629 CHECK_SYMBOL (charset_or_name);
1630 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1633 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1634 Retrieve the charset of the given name.
1635 Same as `find-charset' except an error is signalled if there is no such
1636 charset instead of returning nil.
1640 Lisp_Object charset = Ffind_charset (name);
1643 signal_simple_error ("No such charset", name);
1647 /* We store the charsets in hash tables with the names as the key and the
1648 actual charset object as the value. Occasionally we need to use them
1649 in a list format. These routines provide us with that. */
1650 struct charset_list_closure
1652 Lisp_Object *charset_list;
1656 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1657 void *charset_list_closure)
1659 /* This function can GC */
1660 struct charset_list_closure *chcl =
1661 (struct charset_list_closure*) charset_list_closure;
1662 Lisp_Object *charset_list = chcl->charset_list;
1664 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1668 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1669 Return a list of the names of all defined charsets.
1673 Lisp_Object charset_list = Qnil;
1674 struct gcpro gcpro1;
1675 struct charset_list_closure charset_list_closure;
1677 GCPRO1 (charset_list);
1678 charset_list_closure.charset_list = &charset_list;
1679 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1680 &charset_list_closure);
1683 return charset_list;
1686 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1687 Return the name of the given charset.
1691 return XCHARSET_NAME (Fget_charset (charset));
1694 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1695 Define a new character set.
1696 This function is for use with Mule support.
1697 NAME is a symbol, the name by which the character set is normally referred.
1698 DOC-STRING is a string describing the character set.
1699 PROPS is a property list, describing the specific nature of the
1700 character set. Recognized properties are:
1702 'short-name Short version of the charset name (ex: Latin-1)
1703 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1704 'registry A regular expression matching the font registry field for
1706 'dimension Number of octets used to index a character in this charset.
1707 Either 1 or 2. Defaults to 1.
1708 'columns Number of columns used to display a character in this charset.
1709 Only used in TTY mode. (Under X, the actual width of a
1710 character can be derived from the font used to display the
1711 characters.) If unspecified, defaults to the dimension
1712 (this is almost always the correct value).
1713 'chars Number of characters in each dimension (94 or 96).
1714 Defaults to 94. Note that if the dimension is 2, the
1715 character set thus described is 94x94 or 96x96.
1716 'final Final byte of ISO 2022 escape sequence. Must be
1717 supplied. Each combination of (DIMENSION, CHARS) defines a
1718 separate namespace for final bytes. Note that ISO
1719 2022 restricts the final byte to the range
1720 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1721 dimension == 2. Note also that final bytes in the range
1722 0x30 - 0x3F are reserved for user-defined (not official)
1724 'graphic 0 (use left half of font on output) or 1 (use right half
1725 of font on output). Defaults to 0. For example, for
1726 a font whose registry is ISO8859-1, the left half
1727 (octets 0x20 - 0x7F) is the `ascii' character set, while
1728 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1729 character set. With 'graphic set to 0, the octets
1730 will have their high bit cleared; with it set to 1,
1731 the octets will have their high bit set.
1732 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1734 'ccl-program A compiled CCL program used to convert a character in
1735 this charset into an index into the font. This is in
1736 addition to the 'graphic property. The CCL program
1737 is passed the octets of the character, with the high
1738 bit cleared and set depending upon whether the value
1739 of the 'graphic property is 0 or 1.
1741 (name, doc_string, props))
1743 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1744 int direction = CHARSET_LEFT_TO_RIGHT;
1746 Lisp_Object registry = Qnil;
1747 Lisp_Object charset;
1748 Lisp_Object rest, keyword, value;
1749 Lisp_Object ccl_program = Qnil;
1750 Lisp_Object short_name = Qnil, long_name = Qnil;
1751 int byte_offset = -1;
1753 CHECK_SYMBOL (name);
1754 if (!NILP (doc_string))
1755 CHECK_STRING (doc_string);
1757 charset = Ffind_charset (name);
1758 if (!NILP (charset))
1759 signal_simple_error ("Cannot redefine existing charset", name);
1761 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1763 if (EQ (keyword, Qshort_name))
1765 CHECK_STRING (value);
1769 if (EQ (keyword, Qlong_name))
1771 CHECK_STRING (value);
1775 else if (EQ (keyword, Qdimension))
1778 dimension = XINT (value);
1779 if (dimension < 1 || dimension > 2)
1780 signal_simple_error ("Invalid value for 'dimension", value);
1783 else if (EQ (keyword, Qchars))
1786 chars = XINT (value);
1787 if (chars != 94 && chars != 96)
1788 signal_simple_error ("Invalid value for 'chars", value);
1791 else if (EQ (keyword, Qcolumns))
1794 columns = XINT (value);
1795 if (columns != 1 && columns != 2)
1796 signal_simple_error ("Invalid value for 'columns", value);
1799 else if (EQ (keyword, Qgraphic))
1802 graphic = XINT (value);
1804 if (graphic < 0 || graphic > 2)
1806 if (graphic < 0 || graphic > 1)
1808 signal_simple_error ("Invalid value for 'graphic", value);
1811 else if (EQ (keyword, Qregistry))
1813 CHECK_STRING (value);
1817 else if (EQ (keyword, Qdirection))
1819 if (EQ (value, Ql2r))
1820 direction = CHARSET_LEFT_TO_RIGHT;
1821 else if (EQ (value, Qr2l))
1822 direction = CHARSET_RIGHT_TO_LEFT;
1824 signal_simple_error ("Invalid value for 'direction", value);
1827 else if (EQ (keyword, Qfinal))
1829 CHECK_CHAR_COERCE_INT (value);
1830 final = XCHAR (value);
1831 if (final < '0' || final > '~')
1832 signal_simple_error ("Invalid value for 'final", value);
1835 else if (EQ (keyword, Qccl_program))
1837 CHECK_VECTOR (value);
1838 ccl_program = value;
1842 signal_simple_error ("Unrecognized property", keyword);
1846 error ("'final must be specified");
1847 if (dimension == 2 && final > 0x5F)
1849 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1853 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1855 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1857 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1858 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1860 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1862 id = get_unallocated_leading_byte (dimension);
1864 if (NILP (doc_string))
1865 doc_string = build_string ("");
1867 if (NILP (registry))
1868 registry = build_string ("");
1870 if (NILP (short_name))
1871 XSETSTRING (short_name, XSYMBOL (name)->name);
1873 if (NILP (long_name))
1874 long_name = doc_string;
1877 columns = dimension;
1879 if (byte_offset < 0)
1883 else if (chars == 96)
1889 charset = make_charset (id, name, type, columns, graphic,
1890 final, direction, short_name, long_name,
1891 doc_string, registry,
1892 Qnil, 0, 0, 0, byte_offset);
1893 if (!NILP (ccl_program))
1894 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1898 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1900 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1901 NEW-NAME is the name of the new charset. Return the new charset.
1903 (charset, new_name))
1905 Lisp_Object new_charset = Qnil;
1906 int id, dimension, columns, graphic, final;
1907 int direction, type;
1908 Lisp_Object registry, doc_string, short_name, long_name;
1909 struct Lisp_Charset *cs;
1911 charset = Fget_charset (charset);
1912 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1913 signal_simple_error ("Charset already has reverse-direction charset",
1916 CHECK_SYMBOL (new_name);
1917 if (!NILP (Ffind_charset (new_name)))
1918 signal_simple_error ("Cannot redefine existing charset", new_name);
1920 cs = XCHARSET (charset);
1922 type = CHARSET_TYPE (cs);
1923 columns = CHARSET_COLUMNS (cs);
1924 dimension = CHARSET_DIMENSION (cs);
1925 id = get_unallocated_leading_byte (dimension);
1927 graphic = CHARSET_GRAPHIC (cs);
1928 final = CHARSET_FINAL (cs);
1929 direction = CHARSET_RIGHT_TO_LEFT;
1930 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1931 direction = CHARSET_LEFT_TO_RIGHT;
1932 doc_string = CHARSET_DOC_STRING (cs);
1933 short_name = CHARSET_SHORT_NAME (cs);
1934 long_name = CHARSET_LONG_NAME (cs);
1935 registry = CHARSET_REGISTRY (cs);
1937 new_charset = make_charset (id, new_name, type, columns,
1938 graphic, final, direction, short_name, long_name,
1939 doc_string, registry,
1941 CHARSET_DECODING_TABLE(cs),
1942 CHARSET_UCS_MIN(cs),
1943 CHARSET_UCS_MAX(cs),
1944 CHARSET_CODE_OFFSET(cs),
1945 CHARSET_BYTE_OFFSET(cs)
1951 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1952 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1957 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1958 Define symbol ALIAS as an alias for CHARSET.
1962 CHECK_SYMBOL (alias);
1963 charset = Fget_charset (charset);
1964 return Fputhash (alias, charset, Vcharset_hash_table);
1967 /* #### Reverse direction charsets not yet implemented. */
1969 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1971 Return the reverse-direction charset parallel to CHARSET, if any.
1972 This is the charset with the same properties (in particular, the same
1973 dimension, number of characters per dimension, and final byte) as
1974 CHARSET but whose characters are displayed in the opposite direction.
1978 charset = Fget_charset (charset);
1979 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1983 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1984 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1985 If DIRECTION is omitted, both directions will be checked (left-to-right
1986 will be returned if character sets exist for both directions).
1988 (dimension, chars, final, direction))
1990 int dm, ch, fi, di = -1;
1992 Lisp_Object obj = Qnil;
1994 CHECK_INT (dimension);
1995 dm = XINT (dimension);
1996 if (dm < 1 || dm > 2)
1997 signal_simple_error ("Invalid value for DIMENSION", dimension);
2001 if (ch != 94 && ch != 96)
2002 signal_simple_error ("Invalid value for CHARS", chars);
2004 CHECK_CHAR_COERCE_INT (final);
2006 if (fi < '0' || fi > '~')
2007 signal_simple_error ("Invalid value for FINAL", final);
2009 if (EQ (direction, Ql2r))
2010 di = CHARSET_LEFT_TO_RIGHT;
2011 else if (EQ (direction, Qr2l))
2012 di = CHARSET_RIGHT_TO_LEFT;
2013 else if (!NILP (direction))
2014 signal_simple_error ("Invalid value for DIRECTION", direction);
2016 if (dm == 2 && fi > 0x5F)
2018 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2021 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2023 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2027 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2029 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2032 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2035 return XCHARSET_NAME (obj);
2039 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2040 Return short name of CHARSET.
2044 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2047 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2048 Return long name of CHARSET.
2052 return XCHARSET_LONG_NAME (Fget_charset (charset));
2055 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2056 Return description of CHARSET.
2060 return XCHARSET_DOC_STRING (Fget_charset (charset));
2063 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2064 Return dimension of CHARSET.
2068 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2071 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2072 Return property PROP of CHARSET.
2073 Recognized properties are those listed in `make-charset', as well as
2074 'name and 'doc-string.
2078 struct Lisp_Charset *cs;
2080 charset = Fget_charset (charset);
2081 cs = XCHARSET (charset);
2083 CHECK_SYMBOL (prop);
2084 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2085 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2086 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2087 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2088 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2089 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2090 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2091 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2092 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2093 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2094 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2095 if (EQ (prop, Qdirection))
2096 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2097 if (EQ (prop, Qreverse_direction_charset))
2099 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2103 return XCHARSET_NAME (obj);
2105 signal_simple_error ("Unrecognized charset property name", prop);
2106 return Qnil; /* not reached */
2109 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2110 Return charset identification number of CHARSET.
2114 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2117 /* #### We need to figure out which properties we really want to
2120 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2121 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2123 (charset, ccl_program))
2125 charset = Fget_charset (charset);
2126 CHECK_VECTOR (ccl_program);
2127 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2132 invalidate_charset_font_caches (Lisp_Object charset)
2134 /* Invalidate font cache entries for charset on all devices. */
2135 Lisp_Object devcons, concons, hash_table;
2136 DEVICE_LOOP_NO_BREAK (devcons, concons)
2138 struct device *d = XDEVICE (XCAR (devcons));
2139 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2140 if (!UNBOUNDP (hash_table))
2141 Fclrhash (hash_table);
2145 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2146 Set the 'registry property of CHARSET to REGISTRY.
2148 (charset, registry))
2150 charset = Fget_charset (charset);
2151 CHECK_STRING (registry);
2152 XCHARSET_REGISTRY (charset) = registry;
2153 invalidate_charset_font_caches (charset);
2154 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2159 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2160 Return mapping-table of CHARSET.
2164 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2167 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2168 Set mapping-table of CHARSET to TABLE.
2172 struct Lisp_Charset *cs;
2173 Lisp_Object old_table;
2176 charset = Fget_charset (charset);
2177 cs = XCHARSET (charset);
2179 if (EQ (table, Qnil))
2181 CHARSET_DECODING_TABLE(cs) = table;
2184 else if (VECTORP (table))
2188 /* ad-hoc method for `ascii' */
2189 if ((CHARSET_CHARS (cs) == 94) &&
2190 (CHARSET_BYTE_OFFSET (cs) != 33))
2191 ccs_len = 128 - CHARSET_BYTE_OFFSET (cs);
2193 ccs_len = CHARSET_CHARS (cs);
2195 if (XVECTOR_LENGTH (table) > ccs_len)
2196 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2197 old_table = CHARSET_DECODING_TABLE(cs);
2198 CHARSET_DECODING_TABLE(cs) = table;
2201 signal_error (Qwrong_type_argument,
2202 list2 (build_translated_string ("vector-or-nil-p"),
2204 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2206 switch (CHARSET_DIMENSION (cs))
2209 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2211 Lisp_Object c = XVECTOR_DATA(table)[i];
2216 list1 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2220 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2222 Lisp_Object v = XVECTOR_DATA(table)[i];
2228 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2230 CHARSET_DECODING_TABLE(cs) = old_table;
2231 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2233 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2235 Lisp_Object c = XVECTOR_DATA(v)[j];
2238 put_char_attribute (c, charset,
2241 (i + CHARSET_BYTE_OFFSET (cs)),
2243 (j + CHARSET_BYTE_OFFSET (cs))));
2247 put_char_attribute (v, charset,
2249 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2258 /************************************************************************/
2259 /* Lisp primitives for working with characters */
2260 /************************************************************************/
2262 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2263 Make a character from CHARSET and octets ARG1 and ARG2.
2264 ARG2 is required only for characters from two-dimensional charsets.
2265 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2266 character s with caron.
2268 (charset, arg1, arg2))
2270 struct Lisp_Charset *cs;
2272 int lowlim, highlim;
2274 charset = Fget_charset (charset);
2275 cs = XCHARSET (charset);
2277 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2278 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2280 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2282 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2283 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2286 /* It is useful (and safe, according to Olivier Galibert) to strip
2287 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
2288 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2289 Latin 2 code of the character. */
2297 if (a1 < lowlim || a1 > highlim)
2298 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2300 if (CHARSET_DIMENSION (cs) == 1)
2304 ("Charset is of dimension one; second octet must be nil", arg2);
2305 return make_char (MAKE_CHAR (charset, a1, 0));
2314 a2 = XINT (arg2) & 0x7f;
2316 if (a2 < lowlim || a2 > highlim)
2317 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2319 return make_char (MAKE_CHAR (charset, a1, a2));
2322 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2323 Return the character set of char CH.
2327 CHECK_CHAR_COERCE_INT (ch);
2329 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2332 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2333 Return list of charset and one or two position-codes of CHAR.
2337 /* This function can GC */
2338 struct gcpro gcpro1, gcpro2;
2339 Lisp_Object charset = Qnil;
2340 Lisp_Object rc = Qnil;
2343 GCPRO2 (charset, rc);
2344 CHECK_CHAR_COERCE_INT (character);
2346 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2348 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2350 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2354 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2362 #ifdef ENABLE_COMPOSITE_CHARS
2363 /************************************************************************/
2364 /* composite character functions */
2365 /************************************************************************/
2368 lookup_composite_char (Bufbyte *str, int len)
2370 Lisp_Object lispstr = make_string (str, len);
2371 Lisp_Object ch = Fgethash (lispstr,
2372 Vcomposite_char_string2char_hash_table,
2378 if (composite_char_row_next >= 128)
2379 signal_simple_error ("No more composite chars available", lispstr);
2380 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2381 composite_char_col_next);
2382 Fputhash (make_char (emch), lispstr,
2383 Vcomposite_char_char2string_hash_table);
2384 Fputhash (lispstr, make_char (emch),
2385 Vcomposite_char_string2char_hash_table);
2386 composite_char_col_next++;
2387 if (composite_char_col_next >= 128)
2389 composite_char_col_next = 32;
2390 composite_char_row_next++;
2399 composite_char_string (Emchar ch)
2401 Lisp_Object str = Fgethash (make_char (ch),
2402 Vcomposite_char_char2string_hash_table,
2404 assert (!UNBOUNDP (str));
2408 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2409 Convert a string into a single composite character.
2410 The character is the result of overstriking all the characters in
2415 CHECK_STRING (string);
2416 return make_char (lookup_composite_char (XSTRING_DATA (string),
2417 XSTRING_LENGTH (string)));
2420 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2421 Return a string of the characters comprising a composite character.
2429 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2430 signal_simple_error ("Must be composite char", ch);
2431 return composite_char_string (emch);
2433 #endif /* ENABLE_COMPOSITE_CHARS */
2436 /************************************************************************/
2437 /* initialization */
2438 /************************************************************************/
2441 syms_of_mule_charset (void)
2443 DEFSUBR (Fcharsetp);
2444 DEFSUBR (Ffind_charset);
2445 DEFSUBR (Fget_charset);
2446 DEFSUBR (Fcharset_list);
2447 DEFSUBR (Fcharset_name);
2448 DEFSUBR (Fmake_charset);
2449 DEFSUBR (Fmake_reverse_direction_charset);
2450 /* DEFSUBR (Freverse_direction_charset); */
2451 DEFSUBR (Fdefine_charset_alias);
2452 DEFSUBR (Fcharset_from_attributes);
2453 DEFSUBR (Fcharset_short_name);
2454 DEFSUBR (Fcharset_long_name);
2455 DEFSUBR (Fcharset_description);
2456 DEFSUBR (Fcharset_dimension);
2457 DEFSUBR (Fcharset_property);
2458 DEFSUBR (Fcharset_id);
2459 DEFSUBR (Fset_charset_ccl_program);
2460 DEFSUBR (Fset_charset_registry);
2462 DEFSUBR (Fchar_attribute_alist);
2463 DEFSUBR (Fget_char_attribute);
2464 DEFSUBR (Fput_char_attribute);
2465 DEFSUBR (Fdefine_char);
2466 DEFSUBR (Fchar_variants);
2467 DEFSUBR (Fget_composite_char);
2468 DEFSUBR (Fcharset_mapping_table);
2469 DEFSUBR (Fset_charset_mapping_table);
2472 DEFSUBR (Fmake_char);
2473 DEFSUBR (Fchar_charset);
2474 DEFSUBR (Fsplit_char);
2476 #ifdef ENABLE_COMPOSITE_CHARS
2477 DEFSUBR (Fmake_composite_char);
2478 DEFSUBR (Fcomposite_char_string);
2481 defsymbol (&Qcharsetp, "charsetp");
2482 defsymbol (&Qregistry, "registry");
2483 defsymbol (&Qfinal, "final");
2484 defsymbol (&Qgraphic, "graphic");
2485 defsymbol (&Qdirection, "direction");
2486 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2487 defsymbol (&Qshort_name, "short-name");
2488 defsymbol (&Qlong_name, "long-name");
2490 defsymbol (&Ql2r, "l2r");
2491 defsymbol (&Qr2l, "r2l");
2493 /* Charsets, compatible with FSF 20.3
2494 Naming convention is Script-Charset[-Edition] */
2495 defsymbol (&Qascii, "ascii");
2496 defsymbol (&Qcontrol_1, "control-1");
2497 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2498 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2499 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2500 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2501 defsymbol (&Qthai_tis620, "thai-tis620");
2502 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2503 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2504 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2505 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2506 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2507 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2508 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2509 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2510 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2511 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2512 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2513 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2514 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2515 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2517 defsymbol (&Q_ucs, "->ucs");
2518 defsymbol (&Q_decomposition, "->decomposition");
2519 defsymbol (&Qcompat, "compat");
2520 defsymbol (&QnoBreak, "noBreak");
2521 defsymbol (&Qfraction, "fraction");
2522 defsymbol (&Qsuper, "super");
2523 defsymbol (&Qsub, "sub");
2524 defsymbol (&Qcircle, "circle");
2525 defsymbol (&Qsquare, "square");
2526 defsymbol (&Qwide, "wide");
2527 defsymbol (&Qnarrow, "narrow");
2528 defsymbol (&Qfont, "font");
2529 defsymbol (&Qucs, "ucs");
2530 defsymbol (&Qucs_bmp, "ucs-bmp");
2531 defsymbol (&Qlatin_viscii, "latin-viscii");
2532 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2533 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2534 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2535 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2536 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2537 defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208");
2538 defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208");
2540 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2541 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2543 defsymbol (&Qcomposite, "composite");
2547 vars_of_mule_charset (void)
2554 /* Table of charsets indexed by leading byte. */
2555 for (i = 0; i < countof (charset_by_leading_byte); i++)
2556 charset_by_leading_byte[i] = Qnil;
2559 /* Table of charsets indexed by type/final-byte. */
2560 for (i = 0; i < countof (charset_by_attributes); i++)
2561 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2562 charset_by_attributes[i][j] = Qnil;
2564 /* Table of charsets indexed by type/final-byte/direction. */
2565 for (i = 0; i < countof (charset_by_attributes); i++)
2566 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2567 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2568 charset_by_attributes[i][j][k] = Qnil;
2572 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2574 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2575 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2579 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2580 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2581 Leading-code of private TYPE9N charset of column-width 1.
2583 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2587 Vutf_2000_version = build_string("0.12 (Kashiwara)");
2588 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2589 Version number of UTF-2000.
2592 staticpro (&Vcharacter_attribute_table);
2593 Vcharacter_attribute_table = make_char_code_table (Qnil);
2595 staticpro (&Vcharacter_composition_table);
2596 Vcharacter_composition_table = make_char_code_table (Qnil);
2598 staticpro (&Vcharacter_variant_table);
2599 Vcharacter_variant_table = make_char_code_table (Qnil);
2601 Vdefault_coded_charset_priority_list = Qnil;
2602 DEFVAR_LISP ("default-coded-charset-priority-list",
2603 &Vdefault_coded_charset_priority_list /*
2604 Default order of preferred coded-character-sets.
2610 complex_vars_of_mule_charset (void)
2612 staticpro (&Vcharset_hash_table);
2613 Vcharset_hash_table =
2614 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2616 /* Predefined character sets. We store them into variables for
2621 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2622 CHARSET_TYPE_256X256, 1, 2, 0,
2623 CHARSET_LEFT_TO_RIGHT,
2624 build_string ("BMP"),
2625 build_string ("BMP"),
2626 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2627 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2628 Qnil, 0, 0xFFFF, 0, 0);
2630 # define MIN_CHAR_THAI 0
2631 # define MAX_CHAR_THAI 0
2632 # define MIN_CHAR_GREEK 0
2633 # define MAX_CHAR_GREEK 0
2634 # define MIN_CHAR_HEBREW 0
2635 # define MAX_CHAR_HEBREW 0
2636 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2637 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2638 # define MIN_CHAR_CYRILLIC 0
2639 # define MAX_CHAR_CYRILLIC 0
2642 make_charset (LEADING_BYTE_ASCII, Qascii,
2643 CHARSET_TYPE_94, 1, 0, 'B',
2644 CHARSET_LEFT_TO_RIGHT,
2645 build_string ("ASCII"),
2646 build_string ("ASCII)"),
2647 build_string ("ASCII (ISO646 IRV)"),
2648 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2649 Qnil, 0, 0x7F, 0, 0);
2650 Vcharset_control_1 =
2651 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2652 CHARSET_TYPE_94, 1, 1, 0,
2653 CHARSET_LEFT_TO_RIGHT,
2654 build_string ("C1"),
2655 build_string ("Control characters"),
2656 build_string ("Control characters 128-191"),
2658 Qnil, 0x80, 0x9F, 0, 0);
2659 Vcharset_latin_iso8859_1 =
2660 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2661 CHARSET_TYPE_96, 1, 1, 'A',
2662 CHARSET_LEFT_TO_RIGHT,
2663 build_string ("Latin-1"),
2664 build_string ("ISO8859-1 (Latin-1)"),
2665 build_string ("ISO8859-1 (Latin-1)"),
2666 build_string ("iso8859-1"),
2667 Qnil, 0xA0, 0xFF, 0, 32);
2668 Vcharset_latin_iso8859_2 =
2669 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2670 CHARSET_TYPE_96, 1, 1, 'B',
2671 CHARSET_LEFT_TO_RIGHT,
2672 build_string ("Latin-2"),
2673 build_string ("ISO8859-2 (Latin-2)"),
2674 build_string ("ISO8859-2 (Latin-2)"),
2675 build_string ("iso8859-2"),
2677 Vcharset_latin_iso8859_3 =
2678 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2679 CHARSET_TYPE_96, 1, 1, 'C',
2680 CHARSET_LEFT_TO_RIGHT,
2681 build_string ("Latin-3"),
2682 build_string ("ISO8859-3 (Latin-3)"),
2683 build_string ("ISO8859-3 (Latin-3)"),
2684 build_string ("iso8859-3"),
2686 Vcharset_latin_iso8859_4 =
2687 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2688 CHARSET_TYPE_96, 1, 1, 'D',
2689 CHARSET_LEFT_TO_RIGHT,
2690 build_string ("Latin-4"),
2691 build_string ("ISO8859-4 (Latin-4)"),
2692 build_string ("ISO8859-4 (Latin-4)"),
2693 build_string ("iso8859-4"),
2695 Vcharset_thai_tis620 =
2696 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2697 CHARSET_TYPE_96, 1, 1, 'T',
2698 CHARSET_LEFT_TO_RIGHT,
2699 build_string ("TIS620"),
2700 build_string ("TIS620 (Thai)"),
2701 build_string ("TIS620.2529 (Thai)"),
2702 build_string ("tis620"),
2703 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2704 Vcharset_greek_iso8859_7 =
2705 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2706 CHARSET_TYPE_96, 1, 1, 'F',
2707 CHARSET_LEFT_TO_RIGHT,
2708 build_string ("ISO8859-7"),
2709 build_string ("ISO8859-7 (Greek)"),
2710 build_string ("ISO8859-7 (Greek)"),
2711 build_string ("iso8859-7"),
2712 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2713 Vcharset_arabic_iso8859_6 =
2714 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2715 CHARSET_TYPE_96, 1, 1, 'G',
2716 CHARSET_RIGHT_TO_LEFT,
2717 build_string ("ISO8859-6"),
2718 build_string ("ISO8859-6 (Arabic)"),
2719 build_string ("ISO8859-6 (Arabic)"),
2720 build_string ("iso8859-6"),
2722 Vcharset_hebrew_iso8859_8 =
2723 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2724 CHARSET_TYPE_96, 1, 1, 'H',
2725 CHARSET_RIGHT_TO_LEFT,
2726 build_string ("ISO8859-8"),
2727 build_string ("ISO8859-8 (Hebrew)"),
2728 build_string ("ISO8859-8 (Hebrew)"),
2729 build_string ("iso8859-8"),
2730 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2731 Vcharset_katakana_jisx0201 =
2732 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2733 CHARSET_TYPE_94, 1, 1, 'I',
2734 CHARSET_LEFT_TO_RIGHT,
2735 build_string ("JISX0201 Kana"),
2736 build_string ("JISX0201.1976 (Japanese Kana)"),
2737 build_string ("JISX0201.1976 Japanese Kana"),
2738 build_string ("jisx0201\\.1976"),
2740 MIN_CHAR_HALFWIDTH_KATAKANA,
2741 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2742 Vcharset_latin_jisx0201 =
2743 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2744 CHARSET_TYPE_94, 1, 0, 'J',
2745 CHARSET_LEFT_TO_RIGHT,
2746 build_string ("JISX0201 Roman"),
2747 build_string ("JISX0201.1976 (Japanese Roman)"),
2748 build_string ("JISX0201.1976 Japanese Roman"),
2749 build_string ("jisx0201\\.1976"),
2751 Vcharset_cyrillic_iso8859_5 =
2752 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2753 CHARSET_TYPE_96, 1, 1, 'L',
2754 CHARSET_LEFT_TO_RIGHT,
2755 build_string ("ISO8859-5"),
2756 build_string ("ISO8859-5 (Cyrillic)"),
2757 build_string ("ISO8859-5 (Cyrillic)"),
2758 build_string ("iso8859-5"),
2759 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2760 Vcharset_latin_iso8859_9 =
2761 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2762 CHARSET_TYPE_96, 1, 1, 'M',
2763 CHARSET_LEFT_TO_RIGHT,
2764 build_string ("Latin-5"),
2765 build_string ("ISO8859-9 (Latin-5)"),
2766 build_string ("ISO8859-9 (Latin-5)"),
2767 build_string ("iso8859-9"),
2769 Vcharset_japanese_jisx0208_1978 =
2770 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2771 CHARSET_TYPE_94X94, 2, 0, '@',
2772 CHARSET_LEFT_TO_RIGHT,
2773 build_string ("JIS X0208:1978"),
2774 build_string ("JIS X0208:1978 (Japanese)"),
2776 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2777 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2779 Vcharset_chinese_gb2312 =
2780 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2781 CHARSET_TYPE_94X94, 2, 0, 'A',
2782 CHARSET_LEFT_TO_RIGHT,
2783 build_string ("GB2312"),
2784 build_string ("GB2312)"),
2785 build_string ("GB2312 Chinese simplified"),
2786 build_string ("gb2312"),
2788 Vcharset_japanese_jisx0208 =
2789 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2790 CHARSET_TYPE_94X94, 2, 0, 'B',
2791 CHARSET_LEFT_TO_RIGHT,
2792 build_string ("JISX0208"),
2793 build_string ("JIS X0208:1983 (Japanese)"),
2794 build_string ("JIS X0208:1983 Japanese Kanji"),
2795 build_string ("jisx0208\\.1983"),
2797 Vcharset_korean_ksc5601 =
2798 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2799 CHARSET_TYPE_94X94, 2, 0, 'C',
2800 CHARSET_LEFT_TO_RIGHT,
2801 build_string ("KSC5601"),
2802 build_string ("KSC5601 (Korean"),
2803 build_string ("KSC5601 Korean Hangul and Hanja"),
2804 build_string ("ksc5601"),
2806 Vcharset_japanese_jisx0212 =
2807 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2808 CHARSET_TYPE_94X94, 2, 0, 'D',
2809 CHARSET_LEFT_TO_RIGHT,
2810 build_string ("JISX0212"),
2811 build_string ("JISX0212 (Japanese)"),
2812 build_string ("JISX0212 Japanese Supplement"),
2813 build_string ("jisx0212"),
2816 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2817 Vcharset_chinese_cns11643_1 =
2818 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2819 CHARSET_TYPE_94X94, 2, 0, 'G',
2820 CHARSET_LEFT_TO_RIGHT,
2821 build_string ("CNS11643-1"),
2822 build_string ("CNS11643-1 (Chinese traditional)"),
2824 ("CNS 11643 Plane 1 Chinese traditional"),
2825 build_string (CHINESE_CNS_PLANE_RE("1")),
2827 Vcharset_chinese_cns11643_2 =
2828 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2829 CHARSET_TYPE_94X94, 2, 0, 'H',
2830 CHARSET_LEFT_TO_RIGHT,
2831 build_string ("CNS11643-2"),
2832 build_string ("CNS11643-2 (Chinese traditional)"),
2834 ("CNS 11643 Plane 2 Chinese traditional"),
2835 build_string (CHINESE_CNS_PLANE_RE("2")),
2838 Vcharset_latin_viscii_lower =
2839 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2840 CHARSET_TYPE_96, 1, 1, '1',
2841 CHARSET_LEFT_TO_RIGHT,
2842 build_string ("VISCII lower"),
2843 build_string ("VISCII lower (Vietnamese)"),
2844 build_string ("VISCII lower (Vietnamese)"),
2845 build_string ("MULEVISCII-LOWER"),
2847 Vcharset_latin_viscii_upper =
2848 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2849 CHARSET_TYPE_96, 1, 1, '2',
2850 CHARSET_LEFT_TO_RIGHT,
2851 build_string ("VISCII upper"),
2852 build_string ("VISCII upper (Vietnamese)"),
2853 build_string ("VISCII upper (Vietnamese)"),
2854 build_string ("MULEVISCII-UPPER"),
2856 Vcharset_latin_viscii =
2857 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2858 CHARSET_TYPE_256, 1, 2, 0,
2859 CHARSET_LEFT_TO_RIGHT,
2860 build_string ("VISCII"),
2861 build_string ("VISCII 1.1 (Vietnamese)"),
2862 build_string ("VISCII 1.1 (Vietnamese)"),
2863 build_string ("VISCII1\\.1"),
2865 Vcharset_ethiopic_ucs =
2866 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs,
2867 CHARSET_TYPE_256X256, 2, 2, 0,
2868 CHARSET_LEFT_TO_RIGHT,
2869 build_string ("Ethiopic (UCS)"),
2870 build_string ("Ethiopic (UCS)"),
2871 build_string ("Ethiopic of UCS"),
2872 build_string ("Ethiopic-Unicode"),
2873 Qnil, 0x1200, 0x137F, 0x1200, 0);
2874 Vcharset_hiragana_jisx0208 =
2875 make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
2876 CHARSET_TYPE_94X94, 2, 0, 'B',
2877 CHARSET_LEFT_TO_RIGHT,
2878 build_string ("Hiragana"),
2879 build_string ("Hiragana of JIS X0208"),
2880 build_string ("Japanese Hiragana of JIS X0208"),
2881 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2882 Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA,
2883 (0x24 - 33) * 94 + (0x21 - 33), 33);
2884 Vcharset_katakana_jisx0208 =
2885 make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208,
2886 CHARSET_TYPE_94X94, 2, 0, 'B',
2887 CHARSET_LEFT_TO_RIGHT,
2888 build_string ("Katakana"),
2889 build_string ("Katakana of JIS X0208"),
2890 build_string ("Japanese Katakana of JIS X0208"),
2891 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2892 Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA,
2893 (0x25 - 33) * 94 + (0x21 - 33), 33);
2895 Vcharset_chinese_big5_1 =
2896 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2897 CHARSET_TYPE_94X94, 2, 0, '0',
2898 CHARSET_LEFT_TO_RIGHT,
2899 build_string ("Big5"),
2900 build_string ("Big5 (Level-1)"),
2902 ("Big5 Level-1 Chinese traditional"),
2903 build_string ("big5"),
2905 Vcharset_chinese_big5_2 =
2906 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2907 CHARSET_TYPE_94X94, 2, 0, '1',
2908 CHARSET_LEFT_TO_RIGHT,
2909 build_string ("Big5"),
2910 build_string ("Big5 (Level-2)"),
2912 ("Big5 Level-2 Chinese traditional"),
2913 build_string ("big5"),
2916 #ifdef ENABLE_COMPOSITE_CHARS
2917 /* #### For simplicity, we put composite chars into a 96x96 charset.
2918 This is going to lead to problems because you can run out of
2919 room, esp. as we don't yet recycle numbers. */
2920 Vcharset_composite =
2921 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2922 CHARSET_TYPE_96X96, 2, 0, 0,
2923 CHARSET_LEFT_TO_RIGHT,
2924 build_string ("Composite"),
2925 build_string ("Composite characters"),
2926 build_string ("Composite characters"),
2929 composite_char_row_next = 32;
2930 composite_char_col_next = 32;
2932 Vcomposite_char_string2char_hash_table =
2933 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2934 Vcomposite_char_char2string_hash_table =
2935 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2936 staticpro (&Vcomposite_char_string2char_hash_table);
2937 staticpro (&Vcomposite_char_char2string_hash_table);
2938 #endif /* ENABLE_COMPOSITE_CHARS */