1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii;
63 Lisp_Object Vcharset_latin_viscii_lower;
64 Lisp_Object Vcharset_latin_viscii_upper;
65 Lisp_Object Vcharset_ethiopic_ucs;
66 Lisp_Object Vcharset_hiragana_jisx0208;
67 Lisp_Object Vcharset_katakana_jisx0208;
69 Lisp_Object Vcharset_chinese_big5_1;
70 Lisp_Object Vcharset_chinese_big5_2;
72 #ifdef ENABLE_COMPOSITE_CHARS
73 Lisp_Object Vcharset_composite;
75 /* Hash tables for composite chars. One maps strings representing
76 composed chars to their equivalent chars; the other maps chars back
to the strings they were composed from. */
78 Lisp_Object Vcomposite_char_char2string_hash_table;
79 Lisp_Object Vcomposite_char_string2char_hash_table;
81 static int composite_char_row_next;
82 static int composite_char_col_next;
84 #endif /* ENABLE_COMPOSITE_CHARS */
86 /* Table of charsets indexed by leading byte. */
87 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
89 /* Table of charsets indexed by type/final-byte/direction. */
91 Lisp_Object charset_by_attributes[4][128];
93 Lisp_Object charset_by_attributes[4][128][2];
97 /* Table of number of bytes in the string representation of a character
98 indexed by the first byte of that representation.
100 rep_bytes_by_first_byte(c) is more efficient than the equivalent
101 canonical computation:
103 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
105 Bytecount rep_bytes_by_first_byte[0xA0] =
106 { /* 0x00 - 0x7f are for straight ASCII */
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
115 /* 0x80 - 0x8f are for Dimension-1 official charsets */
117 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
119 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
121 /* 0x90 - 0x9d are for Dimension-2 official charsets */
122 /* 0x9e is for Dimension-1 private charsets */
123 /* 0x9f is for Dimension-2 private charsets */
124 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Mark method registered for the char-byte-table lrecord type (see the
   DEFINE_LRECORD_IMPLEMENTATION for "char-byte-table").
   Calls MARKOBJ on each of the 256 entries of OBJ's property array. */
131 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
133 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
136 for (i = 0; i < 256; i++)
138 markobj (cte->property[i]);
/* Equality method registered for the char-byte-table lrecord type.
   Walks the 256 slots of OBJ1 and OBJ2 in parallel.  When a slot of OBJ1
   and the matching slot of OBJ2 both hold nested char-byte-tables, they
   are compared recursively with this function; otherwise the two slot
   values are compared with internal_equal.  DEPTH is passed down
   incremented by one in both cases.
   NOTE(review): the statements that produce the result (and the case
   where only OBJ1's slot is a table) are not visible here -- confirm. */
144 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
146 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
147 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
150 for (i = 0; i < 256; i++)
151 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
153 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
155 if (!char_byte_table_equal (cte1->property[i],
156 cte2->property[i], depth + 1))
163 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
/* Hash method registered for the char-byte-table lrecord type.
   Returns internal_array_hash over the table's 256-element property
   array, passing DEPTH through unchanged. */
169 char_byte_table_hash (Lisp_Object obj, int depth)
171 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
173 return internal_array_hash (cte->property, 256, depth);
176 static const struct lrecord_description char_byte_table_description[] = {
177 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
181 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
182 mark_char_byte_table,
183 internal_object_printer,
184 0, char_byte_table_equal,
185 char_byte_table_hash,
186 char_byte_table_description,
187 struct Lisp_Char_Byte_Table);
/* Allocate a new char-byte-table lrecord, set every one of its 256
   slots to INITVAL, and wrap it in a Lisp_Object (obj). */
190 make_char_byte_table (Lisp_Object initval)
194 struct Lisp_Char_Byte_Table *cte =
195 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
196 &lrecord_char_byte_table);
198 for (i = 0; i < 256; i++)
199 cte->property[i] = initval;
201 XSETCHAR_BYTE_TABLE (obj, cte);
/* Make a copy of the char-byte-table ENTRY in a freshly allocated
   char-byte-table lrecord.  Slots that themselves hold char-byte-tables
   are copied recursively, so the table structure is duplicated; every
   other slot value is stored in the copy as-is (the value object itself
   is shared, not copied). */
206 copy_char_byte_table (Lisp_Object entry)
208 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
211 struct Lisp_Char_Byte_Table *ctenew =
212 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
213 &lrecord_char_byte_table);
215 for (i = 0; i < 256; i++)
217 Lisp_Object new = cte->property[i];
218 if (CHAR_BYTE_TABLE_P (new))
219 ctenew->property[i] = copy_char_byte_table (new);
221 ctenew->property[i] = new;
224 XSETCHAR_BYTE_TABLE (obj, ctenew);
/* Mark method registered for the char-code-table lrecord type (see the
   DEFINE_LRECORD_IMPLEMENTATION for "char-code-table").
   NOTE(review): only the unwrapping of OBJ is visible here; presumably
   the remainder marks or returns cte->table, the single Lisp_Object
   listed in char_code_table_description -- confirm. */
230 mark_char_code_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
232 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
/* Equality method registered for the char-code-table lrecord type.
   A char-code-table wraps one char-byte-table (its `table' member), so
   the result is that of char_byte_table_equal on the two wrapped tables,
   called with DEPTH + 1. */
238 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
240 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
241 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
243 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
/* Hash method registered for the char-code-table lrecord type.
   A char-code-table wraps one char-byte-table (its `table' member), so
   its hash is the hash of that wrapped table, computed with DEPTH + 1.
   This must delegate to char_byte_table_hash, mirroring how
   char_code_table_equal delegates to char_byte_table_equal: calling
   char_code_table_hash on cte->table would treat a char-byte-table as a
   char-code-table and recurse without terminating. */
247 char_code_table_hash (Lisp_Object obj, int depth)
249 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
251 return char_byte_table_hash (cte->table, depth + 1);
254 static const struct lrecord_description char_code_table_description[] = {
255 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
259 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
260 mark_char_code_table,
261 internal_object_printer,
262 0, char_code_table_equal,
263 char_code_table_hash,
264 char_code_table_description,
265 struct Lisp_Char_Code_Table);
/* Allocate a new char-code-table lrecord whose wrapped table is a fresh
   char-byte-table with every slot set to INITVAL, and wrap the result
   in a Lisp_Object (obj). */
268 make_char_code_table (Lisp_Object initval)
271 struct Lisp_Char_Code_Table *cte =
272 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
273 &lrecord_char_code_table);
275 cte->table = make_char_byte_table (initval);
277 XSETCHAR_CODE_TABLE (obj, cte);
/* Make a copy of the char-code-table ENTRY: a new char-code-table
   lrecord is allocated and its wrapped table is set to a
   copy_char_byte_table copy of ENTRY's wrapped table. */
282 copy_char_code_table (Lisp_Object entry)
284 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
286 struct Lisp_Char_Code_Table *ctenew =
287 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
288 &lrecord_char_code_table);
290 ctenew->table = copy_char_byte_table (cte->table);
291 XSETCHAR_CODE_TABLE (obj, ctenew);
/* Look up the value stored for character CH in the char-code-table
   TABLE.  CH is taken as an unsigned 32-bit code and split into four
   bytes, most significant first; each byte indexes one level of nested
   char-byte-tables, and the lowest byte indexes the last level, whose
   slot is returned.
   NOTE(review): what happens when an upper-level slot is not a
   char-byte-table is not visible here.  put_char_code_table fills new
   lower-level tables with the old upper-level value, which suggests
   such a slot is returned directly as the value for the whole range --
   confirm. */
297 get_char_code_table (Emchar ch, Lisp_Object table)
299 unsigned int code = ch;
300 struct Lisp_Char_Byte_Table* cpt
301 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
/* Level 1: bits 31..24. */
302 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
304 if (CHAR_BYTE_TABLE_P (ret))
305 cpt = XCHAR_BYTE_TABLE (ret);
/* Level 2: bits 23..16. */
309 ret = cpt->property [(unsigned char) (code >> 16)];
310 if (CHAR_BYTE_TABLE_P (ret))
311 cpt = XCHAR_BYTE_TABLE (ret);
/* Level 3: bits 15..8. */
315 ret = cpt->property [(unsigned char) (code >> 8)];
316 if (CHAR_BYTE_TABLE_P (ret))
317 cpt = XCHAR_BYTE_TABLE (ret);
/* Level 4: bits 7..0. */
321 return cpt->property [(unsigned char) code];
/* Store VALUE for character CH in the char-code-table TABLE.
   CH is taken as an unsigned 32-bit code and split into four bytes,
   most significant first, each indexing one level of nested
   char-byte-tables (cpt1 .. cpt4).  Where a level already holds a
   char-byte-table the walk descends into it.  Where it holds a plain
   value that is not EQ to VALUE, the missing lower-level tables are
   created with make_char_byte_table, initialised to that old value so
   that all other codes in the range keep it; VALUE is then stored in
   the lowest-level slot and the new tables are linked into their
   parents. */
325 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
327 unsigned int code = ch;
328 struct Lisp_Char_Byte_Table* cpt1
329 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
330 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
332 if (CHAR_BYTE_TABLE_P (ret))
334 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
336 ret = cpt2->property[(unsigned char)(code >> 16)];
337 if (CHAR_BYTE_TABLE_P (ret))
339 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
341 ret = cpt3->property[(unsigned char)(code >> 8)];
342 if (CHAR_BYTE_TABLE_P (ret))
344 struct Lisp_Char_Byte_Table* cpt4
345 = XCHAR_BYTE_TABLE (ret);
/* All four levels exist: overwrite the leaf slot. */
347 cpt4->property[(unsigned char)code] = value;
349 else if (!EQ (ret, value))
/* Level 4 is missing: create it filled with the old level-3 value. */
351 Lisp_Object cpt4 = make_char_byte_table (ret);
353 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
354 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
357 else if (!EQ (ret, value))
/* Levels 3 and 4 are missing: create both filled with the old value. */
359 Lisp_Object cpt3 = make_char_byte_table (ret);
360 Lisp_Object cpt4 = make_char_byte_table (ret);
362 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
363 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
365 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
368 else if (!EQ (ret, value))
/* Levels 2, 3 and 4 are missing: create all three filled with the old
   value and chain them under the top-level table. */
370 Lisp_Object cpt2 = make_char_byte_table (ret);
371 Lisp_Object cpt3 = make_char_byte_table (ret);
372 Lisp_Object cpt4 = make_char_byte_table (ret);
374 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
375 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
376 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
377 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
382 Lisp_Object Vcharacter_attribute_table;
383 Lisp_Object Vcharacter_composition_table;
384 Lisp_Object Vcharacter_variant_table;
386 Lisp_Object Q_decomposition;
389 Lisp_Object QnoBreak;
390 Lisp_Object Qfraction;
399 to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
405 else if (EQ (v, Qcompat))
407 else if (EQ (v, QnoBreak))
409 else if (EQ (v, Qfraction))
411 else if (EQ (v, Qsuper))
413 else if (EQ (v, Qsub))
415 else if (EQ (v, Qcircle))
417 else if (EQ (v, Qwide))
419 else if (EQ (v, Qnarrow))
421 else if (EQ (v, Qfont))
424 signal_simple_error (err_msg, err_arg);
427 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
428 Return character corresponding with list.
432 Lisp_Object table = Vcharacter_composition_table;
433 Lisp_Object rest = list;
437 Lisp_Object v = Fcar (rest);
439 Emchar c = to_char_code (v, "Invalid value for composition", list);
441 ret = get_char_code_table (c, table);
446 if (!CHAR_CODE_TABLE_P (ret))
451 else if (!CONSP (rest))
453 else if (CHAR_CODE_TABLE_P (ret))
456 signal_simple_error ("Invalid table is found with", list);
458 signal_simple_error ("Invalid value for composition", list);
461 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
462 Return variants of CHARACTER.
466 CHECK_CHAR (character);
467 return Fcopy_list (get_char_code_table (XCHAR (character),
468 Vcharacter_variant_table));
471 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
472 Return the alist of attributes of CHARACTER.
476 CHECK_CHAR (character);
477 return Fcopy_alist (get_char_code_table (XCHAR (character),
478 Vcharacter_attribute_table));
481 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
482 Return the value of CHARACTER's ATTRIBUTE.
484 (character, attribute))
487 = get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
493 if (!NILP (ccs = Ffind_charset (attribute)))
496 return Fcdr (Fassq (attribute, ret));
/* Set ATTRIBUTE to `value' in the attribute alist kept for CHARACTER in
   Vcharacter_attribute_table.  The alist is fetched with
   get_char_code_table and searched with Fassq; one visible branch
   conses a new (ATTRIBUTE . value) pair onto the front of the alist,
   the other replaces the cdr of the existing cell when it is not
   already EQ to the new value.  The alist is then written back with
   put_char_code_table.
   NOTE(review): the remainder of the parameter list, the condition
   guarding the first branch and the return statement are not visible
   here -- presumably the first branch runs when Fassq finds no cell;
   confirm. */
500 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
503 Emchar char_code = XCHAR (character);
505 = get_char_code_table (char_code, Vcharacter_attribute_table);
508 cell = Fassq (attribute, ret);
512 ret = Fcons (Fcons (attribute, value), ret);
514 else if (!EQ (Fcdr (cell), value))
516 Fsetcdr (cell, value);
518 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
522 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
523 Store CHARACTER's ATTRIBUTE with VALUE.
525 (character, attribute, value))
529 ccs = Ffind_charset (attribute);
533 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
538 /* ad-hoc method for `ascii' */
539 if ((XCHARSET_CHARS (ccs) == 94) &&
540 (XCHARSET_BYTE_OFFSET (ccs) != 33))
541 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
543 ccs_len = XCHARSET_CHARS (ccs);
546 signal_simple_error ("Invalid value for coded-charset",
550 rest = Fget_char_attribute (character, attribute);
557 Lisp_Object ei = Fcar (rest);
559 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
560 nv = XVECTOR_DATA(v)[i];
567 XVECTOR_DATA(v)[i] = Qnil;
568 v = XCHARSET_DECODING_TABLE (ccs);
573 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
576 if (XCHARSET_GRAPHIC (ccs) == 1)
577 value = Fcopy_list (value);
582 Lisp_Object ei = Fcar (rest);
585 signal_simple_error ("Invalid value for coded-charset", value);
587 if ((i < 0) || (255 < i))
588 signal_simple_error ("Invalid value for coded-charset", value);
589 if (XCHARSET_GRAPHIC (ccs) == 1)
592 Fsetcar (rest, make_int (i));
594 i -= XCHARSET_BYTE_OFFSET (ccs);
595 nv = XVECTOR_DATA(v)[i];
601 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
608 XVECTOR_DATA(v)[i] = character;
610 else if (EQ (attribute, Q_decomposition))
612 Lisp_Object rest = value;
613 Lisp_Object table = Vcharacter_composition_table;
616 signal_simple_error ("Invalid value for ->decomposition",
621 Lisp_Object v = Fcar (rest);
624 = to_char_code (v, "Invalid value for ->decomposition", value);
629 put_char_code_table (c, character, table);
634 ntable = get_char_code_table (c, table);
635 if (!CHAR_CODE_TABLE_P (ntable))
637 ntable = make_char_code_table (Qnil);
638 put_char_code_table (c, ntable, table);
644 else if (EQ (attribute, Q_ucs))
650 signal_simple_error ("Invalid value for ->ucs", value);
654 ret = get_char_code_table (c, Vcharacter_variant_table);
655 if (NILP (Fmemq (character, ret)))
657 put_char_code_table (c, Fcons (character, ret),
658 Vcharacter_variant_table);
661 return put_char_attribute (character, attribute, value);
666 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
667 Store character's ATTRIBUTES.
671 Lisp_Object rest = attributes;
672 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
673 Lisp_Object character;
679 Lisp_Object cell = Fcar (rest);
683 signal_simple_error ("Invalid argument", attributes);
684 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
685 && XCHARSET_FINAL (ccs))
689 if (XCHARSET_DIMENSION (ccs) == 1)
691 Lisp_Object eb1 = Fcar (Fcdr (cell));
695 signal_simple_error ("Invalid argument", attributes);
697 switch (XCHARSET_CHARS (ccs))
701 + (XCHARSET_FINAL (ccs) - '0') * 94 + (b1 - 33);
705 + (XCHARSET_FINAL (ccs) - '0') * 96 + (b1 - 32);
711 else if (XCHARSET_DIMENSION (ccs) == 2)
713 Lisp_Object eb1 = Fcar (Fcdr (cell));
714 Lisp_Object eb2 = Fcar (Fcdr (Fcdr (cell)));
718 signal_simple_error ("Invalid argument", attributes);
721 signal_simple_error ("Invalid argument", attributes);
723 switch (XCHARSET_CHARS (ccs))
726 code = MIN_CHAR_94x94
727 + (XCHARSET_FINAL (ccs) - '0') * 94 * 94
728 + (b1 - 33) * 94 + (b2 - 33);
731 code = MIN_CHAR_96x96
732 + (XCHARSET_FINAL (ccs) - '0') * 96 * 96
733 + (b1 - 32) * 96 + (b2 - 32);
744 character = make_char (code);
745 goto setup_attributes;
751 else if (!INTP (code))
752 signal_simple_error ("Invalid argument", attributes);
754 character = make_char (XINT (code));
760 Lisp_Object cell = Fcar (rest);
763 signal_simple_error ("Invalid argument", attributes);
764 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
768 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
771 Lisp_Object Vutf_2000_version;
775 int leading_code_private_11;
778 Lisp_Object Qcharsetp;
780 /* Qdoc_string, Qdimension, Qchars defined in general.c */
781 Lisp_Object Qregistry, Qfinal, Qgraphic;
782 Lisp_Object Qdirection;
783 Lisp_Object Qreverse_direction_charset;
784 Lisp_Object Qleading_byte;
785 Lisp_Object Qshort_name, Qlong_name;
801 Qjapanese_jisx0208_1978,
813 Qvietnamese_viscii_lower,
814 Qvietnamese_viscii_upper,
823 Lisp_Object Ql2r, Qr2l;
825 Lisp_Object Vcharset_hash_table;
828 static Charset_ID next_allocated_leading_byte;
830 static Charset_ID next_allocated_1_byte_leading_byte;
831 static Charset_ID next_allocated_2_byte_leading_byte;
834 /* Composite characters are characters constructed by overstriking two
835 or more regular characters.
837 1) The old Mule implementation involves storing composite characters
838 in a buffer as a tag followed by all of the actual characters
839 used to make up the composite character. I think this is a bad
840 idea; it greatly complicates code that wants to handle strings
841 one character at a time because it has to deal with the possibility
842 of great big ungainly characters. It's much more reasonable to
843 simply store an index into a table of composite characters.
845 2) The current implementation only allows for 16,384 separate
846 composite characters over the lifetime of the XEmacs process.
847 This could become a potential problem if the user
848 edited lots of different files that use composite characters.
849 Due to FSF bogosity, increasing the number of allowable
850 composite characters under Mule would decrease the number
851 of possible faces that can exist. Mule already has shrunk
852 this to 2048, and further shrinkage would become uncomfortable.
853 No such problems exist in XEmacs.
855 Composite characters could be represented as 0x80 C1 C2 C3,
856 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
857 for slightly under 2^20 (one million) composite characters
858 over the XEmacs process lifetime, and you only need to
859 increase the size of a Mule character from 19 to 21 bits.
860 Or you could use 0x80 C1 C2 C3 C4, allowing for about
861 85 million (slightly over 2^26) composite characters. */
864 /************************************************************************/
865 /* Basic Emchar functions */
866 /************************************************************************/
868 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
869 string in STR. Returns the number of bytes stored.
870 Do not call this directly. Use the macro set_charptr_emchar() instead. */
/* Write the multi-byte representation of the character C through the
   cursor p.
   The visible range tests emit a UTF-8-style sequence: a lead byte
   tagged 0xc0 / 0xe0 / 0xf0 / 0xf8 / 0xfc for 2 to 6 bytes, followed by
   continuation bytes that each carry six bits of C tagged with 0x80.
   The last lines instead break C up with BREAKUP_CHAR and work from its
   leading byte.
   NOTE(review): the preprocessor conditionals that presumably select
   between these two encodings, the first range test and the return
   statement are not visible here -- confirm. */
874 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
/* Two bytes: codes up to 0x7ff. */
889 else if ( c <= 0x7ff )
891 *p++ = (c >> 6) | 0xc0;
892 *p++ = (c & 0x3f) | 0x80;
/* Three bytes: codes up to 0xffff. */
894 else if ( c <= 0xffff )
896 *p++ = (c >> 12) | 0xe0;
897 *p++ = ((c >> 6) & 0x3f) | 0x80;
898 *p++ = (c & 0x3f) | 0x80;
/* Four bytes: codes up to 0x1fffff. */
900 else if ( c <= 0x1fffff )
902 *p++ = (c >> 18) | 0xf0;
903 *p++ = ((c >> 12) & 0x3f) | 0x80;
904 *p++ = ((c >> 6) & 0x3f) | 0x80;
905 *p++ = (c & 0x3f) | 0x80;
/* Five bytes: codes up to 0x3ffffff. */
907 else if ( c <= 0x3ffffff )
909 *p++ = (c >> 24) | 0xf8;
910 *p++ = ((c >> 18) & 0x3f) | 0x80;
911 *p++ = ((c >> 12) & 0x3f) | 0x80;
912 *p++ = ((c >> 6) & 0x3f) | 0x80;
913 *p++ = (c & 0x3f) | 0x80;
/* Six bytes: everything larger. */
917 *p++ = (c >> 30) | 0xfc;
918 *p++ = ((c >> 24) & 0x3f) | 0x80;
919 *p++ = ((c >> 18) & 0x3f) | 0x80;
920 *p++ = ((c >> 12) & 0x3f) | 0x80;
921 *p++ = ((c >> 6) & 0x3f) | 0x80;
922 *p++ = (c & 0x3f) | 0x80;
/* Leading-byte based form: private leading bytes are preceded by their
   prefix byte. */
925 BREAKUP_CHAR (c, charset, c1, c2);
926 lb = CHAR_LEADING_BYTE (c);
927 if (LEADING_BYTE_PRIVATE_P (lb))
928 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
930 if (EQ (charset, Vcharset_control_1))
939 /* Return the first character from a Mule-encoded string in STR,
940 assuming it's non-ASCII. Do not call this directly.
941 Use the macro charptr_emchar() instead. */
944 non_ascii_charptr_emchar (CONST Bufbyte *str)
957 else if ( b >= 0xf8 )
962 else if ( b >= 0xf0 )
967 else if ( b >= 0xe0 )
972 else if ( b >= 0xc0 )
982 for( ; len > 0; len-- )
985 ch = ( ch << 6 ) | ( b & 0x3f );
989 Bufbyte i0 = *str, i1, i2 = 0;
992 if (i0 == LEADING_BYTE_CONTROL_1)
993 return (Emchar) (*++str - 0x20);
995 if (LEADING_BYTE_PREFIX_P (i0))
1000 charset = CHARSET_BY_LEADING_BYTE (i0);
1001 if (XCHARSET_DIMENSION (charset) == 2)
1004 return MAKE_CHAR (charset, i1, i2);
1008 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1009 Do not call this directly. Use the macro valid_char_p() instead. */
1013 non_ascii_valid_char_p (Emchar ch)
1017 /* Must have only lowest 19 bits set */
1021 f1 = CHAR_FIELD1 (ch);
1022 f2 = CHAR_FIELD2 (ch);
1023 f3 = CHAR_FIELD3 (ch);
1027 Lisp_Object charset;
1029 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1030 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1031 f2 > MAX_CHAR_FIELD2_PRIVATE)
1036 if (f3 != 0x20 && f3 != 0x7F)
1040 /* NOTE: This takes advantage of the fact that
1041 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1042 FIELD2_TO_PRIVATE_LEADING_BYTE are the same. */
1044 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1045 return (XCHARSET_CHARS (charset) == 96);
1049 Lisp_Object charset;
1051 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1052 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1053 f1 > MAX_CHAR_FIELD1_PRIVATE)
1055 if (f2 < 0x20 || f3 < 0x20)
1058 #ifdef ENABLE_COMPOSITE_CHARS
1059 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1061 if (UNBOUNDP (Fgethash (make_int (ch),
1062 Vcomposite_char_char2string_hash_table,
1067 #endif /* ENABLE_COMPOSITE_CHARS */
1069 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
1072 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1074 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1077 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1079 return (XCHARSET_CHARS (charset) == 96);
1085 /************************************************************************/
1086 /* Basic string functions */
1087 /************************************************************************/
1089 /* Copy the character pointed to by PTR into STR, assuming it's
1090 non-ASCII. Do not call this directly. Use the macro
1091 charptr_copy_char() instead. */
/* Copy the bytes of one multi-byte character from PTR into STR.
   The switch is entered at the case matching the character's byte count
   (REP_BYTES_BY_FIRST_BYTE of the lead byte) and falls through, copying
   one trailing byte per case.  The result, strptr + 1 - str, is the
   number of bytes occupied in STR. */
1094 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1096 Bufbyte *strptr = str;
/* NOTE(review): the switch reads *strptr, i.e. STR[0], but no visible
   line copies the lead byte from PTR into it (or advances PTR past it)
   first.  Confirm that a statement such as `*strptr = *ptr++;' precedes
   the switch; without it the length is taken from the destination
   buffer's old contents and the lead byte is never copied. */
1098 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1100 /* Notice fallthrough. */
1102 case 6: *++strptr = *ptr++;
1103 case 5: *++strptr = *ptr++;
1105 case 4: *++strptr = *ptr++;
1106 case 3: *++strptr = *ptr++;
/* Last byte: PTR is not advanced any further. */
1107 case 2: *++strptr = *ptr;
1112 return strptr + 1 - str;
1116 /************************************************************************/
1117 /* streams of Emchars */
1118 /************************************************************************/
1120 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1121 The functions below are not meant to be called directly; use
1122 the macros in insdel.h. */
1125 Lstream_get_emchar_1 (Lstream *stream, int ch)
1127 Bufbyte str[MAX_EMCHAR_LEN];
1128 Bufbyte *strptr = str;
1130 str[0] = (Bufbyte) ch;
1131 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1133 /* Notice fallthrough. */
1136 ch = Lstream_getc (stream);
1138 *++strptr = (Bufbyte) ch;
1140 ch = Lstream_getc (stream);
1142 *++strptr = (Bufbyte) ch;
1145 ch = Lstream_getc (stream);
1147 *++strptr = (Bufbyte) ch;
1149 ch = Lstream_getc (stream);
1151 *++strptr = (Bufbyte) ch;
1153 ch = Lstream_getc (stream);
1155 *++strptr = (Bufbyte) ch;
1160 return charptr_emchar (str);
/* Write the character CH to STREAM.  CH is encoded into a local buffer
   of MAX_EMCHAR_LEN bytes with set_charptr_emchar, and the resulting
   bytes are written with Lstream_write, whose return value is returned
   unchanged. */
1164 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1166 Bufbyte str[MAX_EMCHAR_LEN];
1167 Bytecount len = set_charptr_emchar (str, ch);
1168 return Lstream_write (stream, str, len);
/* Push the character CH back onto STREAM.  CH is encoded into a local
   buffer of MAX_EMCHAR_LEN bytes with set_charptr_emchar, and the
   resulting bytes are handed to Lstream_unread. */
1172 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1174 Bufbyte str[MAX_EMCHAR_LEN];
1175 Bytecount len = set_charptr_emchar (str, ch);
1176 Lstream_unread (stream, str, len);
1180 /************************************************************************/
1181 /* charset object */
1182 /************************************************************************/
/* Mark method registered for the charset lrecord type (see the
   DEFINE_LRECORD_IMPLEMENTATION for "charset").  Calls MARKOBJ on the
   charset's short_name, long_name, doc_string, registry, ccl_program
   and decoding_table members.
   NOTE(review): the return statement and any further marked members are
   not visible here -- confirm. */
1185 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
1187 struct Lisp_Charset *cs = XCHARSET (obj);
1189 markobj (cs->short_name);
1190 markobj (cs->long_name);
1191 markobj (cs->doc_string);
1192 markobj (cs->registry);
1193 markobj (cs->ccl_program);
1195 markobj (cs->decoding_table);
/* Print method registered for the charset lrecord type.
   Writes to PRINTCHARFUN, in order: "#<charset ", the charset name, its
   short name, long name and doc string, then a formatted summary
   (type, direction, columns, graphic register, final byte), the
   registry after "reg=", and finally the lrecord uid in hex followed
   by ">". */
1201 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1203 struct Lisp_Charset *cs = XCHARSET (obj);
/* NOTE(review): the condition guarding this error is not visible here;
   presumably it fires only when readable printing was requested --
   confirm. */
1207 error ("printing unreadable object #<charset %s 0x%x>",
1208 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1211 write_c_string ("#<charset ", printcharfun);
1212 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1213 write_c_string (" ", printcharfun);
1214 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1215 write_c_string (" ", printcharfun);
1216 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1217 write_c_string (" ", printcharfun);
1218 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
/* The formatted text goes into buf (declared on a line not visible
   here) and is then written out. */
1219 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
1220 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
1221 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
1222 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
1224 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1225 CHARSET_COLUMNS (cs),
1226 CHARSET_GRAPHIC (cs),
1227 CHARSET_FINAL (cs));
1228 write_c_string (buf, printcharfun);
1229 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1230 sprintf (buf, " 0x%x>", cs->header.uid);
1231 write_c_string (buf, printcharfun);
1234 static const struct lrecord_description charset_description[] = {
1235 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1237 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
1242 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1243 mark_charset, print_charset, 0, 0, 0,
1244 charset_description,
1245 struct Lisp_Charset);
1247 /* Make a new charset. */
1250 make_charset (Charset_ID id, Lisp_Object name,
1251 unsigned char type, unsigned char columns, unsigned char graphic,
1252 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1253 Lisp_Object long_name, Lisp_Object doc,
1255 Lisp_Object decoding_table,
1256 Emchar ucs_min, Emchar ucs_max,
1257 Emchar code_offset, unsigned char byte_offset)
1260 struct Lisp_Charset *cs =
1261 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1262 XSETCHARSET (obj, cs);
1264 CHARSET_ID (cs) = id;
1265 CHARSET_NAME (cs) = name;
1266 CHARSET_SHORT_NAME (cs) = short_name;
1267 CHARSET_LONG_NAME (cs) = long_name;
1268 CHARSET_DIRECTION (cs) = direction;
1269 CHARSET_TYPE (cs) = type;
1270 CHARSET_COLUMNS (cs) = columns;
1271 CHARSET_GRAPHIC (cs) = graphic;
1272 CHARSET_FINAL (cs) = final;
1273 CHARSET_DOC_STRING (cs) = doc;
1274 CHARSET_REGISTRY (cs) = reg;
1275 CHARSET_CCL_PROGRAM (cs) = Qnil;
1276 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1278 CHARSET_DECODING_TABLE(cs) = Qnil;
1279 CHARSET_UCS_MIN(cs) = ucs_min;
1280 CHARSET_UCS_MAX(cs) = ucs_max;
1281 CHARSET_CODE_OFFSET(cs) = code_offset;
1282 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1285 switch (CHARSET_TYPE (cs))
1287 case CHARSET_TYPE_94:
1288 CHARSET_DIMENSION (cs) = 1;
1289 CHARSET_CHARS (cs) = 94;
1291 case CHARSET_TYPE_96:
1292 CHARSET_DIMENSION (cs) = 1;
1293 CHARSET_CHARS (cs) = 96;
1295 case CHARSET_TYPE_94X94:
1296 CHARSET_DIMENSION (cs) = 2;
1297 CHARSET_CHARS (cs) = 94;
1299 case CHARSET_TYPE_96X96:
1300 CHARSET_DIMENSION (cs) = 2;
1301 CHARSET_CHARS (cs) = 96;
1304 case CHARSET_TYPE_128:
1305 CHARSET_DIMENSION (cs) = 1;
1306 CHARSET_CHARS (cs) = 128;
1308 case CHARSET_TYPE_128X128:
1309 CHARSET_DIMENSION (cs) = 2;
1310 CHARSET_CHARS (cs) = 128;
1312 case CHARSET_TYPE_256:
1313 CHARSET_DIMENSION (cs) = 1;
1314 CHARSET_CHARS (cs) = 256;
1316 case CHARSET_TYPE_256X256:
1317 CHARSET_DIMENSION (cs) = 2;
1318 CHARSET_CHARS (cs) = 256;
1324 if (id == LEADING_BYTE_ASCII)
1325 CHARSET_REP_BYTES (cs) = 1;
1327 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1329 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1334 /* Some charsets do not have final characters. This includes
1335 ASCII, Control-1, Composite, and the two faux private charsets. */
1338 if (code_offset == 0)
1340 assert (NILP (charset_by_attributes[type][final]));
1341 charset_by_attributes[type][final] = obj;
1344 assert (NILP (charset_by_attributes[type][final][direction]));
1345 charset_by_attributes[type][final][direction] = obj;
1349 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1350 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1353 /* official leading byte */
1354 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
1357 /* Some charsets are "faux" and don't have names or really exist at
1358 all except in the leading-byte table. */
1360 Fputhash (name, obj, Vcharset_hash_table);
1365 get_unallocated_leading_byte (int dimension)
1370 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1373 lb = next_allocated_leading_byte++;
1377 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1380 lb = next_allocated_1_byte_leading_byte++;
1384 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1387 lb = next_allocated_2_byte_leading_byte++;
1393 ("No more character sets free for this dimension",
1394 make_int (dimension));
1401 range_charset_code_point (Lisp_Object charset, Emchar ch)
1405 if ((XCHARSET_UCS_MIN (charset) <= ch)
1406 && (ch <= XCHARSET_UCS_MAX (charset)))
1408 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1410 if (XCHARSET_DIMENSION (charset) == 1)
1411 return list1 (make_int (d + XCHARSET_BYTE_OFFSET (charset)));
1412 else if (XCHARSET_DIMENSION (charset) == 2)
1413 return list2 (make_int (d / XCHARSET_CHARS (charset)
1414 + XCHARSET_BYTE_OFFSET (charset)),
1415 make_int (d % XCHARSET_CHARS (charset)
1416 + XCHARSET_BYTE_OFFSET (charset)));
1417 else if (XCHARSET_DIMENSION (charset) == 3)
1418 return list3 (make_int (d / (XCHARSET_CHARS (charset)
1419 * XCHARSET_CHARS (charset))
1420 + XCHARSET_BYTE_OFFSET (charset)),
1421 make_int (d / XCHARSET_CHARS (charset)
1422 % XCHARSET_CHARS (charset)
1423 + XCHARSET_BYTE_OFFSET (charset)),
1424 make_int (d % XCHARSET_CHARS (charset)
1425 + XCHARSET_BYTE_OFFSET (charset)));
1426 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1427 return list4 (make_int (d / (XCHARSET_CHARS (charset)
1428 * XCHARSET_CHARS (charset)
1429 * XCHARSET_CHARS (charset))
1430 + XCHARSET_BYTE_OFFSET (charset)),
1431 make_int (d / (XCHARSET_CHARS (charset)
1432 * XCHARSET_CHARS (charset))
1433 % XCHARSET_CHARS (charset)
1434 + XCHARSET_BYTE_OFFSET (charset)),
1435 make_int (d / XCHARSET_CHARS (charset)
1436 % XCHARSET_CHARS (charset)
1437 + XCHARSET_BYTE_OFFSET (charset)),
1438 make_int (d % XCHARSET_CHARS (charset)
1439 + XCHARSET_BYTE_OFFSET (charset)));
1441 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1443 if (XCHARSET_DIMENSION (charset) == 1)
1445 if (XCHARSET_CHARS (charset) == 94)
1447 if (((d = ch - (MIN_CHAR_94
1448 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1450 return list1 (make_int (d + 33));
1452 else if (XCHARSET_CHARS (charset) == 96)
1454 if (((d = ch - (MIN_CHAR_96
1455 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1457 return list1 (make_int (d + 32));
1462 else if (XCHARSET_DIMENSION (charset) == 2)
1464 if (XCHARSET_CHARS (charset) == 94)
1466 if (((d = ch - (MIN_CHAR_94x94
1467 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1470 return list2 (make_int ((d / 94) + 33),
1471 make_int (d % 94 + 33));
1473 else if (XCHARSET_CHARS (charset) == 96)
1475 if (((d = ch - (MIN_CHAR_96x96
1476 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1479 return list2 (make_int ((d / 96) + 32),
1480 make_int (d % 96 + 32));
1488 split_builtin_char (Emchar c)
1490 if (c < MIN_CHAR_OBS_94x94)
1492 if (c <= MAX_CHAR_BASIC_LATIN)
1494 return list2 (Vcharset_ascii, make_int (c));
1498 return list2 (Vcharset_control_1, make_int (c & 0x7F));
1502 return list2 (Vcharset_latin_iso8859_1, make_int (c & 0x7F));
1504 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1506 return list2 (Vcharset_greek_iso8859_7,
1507 make_int (c - MIN_CHAR_GREEK + 0x20));
1509 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1511 return list2 (Vcharset_cyrillic_iso8859_5,
1512 make_int (c - MIN_CHAR_CYRILLIC + 0x20));
1514 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1516 return list2 (Vcharset_hebrew_iso8859_8,
1517 make_int (c - MIN_CHAR_HEBREW + 0x20));
1519 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1521 return list2 (Vcharset_thai_tis620,
1522 make_int (c - MIN_CHAR_THAI + 0x20));
1524 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1525 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1527 return list2 (Vcharset_katakana_jisx0201,
1528 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1532 return list3 (Vcharset_ucs_bmp,
1533 make_int (c >> 8), make_int (c & 0xff));
1536 else if (c <= MAX_CHAR_OBS_94x94)
1538 return list3 (CHARSET_BY_ATTRIBUTES
1539 (CHARSET_TYPE_94X94,
1540 ((c - MIN_CHAR_OBS_94x94) / (94 * 94)) + '@',
1541 CHARSET_LEFT_TO_RIGHT),
1542 make_int ((((c - MIN_CHAR_OBS_94x94) / 94) % 94) + 33),
1543 make_int (((c - MIN_CHAR_OBS_94x94) % 94) + 33));
1545 else if (c <= MAX_CHAR_94)
1547 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1548 ((c - MIN_CHAR_94) / 94) + '0',
1549 CHARSET_LEFT_TO_RIGHT),
1550 make_int (((c - MIN_CHAR_94) % 94) + 33));
1552 else if (c <= MAX_CHAR_96)
1554 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1555 ((c - MIN_CHAR_96) / 96) + '0',
1556 CHARSET_LEFT_TO_RIGHT),
1557 make_int (((c - MIN_CHAR_96) % 96) + 32));
1559 else if (c <= MAX_CHAR_94x94)
1561 return list3 (CHARSET_BY_ATTRIBUTES
1562 (CHARSET_TYPE_94X94,
1563 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1564 CHARSET_LEFT_TO_RIGHT),
1565 make_int ((((c - MIN_CHAR_94x94) / 94) % 94) + 33),
1566 make_int (((c - MIN_CHAR_94x94) % 94) + 33));
1568 else if (c <= MAX_CHAR_96x96)
1570 return list3 (CHARSET_BY_ATTRIBUTES
1571 (CHARSET_TYPE_96X96,
1572 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1573 CHARSET_LEFT_TO_RIGHT),
1574 make_int ((((c - MIN_CHAR_96x96) / 96) % 96) + 32),
1575 make_int (((c - MIN_CHAR_96x96) % 96) + 32));
1584 charset_code_point (Lisp_Object charset, Emchar ch)
1586 Lisp_Object cdef = get_char_code_table (ch, Vcharacter_attribute_table);
1588 if (!EQ (cdef, Qnil))
1590 Lisp_Object field = Fassq (charset, cdef);
1592 if (!EQ (field, Qnil))
1593 return Fcdr (field);
1595 return range_charset_code_point (charset, ch);
/* Lisp-visible list giving the default order of preferred
   coded-character-sets (exported as
   `default-coded-charset-priority-list').  */
1598 Lisp_Object Vdefault_coded_charset_priority_list;
1602 /************************************************************************/
1603 /* Basic charset Lisp functions */
1604 /************************************************************************/
1606 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1607 Return non-nil if OBJECT is a charset.
1611 return CHARSETP (object) ? Qt : Qnil;
1614 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1615 Retrieve the charset of the given name.
1616 If CHARSET-OR-NAME is a charset object, it is simply returned.
1617 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1618 nil is returned. Otherwise the associated charset object is returned.
1622 if (CHARSETP (charset_or_name))
1623 return charset_or_name;
1625 CHECK_SYMBOL (charset_or_name);
1626 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1629 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1630 Retrieve the charset of the given name.
1631 Same as `find-charset' except an error is signalled if there is no such
1632 charset instead of returning nil.
1636 Lisp_Object charset = Ffind_charset (name);
1639 signal_simple_error ("No such charset", name);
1643 /* We store the charsets in hash tables with the names as the key and the
1644 actual charset object as the value. Occasionally we need to use them
1645 in a list format. These routines provide us with that. */
1646 struct charset_list_closure
1648 Lisp_Object *charset_list;
1652 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1653 void *charset_list_closure)
1655 /* This function can GC */
1656 struct charset_list_closure *chcl =
1657 (struct charset_list_closure*) charset_list_closure;
1658 Lisp_Object *charset_list = chcl->charset_list;
1660 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1664 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1665 Return a list of the names of all defined charsets.
1669 Lisp_Object charset_list = Qnil;
1670 struct gcpro gcpro1;
1671 struct charset_list_closure charset_list_closure;
1673 GCPRO1 (charset_list);
1674 charset_list_closure.charset_list = &charset_list;
1675 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1676 &charset_list_closure);
1679 return charset_list;
1682 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1683 Return the name of the given charset.
1687 return XCHARSET_NAME (Fget_charset (charset));
1690 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1691 Define a new character set.
1692 This function is for use with Mule support.
1693 NAME is a symbol, the name by which the character set is normally referred.
1694 DOC-STRING is a string describing the character set.
1695 PROPS is a property list, describing the specific nature of the
1696 character set. Recognized properties are:
1698 'short-name Short version of the charset name (ex: Latin-1)
1699 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1700 'registry A regular expression matching the font registry field for
1702 'dimension Number of octets used to index a character in this charset.
1703 Either 1 or 2. Defaults to 1.
1704 'columns Number of columns used to display a character in this charset.
1705 Only used in TTY mode. (Under X, the actual width of a
1706 character can be derived from the font used to display the
1707 characters.) If unspecified, defaults to the dimension
1708 (this is almost always the correct value).
1709 'chars Number of characters in each dimension (94 or 96).
1710 Defaults to 94. Note that if the dimension is 2, the
1711 character set thus described is 94x94 or 96x96.
1712 'final Final byte of ISO 2022 escape sequence. Must be
1713 supplied. Each combination of (DIMENSION, CHARS) defines a
1714 separate namespace for final bytes. Note that ISO
1715 2022 restricts the final byte to the range
1716 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1717 dimension == 2. Note also that final bytes in the range
1718 0x30 - 0x3F are reserved for user-defined (not official)
1720 'graphic 0 (use left half of font on output) or 1 (use right half
1721 of font on output). Defaults to 0. For example, for
1722 a font whose registry is ISO8859-1, the left half
1723 (octets 0x20 - 0x7F) is the `ascii' character set, while
1724 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1725 character set. With 'graphic set to 0, the octets
1726 will have their high bit cleared; with it set to 1,
1727 the octets will have their high bit set.
1728 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1730 'ccl-program A compiled CCL program used to convert a character in
1731 this charset into an index into the font. This is in
1732 addition to the 'graphic property. The CCL program
1733 is passed the octets of the character, with the high
1734 bit cleared and set depending upon whether the value
1735 of the 'graphic property is 0 or 1.
1737 (name, doc_string, props))
1739 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1740 int direction = CHARSET_LEFT_TO_RIGHT;
1742 Lisp_Object registry = Qnil;
1743 Lisp_Object charset;
1744 Lisp_Object rest, keyword, value;
1745 Lisp_Object ccl_program = Qnil;
1746 Lisp_Object short_name = Qnil, long_name = Qnil;
1747 int byte_offset = -1;
1749 CHECK_SYMBOL (name);
1750 if (!NILP (doc_string))
1751 CHECK_STRING (doc_string);
1753 charset = Ffind_charset (name);
1754 if (!NILP (charset))
1755 signal_simple_error ("Cannot redefine existing charset", name);
1757 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1759 if (EQ (keyword, Qshort_name))
1761 CHECK_STRING (value);
1765 if (EQ (keyword, Qlong_name))
1767 CHECK_STRING (value);
1771 else if (EQ (keyword, Qdimension))
1774 dimension = XINT (value);
1775 if (dimension < 1 || dimension > 2)
1776 signal_simple_error ("Invalid value for 'dimension", value);
1779 else if (EQ (keyword, Qchars))
1782 chars = XINT (value);
1783 if (chars != 94 && chars != 96)
1784 signal_simple_error ("Invalid value for 'chars", value);
1787 else if (EQ (keyword, Qcolumns))
1790 columns = XINT (value);
1791 if (columns != 1 && columns != 2)
1792 signal_simple_error ("Invalid value for 'columns", value);
1795 else if (EQ (keyword, Qgraphic))
1798 graphic = XINT (value);
1800 if (graphic < 0 || graphic > 2)
1802 if (graphic < 0 || graphic > 1)
1804 signal_simple_error ("Invalid value for 'graphic", value);
1807 else if (EQ (keyword, Qregistry))
1809 CHECK_STRING (value);
1813 else if (EQ (keyword, Qdirection))
1815 if (EQ (value, Ql2r))
1816 direction = CHARSET_LEFT_TO_RIGHT;
1817 else if (EQ (value, Qr2l))
1818 direction = CHARSET_RIGHT_TO_LEFT;
1820 signal_simple_error ("Invalid value for 'direction", value);
1823 else if (EQ (keyword, Qfinal))
1825 CHECK_CHAR_COERCE_INT (value);
1826 final = XCHAR (value);
1827 if (final < '0' || final > '~')
1828 signal_simple_error ("Invalid value for 'final", value);
1831 else if (EQ (keyword, Qccl_program))
1833 CHECK_VECTOR (value);
1834 ccl_program = value;
1838 signal_simple_error ("Unrecognized property", keyword);
1842 error ("'final must be specified");
1843 if (dimension == 2 && final > 0x5F)
1845 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1849 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1851 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1853 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1854 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1856 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1858 id = get_unallocated_leading_byte (dimension);
1860 if (NILP (doc_string))
1861 doc_string = build_string ("");
1863 if (NILP (registry))
1864 registry = build_string ("");
1866 if (NILP (short_name))
1867 XSETSTRING (short_name, XSYMBOL (name)->name);
1869 if (NILP (long_name))
1870 long_name = doc_string;
1873 columns = dimension;
1875 if (byte_offset < 0)
1879 else if (chars == 96)
1885 charset = make_charset (id, name, type, columns, graphic,
1886 final, direction, short_name, long_name,
1887 doc_string, registry,
1888 Qnil, 0, 0, 0, byte_offset);
1889 if (!NILP (ccl_program))
1890 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1894 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1896 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1897 NEW-NAME is the name of the new charset. Return the new charset.
1899 (charset, new_name))
1901 Lisp_Object new_charset = Qnil;
1902 int id, dimension, columns, graphic, final;
1903 int direction, type;
1904 Lisp_Object registry, doc_string, short_name, long_name;
1905 struct Lisp_Charset *cs;
1907 charset = Fget_charset (charset);
1908 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1909 signal_simple_error ("Charset already has reverse-direction charset",
1912 CHECK_SYMBOL (new_name);
1913 if (!NILP (Ffind_charset (new_name)))
1914 signal_simple_error ("Cannot redefine existing charset", new_name);
1916 cs = XCHARSET (charset);
1918 type = CHARSET_TYPE (cs);
1919 columns = CHARSET_COLUMNS (cs);
1920 dimension = CHARSET_DIMENSION (cs);
1921 id = get_unallocated_leading_byte (dimension);
1923 graphic = CHARSET_GRAPHIC (cs);
1924 final = CHARSET_FINAL (cs);
1925 direction = CHARSET_RIGHT_TO_LEFT;
1926 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1927 direction = CHARSET_LEFT_TO_RIGHT;
1928 doc_string = CHARSET_DOC_STRING (cs);
1929 short_name = CHARSET_SHORT_NAME (cs);
1930 long_name = CHARSET_LONG_NAME (cs);
1931 registry = CHARSET_REGISTRY (cs);
1933 new_charset = make_charset (id, new_name, type, columns,
1934 graphic, final, direction, short_name, long_name,
1935 doc_string, registry,
1937 CHARSET_DECODING_TABLE(cs),
1938 CHARSET_UCS_MIN(cs),
1939 CHARSET_UCS_MAX(cs),
1940 CHARSET_CODE_OFFSET(cs),
1941 CHARSET_BYTE_OFFSET(cs)
1947 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1948 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1953 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1954 Define symbol ALIAS as an alias for CHARSET.
1958 CHECK_SYMBOL (alias);
1959 charset = Fget_charset (charset);
1960 return Fputhash (alias, charset, Vcharset_hash_table);
1963 /* #### Reverse direction charsets not yet implemented. */
/* NOTE(review): restored from a damaged listing.  The `#if 0' pair and
   the `1, 1, 0' argument counts are reconstructions; the function is
   kept disabled because its registration in syms_of_mule_charset is
   commented out.  Confirm against upstream.  */
#if 0
DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
       1, 1, 0, /*
Return the reverse-direction charset parallel to CHARSET, if any.
This is the charset with the same properties (in particular, the same
dimension, number of characters per dimension, and final byte) as
CHARSET but whose characters are displayed in the opposite direction.
*/
       (charset))
{
  charset = Fget_charset (charset);
  return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
}
#endif
1979 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1980 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1981 If DIRECTION is omitted, both directions will be checked (left-to-right
1982 will be returned if character sets exist for both directions).
1984 (dimension, chars, final, direction))
1986 int dm, ch, fi, di = -1;
1988 Lisp_Object obj = Qnil;
1990 CHECK_INT (dimension);
1991 dm = XINT (dimension);
1992 if (dm < 1 || dm > 2)
1993 signal_simple_error ("Invalid value for DIMENSION", dimension);
1997 if (ch != 94 && ch != 96)
1998 signal_simple_error ("Invalid value for CHARS", chars);
2000 CHECK_CHAR_COERCE_INT (final);
2002 if (fi < '0' || fi > '~')
2003 signal_simple_error ("Invalid value for FINAL", final);
2005 if (EQ (direction, Ql2r))
2006 di = CHARSET_LEFT_TO_RIGHT;
2007 else if (EQ (direction, Qr2l))
2008 di = CHARSET_RIGHT_TO_LEFT;
2009 else if (!NILP (direction))
2010 signal_simple_error ("Invalid value for DIRECTION", direction);
2012 if (dm == 2 && fi > 0x5F)
2014 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2017 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2019 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2023 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2025 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2028 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2031 return XCHARSET_NAME (obj);
2035 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2036 Return short name of CHARSET.
2040 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2043 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2044 Return long name of CHARSET.
2048 return XCHARSET_LONG_NAME (Fget_charset (charset));
2051 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2052 Return description of CHARSET.
2056 return XCHARSET_DOC_STRING (Fget_charset (charset));
2059 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2060 Return dimension of CHARSET.
2064 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2067 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2068 Return property PROP of CHARSET.
2069 Recognized properties are those listed in `make-charset', as well as
2070 'name and 'doc-string.
2074 struct Lisp_Charset *cs;
2076 charset = Fget_charset (charset);
2077 cs = XCHARSET (charset);
2079 CHECK_SYMBOL (prop);
2080 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2081 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2082 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2083 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2084 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2085 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2086 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2087 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2088 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2089 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2090 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2091 if (EQ (prop, Qdirection))
2092 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2093 if (EQ (prop, Qreverse_direction_charset))
2095 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2099 return XCHARSET_NAME (obj);
2101 signal_simple_error ("Unrecognized charset property name", prop);
2102 return Qnil; /* not reached */
2105 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2106 Return charset identification number of CHARSET.
2110 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
/* #### We need to figure out which properties we really want to
   allow to be set. */
2116 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2117 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2119 (charset, ccl_program))
2121 charset = Fget_charset (charset);
2122 CHECK_VECTOR (ccl_program);
2123 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2128 invalidate_charset_font_caches (Lisp_Object charset)
2130 /* Invalidate font cache entries for charset on all devices. */
2131 Lisp_Object devcons, concons, hash_table;
2132 DEVICE_LOOP_NO_BREAK (devcons, concons)
2134 struct device *d = XDEVICE (XCAR (devcons));
2135 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2136 if (!UNBOUNDP (hash_table))
2137 Fclrhash (hash_table);
2141 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2142 Set the 'registry property of CHARSET to REGISTRY.
2144 (charset, registry))
2146 charset = Fget_charset (charset);
2147 CHECK_STRING (registry);
2148 XCHARSET_REGISTRY (charset) = registry;
2149 invalidate_charset_font_caches (charset);
2150 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2155 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2156 Return mapping-table of CHARSET.
2160 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2163 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2164 Set mapping-table of CHARSET to TABLE.
2168 struct Lisp_Charset *cs;
2169 Lisp_Object old_table;
2172 charset = Fget_charset (charset);
2173 cs = XCHARSET (charset);
2175 if (EQ (table, Qnil))
2177 CHARSET_DECODING_TABLE(cs) = table;
2180 else if (VECTORP (table))
2184 /* ad-hoc method for `ascii' */
2185 if ((CHARSET_CHARS (cs) == 94) &&
2186 (CHARSET_BYTE_OFFSET (cs) != 33))
2187 ccs_len = 128 - CHARSET_BYTE_OFFSET (cs);
2189 ccs_len = CHARSET_CHARS (cs);
2191 if (XVECTOR_LENGTH (table) > ccs_len)
2192 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2193 old_table = CHARSET_DECODING_TABLE(cs);
2194 CHARSET_DECODING_TABLE(cs) = table;
2197 signal_error (Qwrong_type_argument,
2198 list2 (build_translated_string ("vector-or-nil-p"),
2200 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2202 switch (CHARSET_DIMENSION (cs))
2205 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2207 Lisp_Object c = XVECTOR_DATA(table)[i];
2212 list1 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2216 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2218 Lisp_Object v = XVECTOR_DATA(table)[i];
2224 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2226 CHARSET_DECODING_TABLE(cs) = old_table;
2227 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2229 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2231 Lisp_Object c = XVECTOR_DATA(v)[j];
2234 put_char_attribute (c, charset,
2237 (i + CHARSET_BYTE_OFFSET (cs)),
2239 (j + CHARSET_BYTE_OFFSET (cs))));
2243 put_char_attribute (v, charset,
2245 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2254 /************************************************************************/
2255 /* Lisp primitives for working with characters */
2256 /************************************************************************/
/* Lisp primitive `make-char'.

   CHARSET is resolved with Fget_charset.  The permitted octet range
   [lowlim, highlim] is chosen from the charset (fixed ranges for
   `ascii' and `control-1', otherwise by CHARSET_CHARS); each octet
   outside it is rejected with args_out_of_range_3.  The character is
   then built with MAKE_CHAR, passing 0 as the second octet for a
   one-dimensional charset.

   NOTE(review): this listing lacks several short lines -- the closing
   delimiter of the doc comment, braces, the declaration and assignment
   of a1, the CHECK_INT calls and presumably preprocessor guards.
   Restore them from upstream before compiling; they are not
   reconstructed here because the way a1 is derived from ARG1 cannot
   be determined from what is visible.  */
2258 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2259 Make a character from CHARSET and octets ARG1 and ARG2.
2260 ARG2 is required only for characters from two-dimensional charsets.
2261 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2262 character s with caron.
2264 (charset, arg1, arg2))
2266 struct Lisp_Charset *cs;
2268 int lowlim, highlim;
2270 charset = Fget_charset (charset);
2271 cs = XCHARSET (charset);
2273 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2274 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2276 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2278 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2279 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2282 /* It is useful (and safe, according to Olivier Galibert) to strip
2283 the 8th bit off ARG1 and ARG2 because it allows programmers to
2284 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2285 Latin 2 code of the character. */
2293 if (a1 < lowlim || a1 > highlim)
2294 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2296 if (CHARSET_DIMENSION (cs) == 1)
2300 ("Charset is of dimension one; second octet must be nil", arg2);
2301 return make_char (MAKE_CHAR (charset, a1, 0));
2310 a2 = XINT (arg2) & 0x7f;
2312 if (a2 < lowlim || a2 > highlim)
2313 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2315 return make_char (MAKE_CHAR (charset, a1, a2));
2318 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2319 Return the character set of char CH.
2323 CHECK_CHAR_COERCE_INT (ch);
2325 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2328 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2329 Return list of charset and one or two position-codes of CHAR.
2333 /* This function can GC */
2334 struct gcpro gcpro1, gcpro2;
2335 Lisp_Object charset = Qnil;
2336 Lisp_Object rc = Qnil;
2339 GCPRO2 (charset, rc);
2340 CHECK_CHAR_COERCE_INT (character);
2342 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2344 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2346 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2350 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2358 #ifdef ENABLE_COMPOSITE_CHARS
2359 /************************************************************************/
2360 /* composite character functions */
2361 /************************************************************************/
2364 lookup_composite_char (Bufbyte *str, int len)
2366 Lisp_Object lispstr = make_string (str, len);
2367 Lisp_Object ch = Fgethash (lispstr,
2368 Vcomposite_char_string2char_hash_table,
2374 if (composite_char_row_next >= 128)
2375 signal_simple_error ("No more composite chars available", lispstr);
2376 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2377 composite_char_col_next);
2378 Fputhash (make_char (emch), lispstr,
2379 Vcomposite_char_char2string_hash_table);
2380 Fputhash (lispstr, make_char (emch),
2381 Vcomposite_char_string2char_hash_table);
2382 composite_char_col_next++;
2383 if (composite_char_col_next >= 128)
2385 composite_char_col_next = 32;
2386 composite_char_row_next++;
2395 composite_char_string (Emchar ch)
2397 Lisp_Object str = Fgethash (make_char (ch),
2398 Vcomposite_char_char2string_hash_table,
2400 assert (!UNBOUNDP (str));
2404 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2405 Convert a string into a single composite character.
2406 The character is the result of overstriking all the characters in
2411 CHECK_STRING (string);
2412 return make_char (lookup_composite_char (XSTRING_DATA (string),
2413 XSTRING_LENGTH (string)));
2416 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2417 Return a string of the characters comprising a composite character.
2425 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2426 signal_simple_error ("Must be composite char", ch);
2427 return composite_char_string (emch);
2429 #endif /* ENABLE_COMPOSITE_CHARS */
2432 /************************************************************************/
2433 /* initialization */
2434 /************************************************************************/
/* Registers this file's Lisp primitives (DEFSUBR) and interns the
   symbols it uses (defsymbol): charset property names, the two
   direction symbols, and the names of the predefined charsets.

   NOTE(review): this listing lacks short lines -- at least the return
   type, the braces and the #endif that closes the visible
   `#ifdef ENABLE_COMPOSITE_CHARS'.  Gaps in the original line
   numbering around the character-attribute DEFSUBRs and around the
   symbols from "->ucs" to "katakana-jisx0208" suggest further
   preprocessor guards were dropped as well; restore them from
   upstream before compiling.  */
2437 syms_of_mule_charset (void)
2439 DEFSUBR (Fcharsetp);
2440 DEFSUBR (Ffind_charset);
2441 DEFSUBR (Fget_charset);
2442 DEFSUBR (Fcharset_list);
2443 DEFSUBR (Fcharset_name);
2444 DEFSUBR (Fmake_charset);
2445 DEFSUBR (Fmake_reverse_direction_charset);
2446 /* DEFSUBR (Freverse_direction_charset); */
2447 DEFSUBR (Fdefine_charset_alias);
2448 DEFSUBR (Fcharset_from_attributes);
2449 DEFSUBR (Fcharset_short_name);
2450 DEFSUBR (Fcharset_long_name);
2451 DEFSUBR (Fcharset_description);
2452 DEFSUBR (Fcharset_dimension);
2453 DEFSUBR (Fcharset_property);
2454 DEFSUBR (Fcharset_id);
2455 DEFSUBR (Fset_charset_ccl_program);
2456 DEFSUBR (Fset_charset_registry);
2458 DEFSUBR (Fchar_attribute_alist);
2459 DEFSUBR (Fget_char_attribute);
2460 DEFSUBR (Fput_char_attribute);
2461 DEFSUBR (Fdefine_char);
2462 DEFSUBR (Fchar_variants);
2463 DEFSUBR (Fget_composite_char);
2464 DEFSUBR (Fcharset_mapping_table);
2465 DEFSUBR (Fset_charset_mapping_table);
2468 DEFSUBR (Fmake_char);
2469 DEFSUBR (Fchar_charset);
2470 DEFSUBR (Fsplit_char);
2472 #ifdef ENABLE_COMPOSITE_CHARS
2473 DEFSUBR (Fmake_composite_char);
2474 DEFSUBR (Fcomposite_char_string);
2477 defsymbol (&Qcharsetp, "charsetp");
2478 defsymbol (&Qregistry, "registry");
2479 defsymbol (&Qfinal, "final");
2480 defsymbol (&Qgraphic, "graphic");
2481 defsymbol (&Qdirection, "direction");
2482 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2483 defsymbol (&Qshort_name, "short-name");
2484 defsymbol (&Qlong_name, "long-name");
2486 defsymbol (&Ql2r, "l2r");
2487 defsymbol (&Qr2l, "r2l");
2489 /* Charsets, compatible with FSF 20.3
2490 Naming convention is Script-Charset[-Edition] */
2491 defsymbol (&Qascii, "ascii");
2492 defsymbol (&Qcontrol_1, "control-1");
2493 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2494 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2495 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2496 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2497 defsymbol (&Qthai_tis620, "thai-tis620");
2498 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2499 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2500 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2501 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2502 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2503 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2504 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2505 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2506 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2507 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2508 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2509 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2510 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2511 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2513 defsymbol (&Q_ucs, "->ucs");
2514 defsymbol (&Q_decomposition, "->decomposition");
2515 defsymbol (&Qcompat, "compat");
2516 defsymbol (&QnoBreak, "noBreak");
2517 defsymbol (&Qfraction, "fraction");
2518 defsymbol (&Qsuper, "super");
2519 defsymbol (&Qsub, "sub");
2520 defsymbol (&Qcircle, "circle");
2521 defsymbol (&Qwide, "wide");
2522 defsymbol (&Qnarrow, "narrow");
2523 defsymbol (&Qfont, "font");
2524 defsymbol (&Qucs, "ucs");
2525 defsymbol (&Qucs_bmp, "ucs-bmp");
2526 defsymbol (&Qlatin_viscii, "latin-viscii");
2527 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2528 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2529 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2530 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2531 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2532 defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208");
2533 defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208");
2535 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2536 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2538 defsymbol (&Qcomposite, "composite");
/* Start-up initialisation of this file's C-level state: the
   charset_by_leading_byte and charset_by_attributes tables are filled
   with Qnil, the leading-byte allocation counters are seeded, the
   Lisp variables `leading-code-private-11', `utf-2000-version' and
   `default-coded-charset-priority-list' are defined, and the three
   character tables (attribute, composition, variant) are created and
   staticpro'd.

   NOTE(review): this listing lacks short lines.  charset_by_attributes
   is cleared once as a two-level and once as a three-level array, and
   the leading-byte counters are seeded in two different forms, which
   suggests these are alternative configurations whose preprocessor
   guard lines were dropped.  The return type, braces, loop-variable
   declarations and the closing delimiters of the DEFVAR doc comments
   are missing too; restore them from upstream before compiling.  */
2542 vars_of_mule_charset (void)
2549 /* Table of charsets indexed by leading byte. */
2550 for (i = 0; i < countof (charset_by_leading_byte); i++)
2551 charset_by_leading_byte[i] = Qnil;
2554 /* Table of charsets indexed by type/final-byte. */
2555 for (i = 0; i < countof (charset_by_attributes); i++)
2556 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2557 charset_by_attributes[i][j] = Qnil;
2559 /* Table of charsets indexed by type/final-byte/direction. */
2560 for (i = 0; i < countof (charset_by_attributes); i++)
2561 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2562 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2563 charset_by_attributes[i][j][k] = Qnil;
2567 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2569 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2570 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2574 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2575 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2576 Leading-code of private TYPE9N charset of column-width 1.
2578 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2582 Vutf_2000_version = build_string("0.12 (Kashiwara)");
2583 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2584 Version number of UTF-2000.
2587 staticpro (&Vcharacter_attribute_table);
2588 Vcharacter_attribute_table = make_char_code_table (Qnil);
2590 staticpro (&Vcharacter_composition_table);
2591 Vcharacter_composition_table = make_char_code_table (Qnil);
2593 staticpro (&Vcharacter_variant_table);
2594 Vcharacter_variant_table = make_char_code_table (Qnil);
2596 Vdefault_coded_charset_priority_list = Qnil;
2597 DEFVAR_LISP ("default-coded-charset-priority-list",
2598 &Vdefault_coded_charset_priority_list /*
2599 Default order of preferred coded-character-sets.
2605 complex_vars_of_mule_charset (void)
2607 staticpro (&Vcharset_hash_table);
2608 Vcharset_hash_table =
2609 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2611 /* Predefined character sets. We store them into variables for
2616 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2617 CHARSET_TYPE_256X256, 1, 2, 0,
2618 CHARSET_LEFT_TO_RIGHT,
2619 build_string ("BMP"),
2620 build_string ("BMP"),
2621 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2622 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2623 Qnil, 0, 0xFFFF, 0, 0);
2625 # define MIN_CHAR_THAI 0
2626 # define MAX_CHAR_THAI 0
2627 # define MIN_CHAR_GREEK 0
2628 # define MAX_CHAR_GREEK 0
2629 # define MIN_CHAR_HEBREW 0
2630 # define MAX_CHAR_HEBREW 0
2631 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2632 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2633 # define MIN_CHAR_CYRILLIC 0
2634 # define MAX_CHAR_CYRILLIC 0
2637 make_charset (LEADING_BYTE_ASCII, Qascii,
2638 CHARSET_TYPE_94, 1, 0, 'B',
2639 CHARSET_LEFT_TO_RIGHT,
2640 build_string ("ASCII"),
2641 build_string ("ASCII)"),
2642 build_string ("ASCII (ISO646 IRV)"),
2643 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2644 Qnil, 0, 0x7F, 0, 0);
2645 Vcharset_control_1 =
2646 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2647 CHARSET_TYPE_94, 1, 1, 0,
2648 CHARSET_LEFT_TO_RIGHT,
2649 build_string ("C1"),
2650 build_string ("Control characters"),
2651 build_string ("Control characters 128-191"),
2653 Qnil, 0x80, 0x9F, 0, 0);
2654 Vcharset_latin_iso8859_1 =
2655 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2656 CHARSET_TYPE_96, 1, 1, 'A',
2657 CHARSET_LEFT_TO_RIGHT,
2658 build_string ("Latin-1"),
2659 build_string ("ISO8859-1 (Latin-1)"),
2660 build_string ("ISO8859-1 (Latin-1)"),
2661 build_string ("iso8859-1"),
2662 Qnil, 0xA0, 0xFF, 0, 32);
2663 Vcharset_latin_iso8859_2 =
2664 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2665 CHARSET_TYPE_96, 1, 1, 'B',
2666 CHARSET_LEFT_TO_RIGHT,
2667 build_string ("Latin-2"),
2668 build_string ("ISO8859-2 (Latin-2)"),
2669 build_string ("ISO8859-2 (Latin-2)"),
2670 build_string ("iso8859-2"),
2672 Vcharset_latin_iso8859_3 =
2673 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2674 CHARSET_TYPE_96, 1, 1, 'C',
2675 CHARSET_LEFT_TO_RIGHT,
2676 build_string ("Latin-3"),
2677 build_string ("ISO8859-3 (Latin-3)"),
2678 build_string ("ISO8859-3 (Latin-3)"),
2679 build_string ("iso8859-3"),
2681 Vcharset_latin_iso8859_4 =
2682 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2683 CHARSET_TYPE_96, 1, 1, 'D',
2684 CHARSET_LEFT_TO_RIGHT,
2685 build_string ("Latin-4"),
2686 build_string ("ISO8859-4 (Latin-4)"),
2687 build_string ("ISO8859-4 (Latin-4)"),
2688 build_string ("iso8859-4"),
2690 Vcharset_thai_tis620 =
2691 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2692 CHARSET_TYPE_96, 1, 1, 'T',
2693 CHARSET_LEFT_TO_RIGHT,
2694 build_string ("TIS620"),
2695 build_string ("TIS620 (Thai)"),
2696 build_string ("TIS620.2529 (Thai)"),
2697 build_string ("tis620"),
2698 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2699 Vcharset_greek_iso8859_7 =
2700 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2701 CHARSET_TYPE_96, 1, 1, 'F',
2702 CHARSET_LEFT_TO_RIGHT,
2703 build_string ("ISO8859-7"),
2704 build_string ("ISO8859-7 (Greek)"),
2705 build_string ("ISO8859-7 (Greek)"),
2706 build_string ("iso8859-7"),
2707 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2708 Vcharset_arabic_iso8859_6 =
2709 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2710 CHARSET_TYPE_96, 1, 1, 'G',
2711 CHARSET_RIGHT_TO_LEFT,
2712 build_string ("ISO8859-6"),
2713 build_string ("ISO8859-6 (Arabic)"),
2714 build_string ("ISO8859-6 (Arabic)"),
2715 build_string ("iso8859-6"),
2717 Vcharset_hebrew_iso8859_8 =
2718 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2719 CHARSET_TYPE_96, 1, 1, 'H',
2720 CHARSET_RIGHT_TO_LEFT,
2721 build_string ("ISO8859-8"),
2722 build_string ("ISO8859-8 (Hebrew)"),
2723 build_string ("ISO8859-8 (Hebrew)"),
2724 build_string ("iso8859-8"),
2725 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2726 Vcharset_katakana_jisx0201 =
2727 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2728 CHARSET_TYPE_94, 1, 1, 'I',
2729 CHARSET_LEFT_TO_RIGHT,
2730 build_string ("JISX0201 Kana"),
2731 build_string ("JISX0201.1976 (Japanese Kana)"),
2732 build_string ("JISX0201.1976 Japanese Kana"),
2733 build_string ("jisx0201\\.1976"),
2735 MIN_CHAR_HALFWIDTH_KATAKANA,
2736 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2737 Vcharset_latin_jisx0201 =
2738 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2739 CHARSET_TYPE_94, 1, 0, 'J',
2740 CHARSET_LEFT_TO_RIGHT,
2741 build_string ("JISX0201 Roman"),
2742 build_string ("JISX0201.1976 (Japanese Roman)"),
2743 build_string ("JISX0201.1976 Japanese Roman"),
2744 build_string ("jisx0201\\.1976"),
2746 Vcharset_cyrillic_iso8859_5 =
2747 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2748 CHARSET_TYPE_96, 1, 1, 'L',
2749 CHARSET_LEFT_TO_RIGHT,
2750 build_string ("ISO8859-5"),
2751 build_string ("ISO8859-5 (Cyrillic)"),
2752 build_string ("ISO8859-5 (Cyrillic)"),
2753 build_string ("iso8859-5"),
2754 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2755 Vcharset_latin_iso8859_9 =
2756 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2757 CHARSET_TYPE_96, 1, 1, 'M',
2758 CHARSET_LEFT_TO_RIGHT,
2759 build_string ("Latin-5"),
2760 build_string ("ISO8859-9 (Latin-5)"),
2761 build_string ("ISO8859-9 (Latin-5)"),
2762 build_string ("iso8859-9"),
2764 Vcharset_japanese_jisx0208_1978 =
2765 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2766 CHARSET_TYPE_94X94, 2, 0, '@',
2767 CHARSET_LEFT_TO_RIGHT,
2768 build_string ("JIS X0208:1978"),
2769 build_string ("JIS X0208:1978 (Japanese)"),
2771 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2772 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2774 Vcharset_chinese_gb2312 =
2775 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2776 CHARSET_TYPE_94X94, 2, 0, 'A',
2777 CHARSET_LEFT_TO_RIGHT,
2778 build_string ("GB2312"),
2779 build_string ("GB2312)"),
2780 build_string ("GB2312 Chinese simplified"),
2781 build_string ("gb2312"),
2783 Vcharset_japanese_jisx0208 =
2784 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2785 CHARSET_TYPE_94X94, 2, 0, 'B',
2786 CHARSET_LEFT_TO_RIGHT,
2787 build_string ("JISX0208"),
2788 build_string ("JIS X0208:1983 (Japanese)"),
2789 build_string ("JIS X0208:1983 Japanese Kanji"),
2790 build_string ("jisx0208\\.1983"),
2792 Vcharset_korean_ksc5601 =
2793 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2794 CHARSET_TYPE_94X94, 2, 0, 'C',
2795 CHARSET_LEFT_TO_RIGHT,
2796 build_string ("KSC5601"),
2797 build_string ("KSC5601 (Korean"),
2798 build_string ("KSC5601 Korean Hangul and Hanja"),
2799 build_string ("ksc5601"),
2801 Vcharset_japanese_jisx0212 =
2802 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2803 CHARSET_TYPE_94X94, 2, 0, 'D',
2804 CHARSET_LEFT_TO_RIGHT,
2805 build_string ("JISX0212"),
2806 build_string ("JISX0212 (Japanese)"),
2807 build_string ("JISX0212 Japanese Supplement"),
2808 build_string ("jisx0212"),
2811 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2812 Vcharset_chinese_cns11643_1 =
2813 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2814 CHARSET_TYPE_94X94, 2, 0, 'G',
2815 CHARSET_LEFT_TO_RIGHT,
2816 build_string ("CNS11643-1"),
2817 build_string ("CNS11643-1 (Chinese traditional)"),
2819 ("CNS 11643 Plane 1 Chinese traditional"),
2820 build_string (CHINESE_CNS_PLANE_RE("1")),
2822 Vcharset_chinese_cns11643_2 =
2823 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2824 CHARSET_TYPE_94X94, 2, 0, 'H',
2825 CHARSET_LEFT_TO_RIGHT,
2826 build_string ("CNS11643-2"),
2827 build_string ("CNS11643-2 (Chinese traditional)"),
2829 ("CNS 11643 Plane 2 Chinese traditional"),
2830 build_string (CHINESE_CNS_PLANE_RE("2")),
2833 Vcharset_latin_viscii_lower =
2834 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2835 CHARSET_TYPE_96, 1, 1, '1',
2836 CHARSET_LEFT_TO_RIGHT,
2837 build_string ("VISCII lower"),
2838 build_string ("VISCII lower (Vietnamese)"),
2839 build_string ("VISCII lower (Vietnamese)"),
2840 build_string ("MULEVISCII-LOWER"),
2842 Vcharset_latin_viscii_upper =
2843 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2844 CHARSET_TYPE_96, 1, 1, '2',
2845 CHARSET_LEFT_TO_RIGHT,
2846 build_string ("VISCII upper"),
2847 build_string ("VISCII upper (Vietnamese)"),
2848 build_string ("VISCII upper (Vietnamese)"),
2849 build_string ("MULEVISCII-UPPER"),
2851 Vcharset_latin_viscii =
2852 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2853 CHARSET_TYPE_256, 1, 2, 0,
2854 CHARSET_LEFT_TO_RIGHT,
2855 build_string ("VISCII"),
2856 build_string ("VISCII 1.1 (Vietnamese)"),
2857 build_string ("VISCII 1.1 (Vietnamese)"),
2858 build_string ("VISCII1\\.1"),
2860 Vcharset_ethiopic_ucs =
2861 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs,
2862 CHARSET_TYPE_256X256, 2, 2, 0,
2863 CHARSET_LEFT_TO_RIGHT,
2864 build_string ("Ethiopic (UCS)"),
2865 build_string ("Ethiopic (UCS)"),
2866 build_string ("Ethiopic of UCS"),
2867 build_string ("Ethiopic-Unicode"),
2868 Qnil, 0x1200, 0x137F, 0x1200, 0);
2869 Vcharset_hiragana_jisx0208 =
2870 make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
2871 CHARSET_TYPE_94X94, 2, 0, 'B',
2872 CHARSET_LEFT_TO_RIGHT,
2873 build_string ("Hiragana"),
2874 build_string ("Hiragana of JIS X0208"),
2875 build_string ("Japanese Hiragana of JIS X0208"),
2876 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2877 Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA,
2878 (0x24 - 33) * 94 + (0x21 - 33), 33);
2879 Vcharset_katakana_jisx0208 =
2880 make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208,
2881 CHARSET_TYPE_94X94, 2, 0, 'B',
2882 CHARSET_LEFT_TO_RIGHT,
2883 build_string ("Katakana"),
2884 build_string ("Katakana of JIS X0208"),
2885 build_string ("Japanese Katakana of JIS X0208"),
2886 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2887 Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA,
2888 (0x25 - 33) * 94 + (0x21 - 33), 33);
2890 Vcharset_chinese_big5_1 =
2891 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2892 CHARSET_TYPE_94X94, 2, 0, '0',
2893 CHARSET_LEFT_TO_RIGHT,
2894 build_string ("Big5"),
2895 build_string ("Big5 (Level-1)"),
2897 ("Big5 Level-1 Chinese traditional"),
2898 build_string ("big5"),
2900 Vcharset_chinese_big5_2 =
2901 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2902 CHARSET_TYPE_94X94, 2, 0, '1',
2903 CHARSET_LEFT_TO_RIGHT,
2904 build_string ("Big5"),
2905 build_string ("Big5 (Level-2)"),
2907 ("Big5 Level-2 Chinese traditional"),
2908 build_string ("big5"),
2911 #ifdef ENABLE_COMPOSITE_CHARS
2912 /* #### For simplicity, we put composite chars into a 96x96 charset.
2913 This is going to lead to problems because the charset can run out of
2914 room, especially as we don't yet recycle numbers. */
2915 Vcharset_composite =
2916 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2917 CHARSET_TYPE_96X96, 2, 0, 0,
2918 CHARSET_LEFT_TO_RIGHT,
2919 build_string ("Composite"),
2920 build_string ("Composite characters"),
2921 build_string ("Composite characters"),
2924 composite_char_row_next = 32;
2925 composite_char_col_next = 32;
2927 Vcomposite_char_string2char_hash_table =
2928 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2929 Vcomposite_char_char2string_hash_table =
2930 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2931 staticpro (&Vcomposite_char_string2char_hash_table);
2932 staticpro (&Vcomposite_char_char2string_hash_table);
2933 #endif /* ENABLE_COMPOSITE_CHARS */