1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii;
63 Lisp_Object Vcharset_latin_viscii_lower;
64 Lisp_Object Vcharset_latin_viscii_upper;
65 Lisp_Object Vcharset_ethiopic_ucs;
66 Lisp_Object Vcharset_hiragana_jisx0208;
67 Lisp_Object Vcharset_katakana_jisx0208;
69 Lisp_Object Vcharset_chinese_big5_1;
70 Lisp_Object Vcharset_chinese_big5_2;
72 #ifdef ENABLE_COMPOSITE_CHARS
73 Lisp_Object Vcharset_composite;
75 /* Hash tables for composite chars. One maps string representing
76 composed chars to their equivalent chars; one goes the
78 Lisp_Object Vcomposite_char_char2string_hash_table;
79 Lisp_Object Vcomposite_char_string2char_hash_table;
81 static int composite_char_row_next;
82 static int composite_char_col_next;
84 #endif /* ENABLE_COMPOSITE_CHARS */
86 /* Table of charsets indexed by leading byte. */
87 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
89 /* Table of charsets indexed by type/final-byte/direction. */
91 Lisp_Object charset_by_attributes[4][128];
93 Lisp_Object charset_by_attributes[4][128][2];
97 /* Table of number of bytes in the string representation of a character
98 indexed by the first byte of that representation.
100 rep_bytes_by_first_byte(c) is more efficient than the equivalent
101 canonical computation:
103 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
105 Bytecount rep_bytes_by_first_byte[0xA0] =
106 { /* 0x00 - 0x7f are for straight ASCII */
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
115 /* 0x80 - 0x8f are for Dimension-1 official charsets */
117 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
119 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
121 /* 0x90 - 0x9d are for Dimension-2 official charsets */
122 /* 0x9e is for Dimension-1 private charsets */
123 /* 0x9f is for Dimension-2 private charsets */
124 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
131 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
133 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
136 for (i = 0; i < 256; i++)
138 markobj (cte->property[i]);
144 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
146 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
147 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
150 for (i = 0; i < 256; i++)
151 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
153 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
155 if (!char_byte_table_equal (cte1->property[i],
156 cte2->property[i], depth + 1))
163 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
169 char_byte_table_hash (Lisp_Object obj, int depth)
171 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
173 return internal_array_hash (cte->property, 256, depth);
176 static const struct lrecord_description char_byte_table_description[] = {
177 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
181 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
182 mark_char_byte_table,
183 internal_object_printer,
184 0, char_byte_table_equal,
185 char_byte_table_hash,
186 char_byte_table_description,
187 struct Lisp_Char_Byte_Table);
190 make_char_byte_table (Lisp_Object initval)
194 struct Lisp_Char_Byte_Table *cte =
195 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
196 &lrecord_char_byte_table);
198 for (i = 0; i < 256; i++)
199 cte->property[i] = initval;
201 XSETCHAR_BYTE_TABLE (obj, cte);
206 copy_char_byte_table (Lisp_Object entry)
208 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
211 struct Lisp_Char_Byte_Table *ctenew =
212 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
213 &lrecord_char_byte_table);
215 for (i = 0; i < 256; i++)
217 Lisp_Object new = cte->property[i];
218 if (CHAR_BYTE_TABLE_P (new))
219 ctenew->property[i] = copy_char_byte_table (new);
221 ctenew->property[i] = new;
224 XSETCHAR_BYTE_TABLE (obj, ctenew);
230 mark_char_code_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
232 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
238 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
240 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
241 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
243 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
247 char_code_table_hash (Lisp_Object obj, int depth)
249 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
251 return char_code_table_hash (cte->table, depth + 1);
254 static const struct lrecord_description char_code_table_description[] = {
255 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
259 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
260 mark_char_code_table,
261 internal_object_printer,
262 0, char_code_table_equal,
263 char_code_table_hash,
264 char_code_table_description,
265 struct Lisp_Char_Code_Table);
268 make_char_code_table (Lisp_Object initval)
271 struct Lisp_Char_Code_Table *cte =
272 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
273 &lrecord_char_code_table);
275 cte->table = make_char_byte_table (initval);
277 XSETCHAR_CODE_TABLE (obj, cte);
282 copy_char_code_table (Lisp_Object entry)
284 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
286 struct Lisp_Char_Code_Table *ctenew =
287 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
288 &lrecord_char_code_table);
290 ctenew->table = copy_char_byte_table (cte->table);
291 XSETCHAR_CODE_TABLE (obj, ctenew);
297 get_char_code_table (Emchar ch, Lisp_Object table)
299 unsigned int code = ch;
300 struct Lisp_Char_Byte_Table* cpt
301 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
302 Lisp_Object ret = cpt->property [(unsigned char)(code >> 24)];
304 if (CHAR_BYTE_TABLE_P (ret))
305 cpt = XCHAR_BYTE_TABLE (ret);
309 ret = cpt->property [(unsigned char) (code >> 16)];
310 if (CHAR_BYTE_TABLE_P (ret))
311 cpt = XCHAR_BYTE_TABLE (ret);
315 ret = cpt->property [(unsigned char) (code >> 8)];
316 if (CHAR_BYTE_TABLE_P (ret))
317 cpt = XCHAR_BYTE_TABLE (ret);
321 return cpt->property [(unsigned char) code];
325 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
327 unsigned int code = ch;
328 struct Lisp_Char_Byte_Table* cpt1
329 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
330 Lisp_Object ret = cpt1->property[(unsigned char)(code >> 24)];
332 if (CHAR_BYTE_TABLE_P (ret))
334 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
336 ret = cpt2->property[(unsigned char)(code >> 16)];
337 if (CHAR_BYTE_TABLE_P (ret))
339 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
341 ret = cpt3->property[(unsigned char)(code >> 8)];
342 if (CHAR_BYTE_TABLE_P (ret))
344 struct Lisp_Char_Byte_Table* cpt4
345 = XCHAR_BYTE_TABLE (ret);
347 cpt4->property[(unsigned char)code] = value;
349 else if (!EQ (ret, value))
351 Lisp_Object cpt4 = make_char_byte_table (ret);
353 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
354 cpt3->property[(unsigned char)(code >> 8)] = cpt4;
357 else if (!EQ (ret, value))
359 Lisp_Object cpt3 = make_char_byte_table (ret);
360 Lisp_Object cpt4 = make_char_byte_table (ret);
362 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
363 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)]
365 cpt2->property[(unsigned char)(code >> 16)] = cpt3;
368 else if (!EQ (ret, value))
370 Lisp_Object cpt2 = make_char_byte_table (ret);
371 Lisp_Object cpt3 = make_char_byte_table (ret);
372 Lisp_Object cpt4 = make_char_byte_table (ret);
374 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)code] = value;
375 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(code >> 8)] = cpt4;
376 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(code >> 16)] = cpt3;
377 cpt1->property[(unsigned char)(code >> 24)] = cpt2;
382 Lisp_Object Vcharacter_attribute_table;
383 Lisp_Object Vcharacter_composition_table;
384 Lisp_Object Vcharacter_variant_table;
386 Lisp_Object Q_decomposition;
389 Lisp_Object QnoBreak;
392 Lisp_Object Qfraction;
398 to_char_code (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
404 else if (EQ (v, Qcompat))
406 else if (EQ (v, QnoBreak))
408 else if (EQ (v, Qsuper))
410 else if (EQ (v, Qsub))
412 else if (EQ (v, Qfraction))
414 else if (EQ (v, Qwide))
416 else if (EQ (v, Qnarrow))
418 else if (EQ (v, Qfont))
421 signal_simple_error (err_msg, err_arg);
424 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
425 Return character corresponding with list.
429 Lisp_Object table = Vcharacter_composition_table;
430 Lisp_Object rest = list;
434 Lisp_Object v = Fcar (rest);
436 Emchar c = to_char_code (v, "Invalid value for composition", list);
438 ret = get_char_code_table (c, table);
443 if (!CHAR_CODE_TABLE_P (ret))
448 else if (!CONSP (rest))
450 else if (CHAR_CODE_TABLE_P (ret))
453 signal_simple_error ("Invalid table is found with", list);
455 signal_simple_error ("Invalid value for composition", list);
458 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
459 Return variants of CHARACTER.
463 CHECK_CHAR (character);
464 return Fcopy_list (get_char_code_table (XCHAR (character),
465 Vcharacter_variant_table));
468 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
469 Return the alist of attributes of CHARACTER.
473 CHECK_CHAR (character);
474 return Fcopy_alist (get_char_code_table (XCHAR (character),
475 Vcharacter_attribute_table));
478 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
479 Return the value of CHARACTER's ATTRIBUTE.
481 (character, attribute))
484 = get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
490 if (!NILP (ccs = Ffind_charset (attribute)))
493 return Fcdr (Fassq (attribute, ret));
497 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
500 Emchar char_code = XCHAR (character);
502 = get_char_code_table (char_code, Vcharacter_attribute_table);
505 cell = Fassq (attribute, ret);
509 ret = Fcons (Fcons (attribute, value), ret);
511 else if (!EQ (Fcdr (cell), value))
513 Fsetcdr (cell, value);
515 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
519 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
520 Store CHARACTER's ATTRIBUTE with VALUE.
522 (character, attribute, value))
526 ccs = Ffind_charset (attribute);
530 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
535 /* ad-hoc method for `ascii' */
536 if ((XCHARSET_CHARS (ccs) == 94) &&
537 (XCHARSET_BYTE_OFFSET (ccs) != 33))
538 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
540 ccs_len = XCHARSET_CHARS (ccs);
543 signal_simple_error ("Invalid value for coded-charset",
547 rest = Fget_char_attribute (character, attribute);
554 Lisp_Object ei = Fcar (rest);
556 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
557 nv = XVECTOR_DATA(v)[i];
564 XVECTOR_DATA(v)[i] = Qnil;
565 v = XCHARSET_DECODING_TABLE (ccs);
570 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
573 if (XCHARSET_GRAPHIC (ccs) == 1)
574 value = Fcopy_list (value);
579 Lisp_Object ei = Fcar (rest);
582 signal_simple_error ("Invalid value for coded-charset", value);
584 if ((i < 0) || (255 < i))
585 signal_simple_error ("Invalid value for coded-charset", value);
586 if (XCHARSET_GRAPHIC (ccs) == 1)
589 Fsetcar (rest, make_int (i));
591 i -= XCHARSET_BYTE_OFFSET (ccs);
592 nv = XVECTOR_DATA(v)[i];
598 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
605 XVECTOR_DATA(v)[i] = character;
607 else if (EQ (attribute, Q_decomposition))
609 Lisp_Object rest = value;
610 Lisp_Object table = Vcharacter_composition_table;
613 signal_simple_error ("Invalid value for ->decomposition",
618 Lisp_Object v = Fcar (rest);
621 = to_char_code (v, "Invalid value for ->decomposition", value);
626 put_char_code_table (c, character, table);
631 ntable = get_char_code_table (c, table);
632 if (!CHAR_CODE_TABLE_P (ntable))
634 ntable = make_char_code_table (Qnil);
635 put_char_code_table (c, ntable, table);
641 else if (EQ (attribute, Q_ucs))
647 signal_simple_error ("Invalid value for ->ucs", value);
651 ret = get_char_code_table (c, Vcharacter_variant_table);
652 if (NILP (Fmemq (character, ret)))
654 put_char_code_table (c, Fcons (character, ret),
655 Vcharacter_variant_table);
658 return put_char_attribute (character, attribute, value);
663 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
664 Store character's ATTRIBUTES.
668 Lisp_Object rest = attributes;
669 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
670 Lisp_Object character;
676 Lisp_Object cell = Fcar (rest);
680 signal_simple_error ("Invalid argument", attributes);
681 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
682 && XCHARSET_FINAL (ccs))
686 if (XCHARSET_DIMENSION (ccs) == 1)
688 Lisp_Object eb1 = Fcar (Fcdr (cell));
692 signal_simple_error ("Invalid argument", attributes);
694 switch (XCHARSET_CHARS (ccs))
698 + (XCHARSET_FINAL (ccs) - '0') * 94 + (b1 - 33);
702 + (XCHARSET_FINAL (ccs) - '0') * 96 + (b1 - 32);
708 else if (XCHARSET_DIMENSION (ccs) == 2)
710 Lisp_Object eb1 = Fcar (Fcdr (cell));
711 Lisp_Object eb2 = Fcar (Fcdr (Fcdr (cell)));
715 signal_simple_error ("Invalid argument", attributes);
718 signal_simple_error ("Invalid argument", attributes);
720 switch (XCHARSET_CHARS (ccs))
723 code = MIN_CHAR_94x94
724 + (XCHARSET_FINAL (ccs) - '0') * 94 * 94
725 + (b1 - 33) * 94 + (b2 - 33);
728 code = MIN_CHAR_96x96
729 + (XCHARSET_FINAL (ccs) - '0') * 96 * 96
730 + (b1 - 32) * 96 + (b2 - 32);
741 character = make_char (code);
742 goto setup_attributes;
748 else if (!INTP (code))
749 signal_simple_error ("Invalid argument", attributes);
751 character = make_char (XINT (code));
757 Lisp_Object cell = Fcar (rest);
760 signal_simple_error ("Invalid argument", attributes);
761 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
765 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
768 Lisp_Object Vutf_2000_version;
772 int leading_code_private_11;
775 Lisp_Object Qcharsetp;
777 /* Qdoc_string, Qdimension, Qchars defined in general.c */
778 Lisp_Object Qregistry, Qfinal, Qgraphic;
779 Lisp_Object Qdirection;
780 Lisp_Object Qreverse_direction_charset;
781 Lisp_Object Qleading_byte;
782 Lisp_Object Qshort_name, Qlong_name;
798 Qjapanese_jisx0208_1978,
810 Qvietnamese_viscii_lower,
811 Qvietnamese_viscii_upper,
820 Lisp_Object Ql2r, Qr2l;
822 Lisp_Object Vcharset_hash_table;
825 static Charset_ID next_allocated_leading_byte;
827 static Charset_ID next_allocated_1_byte_leading_byte;
828 static Charset_ID next_allocated_2_byte_leading_byte;
831 /* Composite characters are characters constructed by overstriking two
832 or more regular characters.
834 1) The old Mule implementation involves storing composite characters
835 in a buffer as a tag followed by all of the actual characters
836 used to make up the composite character. I think this is a bad
837 idea; it greatly complicates code that wants to handle strings
838 one character at a time because it has to deal with the possibility
839 of great big ungainly characters. It's much more reasonable to
840 simply store an index into a table of composite characters.
842 2) The current implementation only allows for 16,384 separate
843 composite characters over the lifetime of the XEmacs process.
844 This could become a potential problem if the user
845 edited lots of different files that use composite characters.
846 Due to FSF bogosity, increasing the number of allowable
847 composite characters under Mule would decrease the number
848 of possible faces that can exist. Mule already has shrunk
849 this to 2048, and further shrinkage would become uncomfortable.
850 No such problems exist in XEmacs.
852 Composite characters could be represented as 0x80 C1 C2 C3,
853 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
854 for slightly under 2^20 (one million) composite characters
855 over the XEmacs process lifetime, and you only need to
856 increase the size of a Mule character from 19 to 21 bits.
857 Or you could use 0x80 C1 C2 C3 C4, allowing for about
858 85 million (slightly over 2^26) composite characters. */
861 /************************************************************************/
862 /* Basic Emchar functions */
863 /************************************************************************/
865 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
866 string in STR. Returns the number of bytes stored.
867 Do not call this directly. Use the macro set_charptr_emchar() instead.
871 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
886 else if ( c <= 0x7ff )
888 *p++ = (c >> 6) | 0xc0;
889 *p++ = (c & 0x3f) | 0x80;
891 else if ( c <= 0xffff )
893 *p++ = (c >> 12) | 0xe0;
894 *p++ = ((c >> 6) & 0x3f) | 0x80;
895 *p++ = (c & 0x3f) | 0x80;
897 else if ( c <= 0x1fffff )
899 *p++ = (c >> 18) | 0xf0;
900 *p++ = ((c >> 12) & 0x3f) | 0x80;
901 *p++ = ((c >> 6) & 0x3f) | 0x80;
902 *p++ = (c & 0x3f) | 0x80;
904 else if ( c <= 0x3ffffff )
906 *p++ = (c >> 24) | 0xf8;
907 *p++ = ((c >> 18) & 0x3f) | 0x80;
908 *p++ = ((c >> 12) & 0x3f) | 0x80;
909 *p++ = ((c >> 6) & 0x3f) | 0x80;
910 *p++ = (c & 0x3f) | 0x80;
914 *p++ = (c >> 30) | 0xfc;
915 *p++ = ((c >> 24) & 0x3f) | 0x80;
916 *p++ = ((c >> 18) & 0x3f) | 0x80;
917 *p++ = ((c >> 12) & 0x3f) | 0x80;
918 *p++ = ((c >> 6) & 0x3f) | 0x80;
919 *p++ = (c & 0x3f) | 0x80;
922 BREAKUP_CHAR (c, charset, c1, c2);
923 lb = CHAR_LEADING_BYTE (c);
924 if (LEADING_BYTE_PRIVATE_P (lb))
925 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
927 if (EQ (charset, Vcharset_control_1))
936 /* Return the first character from a Mule-encoded string in STR,
937 assuming it's non-ASCII. Do not call this directly.
938 Use the macro charptr_emchar() instead. */
941 non_ascii_charptr_emchar (CONST Bufbyte *str)
954 else if ( b >= 0xf8 )
959 else if ( b >= 0xf0 )
964 else if ( b >= 0xe0 )
969 else if ( b >= 0xc0 )
979 for( ; len > 0; len-- )
982 ch = ( ch << 6 ) | ( b & 0x3f );
986 Bufbyte i0 = *str, i1, i2 = 0;
989 if (i0 == LEADING_BYTE_CONTROL_1)
990 return (Emchar) (*++str - 0x20);
992 if (LEADING_BYTE_PREFIX_P (i0))
997 charset = CHARSET_BY_LEADING_BYTE (i0);
998 if (XCHARSET_DIMENSION (charset) == 2)
1001 return MAKE_CHAR (charset, i1, i2);
1005 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1006 Do not call this directly. Use the macro valid_char_p() instead. */
1010 non_ascii_valid_char_p (Emchar ch)
1014 /* Must have only lowest 19 bits set */
1018 f1 = CHAR_FIELD1 (ch);
1019 f2 = CHAR_FIELD2 (ch);
1020 f3 = CHAR_FIELD3 (ch);
1024 Lisp_Object charset;
1026 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1027 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1028 f2 > MAX_CHAR_FIELD2_PRIVATE)
1033 if (f3 != 0x20 && f3 != 0x7F)
1037 NOTE: This takes advantage of the fact that
1038 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1039 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1041 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1042 return (XCHARSET_CHARS (charset) == 96);
1046 Lisp_Object charset;
1048 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1049 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1050 f1 > MAX_CHAR_FIELD1_PRIVATE)
1052 if (f2 < 0x20 || f3 < 0x20)
1055 #ifdef ENABLE_COMPOSITE_CHARS
1056 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1058 if (UNBOUNDP (Fgethash (make_int (ch),
1059 Vcomposite_char_char2string_hash_table,
1064 #endif /* ENABLE_COMPOSITE_CHARS */
1066 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
1069 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1071 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1074 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1076 return (XCHARSET_CHARS (charset) == 96);
1082 /************************************************************************/
1083 /* Basic string functions */
1084 /************************************************************************/
1086 /* Copy the character pointed to by PTR into STR, assuming it's
1087 non-ASCII. Do not call this directly. Use the macro
1088 charptr_copy_char() instead. */
1091 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
1093 Bufbyte *strptr = str;
1095 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1097 /* Notice fallthrough. */
1099 case 6: *++strptr = *ptr++;
1100 case 5: *++strptr = *ptr++;
1102 case 4: *++strptr = *ptr++;
1103 case 3: *++strptr = *ptr++;
1104 case 2: *++strptr = *ptr;
1109 return strptr + 1 - str;
1113 /************************************************************************/
1114 /* streams of Emchars */
1115 /************************************************************************/
1117 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1118 The functions below are not meant to be called directly; use
1119 the macros in insdel.h. */
1122 Lstream_get_emchar_1 (Lstream *stream, int ch)
1124 Bufbyte str[MAX_EMCHAR_LEN];
1125 Bufbyte *strptr = str;
1127 str[0] = (Bufbyte) ch;
1128 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1130 /* Notice fallthrough. */
1133 ch = Lstream_getc (stream);
1135 *++strptr = (Bufbyte) ch;
1137 ch = Lstream_getc (stream);
1139 *++strptr = (Bufbyte) ch;
1142 ch = Lstream_getc (stream);
1144 *++strptr = (Bufbyte) ch;
1146 ch = Lstream_getc (stream);
1148 *++strptr = (Bufbyte) ch;
1150 ch = Lstream_getc (stream);
1152 *++strptr = (Bufbyte) ch;
1157 return charptr_emchar (str);
1161 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1163 Bufbyte str[MAX_EMCHAR_LEN];
1164 Bytecount len = set_charptr_emchar (str, ch);
1165 return Lstream_write (stream, str, len);
1169 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1171 Bufbyte str[MAX_EMCHAR_LEN];
1172 Bytecount len = set_charptr_emchar (str, ch);
1173 Lstream_unread (stream, str, len);
1177 /************************************************************************/
1178 /* charset object */
1179 /************************************************************************/
1182 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
1184 struct Lisp_Charset *cs = XCHARSET (obj);
1186 markobj (cs->short_name);
1187 markobj (cs->long_name);
1188 markobj (cs->doc_string);
1189 markobj (cs->registry);
1190 markobj (cs->ccl_program);
1192 markobj (cs->decoding_table);
1198 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1200 struct Lisp_Charset *cs = XCHARSET (obj);
1204 error ("printing unreadable object #<charset %s 0x%x>",
1205 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1208 write_c_string ("#<charset ", printcharfun);
1209 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1210 write_c_string (" ", printcharfun);
1211 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1212 write_c_string (" ", printcharfun);
1213 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1214 write_c_string (" ", printcharfun);
1215 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1216 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
1217 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
1218 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
1219 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
1221 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1222 CHARSET_COLUMNS (cs),
1223 CHARSET_GRAPHIC (cs),
1224 CHARSET_FINAL (cs));
1225 write_c_string (buf, printcharfun);
1226 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1227 sprintf (buf, " 0x%x>", cs->header.uid);
1228 write_c_string (buf, printcharfun);
1231 static const struct lrecord_description charset_description[] = {
1232 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1234 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
1239 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1240 mark_charset, print_charset, 0, 0, 0,
1241 charset_description,
1242 struct Lisp_Charset);
1244 /* Make a new charset. */
1247 make_charset (Charset_ID id, Lisp_Object name,
1248 unsigned char type, unsigned char columns, unsigned char graphic,
1249 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1250 Lisp_Object long_name, Lisp_Object doc,
1252 Lisp_Object decoding_table,
1253 Emchar ucs_min, Emchar ucs_max,
1254 Emchar code_offset, unsigned char byte_offset)
1257 struct Lisp_Charset *cs =
1258 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1259 XSETCHARSET (obj, cs);
1261 CHARSET_ID (cs) = id;
1262 CHARSET_NAME (cs) = name;
1263 CHARSET_SHORT_NAME (cs) = short_name;
1264 CHARSET_LONG_NAME (cs) = long_name;
1265 CHARSET_DIRECTION (cs) = direction;
1266 CHARSET_TYPE (cs) = type;
1267 CHARSET_COLUMNS (cs) = columns;
1268 CHARSET_GRAPHIC (cs) = graphic;
1269 CHARSET_FINAL (cs) = final;
1270 CHARSET_DOC_STRING (cs) = doc;
1271 CHARSET_REGISTRY (cs) = reg;
1272 CHARSET_CCL_PROGRAM (cs) = Qnil;
1273 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1275 CHARSET_DECODING_TABLE(cs) = Qnil;
1276 CHARSET_UCS_MIN(cs) = ucs_min;
1277 CHARSET_UCS_MAX(cs) = ucs_max;
1278 CHARSET_CODE_OFFSET(cs) = code_offset;
1279 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1282 switch (CHARSET_TYPE (cs))
1284 case CHARSET_TYPE_94:
1285 CHARSET_DIMENSION (cs) = 1;
1286 CHARSET_CHARS (cs) = 94;
1288 case CHARSET_TYPE_96:
1289 CHARSET_DIMENSION (cs) = 1;
1290 CHARSET_CHARS (cs) = 96;
1292 case CHARSET_TYPE_94X94:
1293 CHARSET_DIMENSION (cs) = 2;
1294 CHARSET_CHARS (cs) = 94;
1296 case CHARSET_TYPE_96X96:
1297 CHARSET_DIMENSION (cs) = 2;
1298 CHARSET_CHARS (cs) = 96;
1301 case CHARSET_TYPE_128:
1302 CHARSET_DIMENSION (cs) = 1;
1303 CHARSET_CHARS (cs) = 128;
1305 case CHARSET_TYPE_128X128:
1306 CHARSET_DIMENSION (cs) = 2;
1307 CHARSET_CHARS (cs) = 128;
1309 case CHARSET_TYPE_256:
1310 CHARSET_DIMENSION (cs) = 1;
1311 CHARSET_CHARS (cs) = 256;
1313 case CHARSET_TYPE_256X256:
1314 CHARSET_DIMENSION (cs) = 2;
1315 CHARSET_CHARS (cs) = 256;
1321 if (id == LEADING_BYTE_ASCII)
1322 CHARSET_REP_BYTES (cs) = 1;
1324 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1326 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1331 /* some charsets do not have final characters. This includes
1332 ASCII, Control-1, Composite, and the two faux private
1335 if (code_offset == 0)
1337 assert (NILP (charset_by_attributes[type][final]));
1338 charset_by_attributes[type][final] = obj;
1341 assert (NILP (charset_by_attributes[type][final][direction]));
1342 charset_by_attributes[type][final][direction] = obj;
1346 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1347 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1350 /* official leading byte */
1351 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
1354 /* Some charsets are "faux" and don't have names or really exist at
1355 all except in the leading-byte table. */
1357 Fputhash (name, obj, Vcharset_hash_table);
1362 get_unallocated_leading_byte (int dimension)
1367 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1370 lb = next_allocated_leading_byte++;
1374 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1377 lb = next_allocated_1_byte_leading_byte++;
1381 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1384 lb = next_allocated_2_byte_leading_byte++;
1390 ("No more character sets free for this dimension",
1391 make_int (dimension));
1398 range_charset_code_point (Lisp_Object charset, Emchar ch)
1402 if ((XCHARSET_UCS_MIN (charset) <= ch)
1403 && (ch <= XCHARSET_UCS_MAX (charset)))
1405 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1407 if (XCHARSET_DIMENSION (charset) == 1)
1408 return list1 (make_int (d + XCHARSET_BYTE_OFFSET (charset)));
1409 else if (XCHARSET_DIMENSION (charset) == 2)
1410 return list2 (make_int (d / XCHARSET_CHARS (charset)
1411 + XCHARSET_BYTE_OFFSET (charset)),
1412 make_int (d % XCHARSET_CHARS (charset)
1413 + XCHARSET_BYTE_OFFSET (charset)));
1414 else if (XCHARSET_DIMENSION (charset) == 3)
1415 return list3 (make_int (d / (XCHARSET_CHARS (charset)
1416 * XCHARSET_CHARS (charset))
1417 + XCHARSET_BYTE_OFFSET (charset)),
1418 make_int (d / XCHARSET_CHARS (charset)
1419 % XCHARSET_CHARS (charset)
1420 + XCHARSET_BYTE_OFFSET (charset)),
1421 make_int (d % XCHARSET_CHARS (charset)
1422 + XCHARSET_BYTE_OFFSET (charset)));
1423 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1424 return list4 (make_int (d / (XCHARSET_CHARS (charset)
1425 * XCHARSET_CHARS (charset)
1426 * XCHARSET_CHARS (charset))
1427 + XCHARSET_BYTE_OFFSET (charset)),
1428 make_int (d / (XCHARSET_CHARS (charset)
1429 * XCHARSET_CHARS (charset))
1430 % XCHARSET_CHARS (charset)
1431 + XCHARSET_BYTE_OFFSET (charset)),
1432 make_int (d / XCHARSET_CHARS (charset)
1433 % XCHARSET_CHARS (charset)
1434 + XCHARSET_BYTE_OFFSET (charset)),
1435 make_int (d % XCHARSET_CHARS (charset)
1436 + XCHARSET_BYTE_OFFSET (charset)));
1438 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1440 if (XCHARSET_DIMENSION (charset) == 1)
1442 if (XCHARSET_CHARS (charset) == 94)
1444 if (((d = ch - (MIN_CHAR_94
1445 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1447 return list1 (make_int (d + 33));
1449 else if (XCHARSET_CHARS (charset) == 96)
1451 if (((d = ch - (MIN_CHAR_96
1452 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1454 return list1 (make_int (d + 32));
1459 else if (XCHARSET_DIMENSION (charset) == 2)
1461 if (XCHARSET_CHARS (charset) == 94)
1463 if (((d = ch - (MIN_CHAR_94x94
1464 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1467 return list2 (make_int ((d / 94) + 33),
1468 make_int (d % 94 + 33));
1470 else if (XCHARSET_CHARS (charset) == 96)
1472 if (((d = ch - (MIN_CHAR_96x96
1473 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1476 return list2 (make_int ((d / 96) + 32),
1477 make_int (d % 96 + 32));
1485 split_builtin_char (Emchar c)
1487 if (c < MIN_CHAR_OBS_94x94)
1489 if (c <= MAX_CHAR_BASIC_LATIN)
1491 return list2 (Vcharset_ascii, make_int (c));
1495 return list2 (Vcharset_control_1, make_int (c & 0x7F));
1499 return list2 (Vcharset_latin_iso8859_1, make_int (c & 0x7F));
1501 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1503 return list2 (Vcharset_greek_iso8859_7,
1504 make_int (c - MIN_CHAR_GREEK + 0x20));
1506 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1508 return list2 (Vcharset_cyrillic_iso8859_5,
1509 make_int (c - MIN_CHAR_CYRILLIC + 0x20));
1511 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1513 return list2 (Vcharset_hebrew_iso8859_8,
1514 make_int (c - MIN_CHAR_HEBREW + 0x20));
1516 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1518 return list2 (Vcharset_thai_tis620,
1519 make_int (c - MIN_CHAR_THAI + 0x20));
1521 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1522 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1524 return list2 (Vcharset_katakana_jisx0201,
1525 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1529 return list3 (Vcharset_ucs_bmp,
1530 make_int (c >> 8), make_int (c & 0xff));
1533 else if (c <= MAX_CHAR_OBS_94x94)
1535 return list3 (CHARSET_BY_ATTRIBUTES
1536 (CHARSET_TYPE_94X94,
1537 ((c - MIN_CHAR_OBS_94x94) / (94 * 94)) + '@',
1538 CHARSET_LEFT_TO_RIGHT),
1539 make_int ((((c - MIN_CHAR_OBS_94x94) / 94) % 94) + 33),
1540 make_int (((c - MIN_CHAR_OBS_94x94) % 94) + 33));
1542 else if (c <= MAX_CHAR_94)
1544 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1545 ((c - MIN_CHAR_94) / 94) + '0',
1546 CHARSET_LEFT_TO_RIGHT),
1547 make_int (((c - MIN_CHAR_94) % 94) + 33));
1549 else if (c <= MAX_CHAR_96)
1551 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1552 ((c - MIN_CHAR_96) / 96) + '0',
1553 CHARSET_LEFT_TO_RIGHT),
1554 make_int (((c - MIN_CHAR_96) % 96) + 32));
1556 else if (c <= MAX_CHAR_94x94)
1558 return list3 (CHARSET_BY_ATTRIBUTES
1559 (CHARSET_TYPE_94X94,
1560 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1561 CHARSET_LEFT_TO_RIGHT),
1562 make_int ((((c - MIN_CHAR_94x94) / 94) % 94) + 33),
1563 make_int (((c - MIN_CHAR_94x94) % 94) + 33));
1565 else if (c <= MAX_CHAR_96x96)
1567 return list3 (CHARSET_BY_ATTRIBUTES
1568 (CHARSET_TYPE_96X96,
1569 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1570 CHARSET_LEFT_TO_RIGHT),
1571 make_int ((((c - MIN_CHAR_96x96) / 96) % 96) + 32),
1572 make_int (((c - MIN_CHAR_96x96) % 96) + 32));
1581 charset_code_point (Lisp_Object charset, Emchar ch)
1583 Lisp_Object cdef = get_char_code_table (ch, Vcharacter_attribute_table);
1585 if (!EQ (cdef, Qnil))
1587 Lisp_Object field = Fassq (charset, cdef);
1589 if (!EQ (field, Qnil))
1590 return Fcdr (field);
1592 return range_charset_code_point (charset, ch);
1595 Lisp_Object Vdefault_coded_charset_priority_list;
1599 /************************************************************************/
1600 /* Basic charset Lisp functions */
1601 /************************************************************************/
1603 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1604 Return non-nil if OBJECT is a charset.
1608 return CHARSETP (object) ? Qt : Qnil;
1611 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1612 Retrieve the charset of the given name.
1613 If CHARSET-OR-NAME is a charset object, it is simply returned.
1614 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1615 nil is returned. Otherwise the associated charset object is returned.
1619 if (CHARSETP (charset_or_name))
1620 return charset_or_name;
1622 CHECK_SYMBOL (charset_or_name);
1623 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1626 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1627 Retrieve the charset of the given name.
1628 Same as `find-charset' except an error is signalled if there is no such
1629 charset instead of returning nil.
1633 Lisp_Object charset = Ffind_charset (name);
1636 signal_simple_error ("No such charset", name);
1640 /* We store the charsets in hash tables with the names as the key and the
1641 actual charset object as the value. Occasionally we need to use them
1642 in a list format. These routines provide us with that. */
1643 struct charset_list_closure
1645 Lisp_Object *charset_list;
1649 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1650 void *charset_list_closure)
1652 /* This function can GC */
1653 struct charset_list_closure *chcl =
1654 (struct charset_list_closure*) charset_list_closure;
1655 Lisp_Object *charset_list = chcl->charset_list;
1657 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1661 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1662 Return a list of the names of all defined charsets.
1666 Lisp_Object charset_list = Qnil;
1667 struct gcpro gcpro1;
1668 struct charset_list_closure charset_list_closure;
1670 GCPRO1 (charset_list);
1671 charset_list_closure.charset_list = &charset_list;
1672 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1673 &charset_list_closure);
1676 return charset_list;
1679 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1680 Return the name of the given charset.
1684 return XCHARSET_NAME (Fget_charset (charset));
1687 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1688 Define a new character set.
1689 This function is for use with Mule support.
1690 NAME is a symbol, the name by which the character set is normally referred.
1691 DOC-STRING is a string describing the character set.
1692 PROPS is a property list, describing the specific nature of the
1693 character set. Recognized properties are:
1695 'short-name Short version of the charset name (ex: Latin-1)
1696 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1697 'registry A regular expression matching the font registry field for
1699 'dimension Number of octets used to index a character in this charset.
1700 Either 1 or 2. Defaults to 1.
1701 'columns Number of columns used to display a character in this charset.
1702 Only used in TTY mode. (Under X, the actual width of a
1703 character can be derived from the font used to display the
1704 characters.) If unspecified, defaults to the dimension
1705 (this is almost always the correct value).
1706 'chars Number of characters in each dimension (94 or 96).
1707 Defaults to 94. Note that if the dimension is 2, the
1708 character set thus described is 94x94 or 96x96.
1709 'final Final byte of ISO 2022 escape sequence. Must be
1710 supplied. Each combination of (DIMENSION, CHARS) defines a
1711 separate namespace for final bytes. Note that ISO
1712 2022 restricts the final byte to the range
1713 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1714 dimension == 2. Note also that final bytes in the range
1715 0x30 - 0x3F are reserved for user-defined (not official)
1717 'graphic 0 (use left half of font on output) or 1 (use right half
1718 of font on output). Defaults to 0. For example, for
1719 a font whose registry is ISO8859-1, the left half
1720 (octets 0x20 - 0x7F) is the `ascii' character set, while
1721 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1722 character set. With 'graphic set to 0, the octets
1723 will have their high bit cleared; with it set to 1,
1724 the octets will have their high bit set.
1725 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1727 'ccl-program A compiled CCL program used to convert a character in
1728 this charset into an index into the font. This is in
1729 addition to the 'graphic property. The CCL program
1730 is passed the octets of the character, with the high
1731 bit cleared and set depending upon whether the value
1732 of the 'graphic property is 0 or 1.
1734 (name, doc_string, props))
1736 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1737 int direction = CHARSET_LEFT_TO_RIGHT;
1739 Lisp_Object registry = Qnil;
1740 Lisp_Object charset;
1741 Lisp_Object rest, keyword, value;
1742 Lisp_Object ccl_program = Qnil;
1743 Lisp_Object short_name = Qnil, long_name = Qnil;
1744 int byte_offset = -1;
1746 CHECK_SYMBOL (name);
1747 if (!NILP (doc_string))
1748 CHECK_STRING (doc_string);
1750 charset = Ffind_charset (name);
1751 if (!NILP (charset))
1752 signal_simple_error ("Cannot redefine existing charset", name);
1754 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1756 if (EQ (keyword, Qshort_name))
1758 CHECK_STRING (value);
1762 if (EQ (keyword, Qlong_name))
1764 CHECK_STRING (value);
1768 else if (EQ (keyword, Qdimension))
1771 dimension = XINT (value);
1772 if (dimension < 1 || dimension > 2)
1773 signal_simple_error ("Invalid value for 'dimension", value);
1776 else if (EQ (keyword, Qchars))
1779 chars = XINT (value);
1780 if (chars != 94 && chars != 96)
1781 signal_simple_error ("Invalid value for 'chars", value);
1784 else if (EQ (keyword, Qcolumns))
1787 columns = XINT (value);
1788 if (columns != 1 && columns != 2)
1789 signal_simple_error ("Invalid value for 'columns", value);
1792 else if (EQ (keyword, Qgraphic))
1795 graphic = XINT (value);
1797 if (graphic < 0 || graphic > 2)
1799 if (graphic < 0 || graphic > 1)
1801 signal_simple_error ("Invalid value for 'graphic", value);
1804 else if (EQ (keyword, Qregistry))
1806 CHECK_STRING (value);
1810 else if (EQ (keyword, Qdirection))
1812 if (EQ (value, Ql2r))
1813 direction = CHARSET_LEFT_TO_RIGHT;
1814 else if (EQ (value, Qr2l))
1815 direction = CHARSET_RIGHT_TO_LEFT;
1817 signal_simple_error ("Invalid value for 'direction", value);
1820 else if (EQ (keyword, Qfinal))
1822 CHECK_CHAR_COERCE_INT (value);
1823 final = XCHAR (value);
1824 if (final < '0' || final > '~')
1825 signal_simple_error ("Invalid value for 'final", value);
1828 else if (EQ (keyword, Qccl_program))
1830 CHECK_VECTOR (value);
1831 ccl_program = value;
1835 signal_simple_error ("Unrecognized property", keyword);
1839 error ("'final must be specified");
1840 if (dimension == 2 && final > 0x5F)
1842 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1846 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1848 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1850 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1851 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1853 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1855 id = get_unallocated_leading_byte (dimension);
1857 if (NILP (doc_string))
1858 doc_string = build_string ("");
1860 if (NILP (registry))
1861 registry = build_string ("");
1863 if (NILP (short_name))
1864 XSETSTRING (short_name, XSYMBOL (name)->name);
1866 if (NILP (long_name))
1867 long_name = doc_string;
1870 columns = dimension;
1872 if (byte_offset < 0)
1876 else if (chars == 96)
1882 charset = make_charset (id, name, type, columns, graphic,
1883 final, direction, short_name, long_name,
1884 doc_string, registry,
1885 Qnil, 0, 0, 0, byte_offset);
1886 if (!NILP (ccl_program))
1887 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1891 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1893 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1894 NEW-NAME is the name of the new charset. Return the new charset.
1896 (charset, new_name))
1898 Lisp_Object new_charset = Qnil;
1899 int id, dimension, columns, graphic, final;
1900 int direction, type;
1901 Lisp_Object registry, doc_string, short_name, long_name;
1902 struct Lisp_Charset *cs;
1904 charset = Fget_charset (charset);
1905 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1906 signal_simple_error ("Charset already has reverse-direction charset",
1909 CHECK_SYMBOL (new_name);
1910 if (!NILP (Ffind_charset (new_name)))
1911 signal_simple_error ("Cannot redefine existing charset", new_name);
1913 cs = XCHARSET (charset);
1915 type = CHARSET_TYPE (cs);
1916 columns = CHARSET_COLUMNS (cs);
1917 dimension = CHARSET_DIMENSION (cs);
1918 id = get_unallocated_leading_byte (dimension);
1920 graphic = CHARSET_GRAPHIC (cs);
1921 final = CHARSET_FINAL (cs);
1922 direction = CHARSET_RIGHT_TO_LEFT;
1923 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1924 direction = CHARSET_LEFT_TO_RIGHT;
1925 doc_string = CHARSET_DOC_STRING (cs);
1926 short_name = CHARSET_SHORT_NAME (cs);
1927 long_name = CHARSET_LONG_NAME (cs);
1928 registry = CHARSET_REGISTRY (cs);
1930 new_charset = make_charset (id, new_name, type, columns,
1931 graphic, final, direction, short_name, long_name,
1932 doc_string, registry,
1934 CHARSET_DECODING_TABLE(cs),
1935 CHARSET_UCS_MIN(cs),
1936 CHARSET_UCS_MAX(cs),
1937 CHARSET_CODE_OFFSET(cs),
1938 CHARSET_BYTE_OFFSET(cs)
1944 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1945 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1950 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1951 Define symbol ALIAS as an alias for CHARSET.
1955 CHECK_SYMBOL (alias);
1956 charset = Fget_charset (charset);
1957 return Fputhash (alias, charset, Vcharset_hash_table);
1960 /* #### Reverse direction charsets not yet implemented. */
1962 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1964 Return the reverse-direction charset parallel to CHARSET, if any.
1965 This is the charset with the same properties (in particular, the same
1966 dimension, number of characters per dimension, and final byte) as
1967 CHARSET but whose characters are displayed in the opposite direction.
1971 charset = Fget_charset (charset);
1972 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1976 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1977 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1978 If DIRECTION is omitted, both directions will be checked (left-to-right
1979 will be returned if character sets exist for both directions).
1981 (dimension, chars, final, direction))
1983 int dm, ch, fi, di = -1;
1985 Lisp_Object obj = Qnil;
1987 CHECK_INT (dimension);
1988 dm = XINT (dimension);
1989 if (dm < 1 || dm > 2)
1990 signal_simple_error ("Invalid value for DIMENSION", dimension);
1994 if (ch != 94 && ch != 96)
1995 signal_simple_error ("Invalid value for CHARS", chars);
1997 CHECK_CHAR_COERCE_INT (final);
1999 if (fi < '0' || fi > '~')
2000 signal_simple_error ("Invalid value for FINAL", final);
2002 if (EQ (direction, Ql2r))
2003 di = CHARSET_LEFT_TO_RIGHT;
2004 else if (EQ (direction, Qr2l))
2005 di = CHARSET_RIGHT_TO_LEFT;
2006 else if (!NILP (direction))
2007 signal_simple_error ("Invalid value for DIRECTION", direction);
2009 if (dm == 2 && fi > 0x5F)
2011 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2014 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2016 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2020 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2022 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2025 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2028 return XCHARSET_NAME (obj);
2032 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2033 Return short name of CHARSET.
2037 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2040 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2041 Return long name of CHARSET.
2045 return XCHARSET_LONG_NAME (Fget_charset (charset));
2048 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2049 Return description of CHARSET.
2053 return XCHARSET_DOC_STRING (Fget_charset (charset));
2056 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2057 Return dimension of CHARSET.
2061 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2064 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2065 Return property PROP of CHARSET.
2066 Recognized properties are those listed in `make-charset', as well as
2067 'name and 'doc-string.
2071 struct Lisp_Charset *cs;
2073 charset = Fget_charset (charset);
2074 cs = XCHARSET (charset);
2076 CHECK_SYMBOL (prop);
2077 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2078 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2079 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2080 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2081 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2082 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2083 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2084 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2085 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2086 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2087 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2088 if (EQ (prop, Qdirection))
2089 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2090 if (EQ (prop, Qreverse_direction_charset))
2092 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2096 return XCHARSET_NAME (obj);
2098 signal_simple_error ("Unrecognized charset property name", prop);
2099 return Qnil; /* not reached */
2102 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2103 Return charset identification number of CHARSET.
2107 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2110 /* #### We need to figure out which properties we really want to
2113 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2114 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2116 (charset, ccl_program))
2118 charset = Fget_charset (charset);
2119 CHECK_VECTOR (ccl_program);
2120 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2125 invalidate_charset_font_caches (Lisp_Object charset)
2127 /* Invalidate font cache entries for charset on all devices. */
2128 Lisp_Object devcons, concons, hash_table;
2129 DEVICE_LOOP_NO_BREAK (devcons, concons)
2131 struct device *d = XDEVICE (XCAR (devcons));
2132 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2133 if (!UNBOUNDP (hash_table))
2134 Fclrhash (hash_table);
2138 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2139 Set the 'registry property of CHARSET to REGISTRY.
2141 (charset, registry))
2143 charset = Fget_charset (charset);
2144 CHECK_STRING (registry);
2145 XCHARSET_REGISTRY (charset) = registry;
2146 invalidate_charset_font_caches (charset);
2147 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2152 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2153 Return mapping-table of CHARSET.
2157 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2160 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2161 Set mapping-table of CHARSET to TABLE.
2165 struct Lisp_Charset *cs;
2166 Lisp_Object old_table;
2169 charset = Fget_charset (charset);
2170 cs = XCHARSET (charset);
2172 if (EQ (table, Qnil))
2174 CHARSET_DECODING_TABLE(cs) = table;
2177 else if (VECTORP (table))
2181 /* ad-hoc method for `ascii' */
2182 if ((CHARSET_CHARS (cs) == 94) &&
2183 (CHARSET_BYTE_OFFSET (cs) != 33))
2184 ccs_len = 128 - CHARSET_BYTE_OFFSET (cs);
2186 ccs_len = CHARSET_CHARS (cs);
2188 if (XVECTOR_LENGTH (table) > ccs_len)
2189 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2190 old_table = CHARSET_DECODING_TABLE(cs);
2191 CHARSET_DECODING_TABLE(cs) = table;
2194 signal_error (Qwrong_type_argument,
2195 list2 (build_translated_string ("vector-or-nil-p"),
2197 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2199 switch (CHARSET_DIMENSION (cs))
2202 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2204 Lisp_Object c = XVECTOR_DATA(table)[i];
2209 list1 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2213 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2215 Lisp_Object v = XVECTOR_DATA(table)[i];
2221 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2223 CHARSET_DECODING_TABLE(cs) = old_table;
2224 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2226 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2228 Lisp_Object c = XVECTOR_DATA(v)[j];
2231 put_char_attribute (c, charset,
2234 (i + CHARSET_BYTE_OFFSET (cs)),
2236 (j + CHARSET_BYTE_OFFSET (cs))));
2240 put_char_attribute (v, charset,
2242 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2251 /************************************************************************/
2252 /* Lisp primitives for working with characters */
2253 /************************************************************************/
2255 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2256 Make a character from CHARSET and octets ARG1 and ARG2.
2257 ARG2 is required only for characters from two-dimensional charsets.
2258 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2259 character s with caron.
2261 (charset, arg1, arg2))
2263 struct Lisp_Charset *cs;
2265 int lowlim, highlim;
2267 charset = Fget_charset (charset);
2268 cs = XCHARSET (charset);
2270 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2271 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2273 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2275 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2276 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2279 /* It is useful (and safe, according to Olivier Galibert) to strip
2280 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
2281 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2282 Latin 2 code of the character. */
2290 if (a1 < lowlim || a1 > highlim)
2291 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2293 if (CHARSET_DIMENSION (cs) == 1)
2297 ("Charset is of dimension one; second octet must be nil", arg2);
2298 return make_char (MAKE_CHAR (charset, a1, 0));
2307 a2 = XINT (arg2) & 0x7f;
2309 if (a2 < lowlim || a2 > highlim)
2310 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2312 return make_char (MAKE_CHAR (charset, a1, a2));
2315 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2316 Return the character set of char CH.
2320 CHECK_CHAR_COERCE_INT (ch);
2322 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2325 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2326 Return list of charset and one or two position-codes of CHAR.
2330 /* This function can GC */
2331 struct gcpro gcpro1, gcpro2;
2332 Lisp_Object charset = Qnil;
2333 Lisp_Object rc = Qnil;
2336 GCPRO2 (charset, rc);
2337 CHECK_CHAR_COERCE_INT (character);
2339 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2341 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2343 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2347 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2355 #ifdef ENABLE_COMPOSITE_CHARS
2356 /************************************************************************/
2357 /* composite character functions */
2358 /************************************************************************/
2361 lookup_composite_char (Bufbyte *str, int len)
2363 Lisp_Object lispstr = make_string (str, len);
2364 Lisp_Object ch = Fgethash (lispstr,
2365 Vcomposite_char_string2char_hash_table,
2371 if (composite_char_row_next >= 128)
2372 signal_simple_error ("No more composite chars available", lispstr);
2373 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2374 composite_char_col_next);
2375 Fputhash (make_char (emch), lispstr,
2376 Vcomposite_char_char2string_hash_table);
2377 Fputhash (lispstr, make_char (emch),
2378 Vcomposite_char_string2char_hash_table);
2379 composite_char_col_next++;
2380 if (composite_char_col_next >= 128)
2382 composite_char_col_next = 32;
2383 composite_char_row_next++;
2392 composite_char_string (Emchar ch)
2394 Lisp_Object str = Fgethash (make_char (ch),
2395 Vcomposite_char_char2string_hash_table,
2397 assert (!UNBOUNDP (str));
2401 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2402 Convert a string into a single composite character.
2403 The character is the result of overstriking all the characters in
2408 CHECK_STRING (string);
2409 return make_char (lookup_composite_char (XSTRING_DATA (string),
2410 XSTRING_LENGTH (string)));
2413 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2414 Return a string of the characters comprising a composite character.
2422 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2423 signal_simple_error ("Must be composite char", ch);
2424 return composite_char_string (emch);
2426 #endif /* ENABLE_COMPOSITE_CHARS */
2429 /************************************************************************/
2430 /* initialization */
2431 /************************************************************************/
2434 syms_of_mule_charset (void)
2436 DEFSUBR (Fcharsetp);
2437 DEFSUBR (Ffind_charset);
2438 DEFSUBR (Fget_charset);
2439 DEFSUBR (Fcharset_list);
2440 DEFSUBR (Fcharset_name);
2441 DEFSUBR (Fmake_charset);
2442 DEFSUBR (Fmake_reverse_direction_charset);
2443 /* DEFSUBR (Freverse_direction_charset); */
2444 DEFSUBR (Fdefine_charset_alias);
2445 DEFSUBR (Fcharset_from_attributes);
2446 DEFSUBR (Fcharset_short_name);
2447 DEFSUBR (Fcharset_long_name);
2448 DEFSUBR (Fcharset_description);
2449 DEFSUBR (Fcharset_dimension);
2450 DEFSUBR (Fcharset_property);
2451 DEFSUBR (Fcharset_id);
2452 DEFSUBR (Fset_charset_ccl_program);
2453 DEFSUBR (Fset_charset_registry);
2455 DEFSUBR (Fchar_attribute_alist);
2456 DEFSUBR (Fget_char_attribute);
2457 DEFSUBR (Fput_char_attribute);
2458 DEFSUBR (Fdefine_char);
2459 DEFSUBR (Fchar_variants);
2460 DEFSUBR (Fget_composite_char);
2461 DEFSUBR (Fcharset_mapping_table);
2462 DEFSUBR (Fset_charset_mapping_table);
2465 DEFSUBR (Fmake_char);
2466 DEFSUBR (Fchar_charset);
2467 DEFSUBR (Fsplit_char);
2469 #ifdef ENABLE_COMPOSITE_CHARS
2470 DEFSUBR (Fmake_composite_char);
2471 DEFSUBR (Fcomposite_char_string);
2474 defsymbol (&Qcharsetp, "charsetp");
2475 defsymbol (&Qregistry, "registry");
2476 defsymbol (&Qfinal, "final");
2477 defsymbol (&Qgraphic, "graphic");
2478 defsymbol (&Qdirection, "direction");
2479 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2480 defsymbol (&Qshort_name, "short-name");
2481 defsymbol (&Qlong_name, "long-name");
2483 defsymbol (&Ql2r, "l2r");
2484 defsymbol (&Qr2l, "r2l");
2486 /* Charsets, compatible with FSF 20.3
2487 Naming convention is Script-Charset[-Edition] */
2488 defsymbol (&Qascii, "ascii");
2489 defsymbol (&Qcontrol_1, "control-1");
2490 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2491 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2492 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2493 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2494 defsymbol (&Qthai_tis620, "thai-tis620");
2495 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2496 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2497 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2498 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2499 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2500 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2501 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2502 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2503 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2504 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2505 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2506 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2507 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2508 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2510 defsymbol (&Q_ucs, "->ucs");
2511 defsymbol (&Q_decomposition, "->decomposition");
2512 defsymbol (&Qcompat, "compat");
2513 defsymbol (&QnoBreak, "noBreak");
2514 defsymbol (&Qsuper, "super");
2515 defsymbol (&Qsub, "sub");
2516 defsymbol (&Qfraction, "fraction");
2517 defsymbol (&Qwide, "wide");
2518 defsymbol (&Qnarrow, "narrow");
2519 defsymbol (&Qfont, "font");
2520 defsymbol (&Qucs, "ucs");
2521 defsymbol (&Qucs_bmp, "ucs-bmp");
2522 defsymbol (&Qlatin_viscii, "latin-viscii");
2523 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2524 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2525 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2526 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2527 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
2528 defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208");
2529 defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208");
2531 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2532 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2534 defsymbol (&Qcomposite, "composite");
2538 vars_of_mule_charset (void)
2545 /* Table of charsets indexed by leading byte. */
2546 for (i = 0; i < countof (charset_by_leading_byte); i++)
2547 charset_by_leading_byte[i] = Qnil;
2550 /* Table of charsets indexed by type/final-byte. */
2551 for (i = 0; i < countof (charset_by_attributes); i++)
2552 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2553 charset_by_attributes[i][j] = Qnil;
2555 /* Table of charsets indexed by type/final-byte/direction. */
2556 for (i = 0; i < countof (charset_by_attributes); i++)
2557 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2558 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2559 charset_by_attributes[i][j][k] = Qnil;
2563 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2565 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2566 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2570 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2571 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2572 Leading-code of private TYPE9N charset of column-width 1.
2574 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2578 Vutf_2000_version = build_string("0.12 (Kashiwara)");
2579 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2580 Version number of UTF-2000.
2583 staticpro (&Vcharacter_attribute_table);
2584 Vcharacter_attribute_table = make_char_code_table (Qnil);
2586 staticpro (&Vcharacter_composition_table);
2587 Vcharacter_composition_table = make_char_code_table (Qnil);
2589 staticpro (&Vcharacter_variant_table);
2590 Vcharacter_variant_table = make_char_code_table (Qnil);
2592 Vdefault_coded_charset_priority_list = Qnil;
2593 DEFVAR_LISP ("default-coded-charset-priority-list",
2594 &Vdefault_coded_charset_priority_list /*
2595 Default order of preferred coded-character-sets.
2601 complex_vars_of_mule_charset (void)
2603 staticpro (&Vcharset_hash_table);
2604 Vcharset_hash_table =
2605 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2607 /* Predefined character sets. We store them into variables for
2612 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2613 CHARSET_TYPE_256X256, 1, 2, 0,
2614 CHARSET_LEFT_TO_RIGHT,
2615 build_string ("BMP"),
2616 build_string ("BMP"),
2617 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2618 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2619 Qnil, 0, 0xFFFF, 0, 0);
2621 # define MIN_CHAR_THAI 0
2622 # define MAX_CHAR_THAI 0
2623 # define MIN_CHAR_GREEK 0
2624 # define MAX_CHAR_GREEK 0
2625 # define MIN_CHAR_HEBREW 0
2626 # define MAX_CHAR_HEBREW 0
2627 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2628 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2629 # define MIN_CHAR_CYRILLIC 0
2630 # define MAX_CHAR_CYRILLIC 0
2633 make_charset (LEADING_BYTE_ASCII, Qascii,
2634 CHARSET_TYPE_94, 1, 0, 'B',
2635 CHARSET_LEFT_TO_RIGHT,
2636 build_string ("ASCII"),
2637 build_string ("ASCII)"),
2638 build_string ("ASCII (ISO646 IRV)"),
2639 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2640 Qnil, 0, 0x7F, 0, 0);
2641 Vcharset_control_1 =
2642 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2643 CHARSET_TYPE_94, 1, 1, 0,
2644 CHARSET_LEFT_TO_RIGHT,
2645 build_string ("C1"),
2646 build_string ("Control characters"),
2647 build_string ("Control characters 128-191"),
2649 Qnil, 0x80, 0x9F, 0, 0);
2650 Vcharset_latin_iso8859_1 =
2651 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2652 CHARSET_TYPE_96, 1, 1, 'A',
2653 CHARSET_LEFT_TO_RIGHT,
2654 build_string ("Latin-1"),
2655 build_string ("ISO8859-1 (Latin-1)"),
2656 build_string ("ISO8859-1 (Latin-1)"),
2657 build_string ("iso8859-1"),
2658 Qnil, 0xA0, 0xFF, 0, 32);
2659 Vcharset_latin_iso8859_2 =
2660 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2661 CHARSET_TYPE_96, 1, 1, 'B',
2662 CHARSET_LEFT_TO_RIGHT,
2663 build_string ("Latin-2"),
2664 build_string ("ISO8859-2 (Latin-2)"),
2665 build_string ("ISO8859-2 (Latin-2)"),
2666 build_string ("iso8859-2"),
2668 Vcharset_latin_iso8859_3 =
2669 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2670 CHARSET_TYPE_96, 1, 1, 'C',
2671 CHARSET_LEFT_TO_RIGHT,
2672 build_string ("Latin-3"),
2673 build_string ("ISO8859-3 (Latin-3)"),
2674 build_string ("ISO8859-3 (Latin-3)"),
2675 build_string ("iso8859-3"),
2677 Vcharset_latin_iso8859_4 =
2678 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2679 CHARSET_TYPE_96, 1, 1, 'D',
2680 CHARSET_LEFT_TO_RIGHT,
2681 build_string ("Latin-4"),
2682 build_string ("ISO8859-4 (Latin-4)"),
2683 build_string ("ISO8859-4 (Latin-4)"),
2684 build_string ("iso8859-4"),
2686 Vcharset_thai_tis620 =
2687 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2688 CHARSET_TYPE_96, 1, 1, 'T',
2689 CHARSET_LEFT_TO_RIGHT,
2690 build_string ("TIS620"),
2691 build_string ("TIS620 (Thai)"),
2692 build_string ("TIS620.2529 (Thai)"),
2693 build_string ("tis620"),
2694 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2695 Vcharset_greek_iso8859_7 =
2696 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2697 CHARSET_TYPE_96, 1, 1, 'F',
2698 CHARSET_LEFT_TO_RIGHT,
2699 build_string ("ISO8859-7"),
2700 build_string ("ISO8859-7 (Greek)"),
2701 build_string ("ISO8859-7 (Greek)"),
2702 build_string ("iso8859-7"),
2703 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2704 Vcharset_arabic_iso8859_6 =
2705 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2706 CHARSET_TYPE_96, 1, 1, 'G',
2707 CHARSET_RIGHT_TO_LEFT,
2708 build_string ("ISO8859-6"),
2709 build_string ("ISO8859-6 (Arabic)"),
2710 build_string ("ISO8859-6 (Arabic)"),
2711 build_string ("iso8859-6"),
2713 Vcharset_hebrew_iso8859_8 =
2714 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2715 CHARSET_TYPE_96, 1, 1, 'H',
2716 CHARSET_RIGHT_TO_LEFT,
2717 build_string ("ISO8859-8"),
2718 build_string ("ISO8859-8 (Hebrew)"),
2719 build_string ("ISO8859-8 (Hebrew)"),
2720 build_string ("iso8859-8"),
2721 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2722 Vcharset_katakana_jisx0201 =
2723 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2724 CHARSET_TYPE_94, 1, 1, 'I',
2725 CHARSET_LEFT_TO_RIGHT,
2726 build_string ("JISX0201 Kana"),
2727 build_string ("JISX0201.1976 (Japanese Kana)"),
2728 build_string ("JISX0201.1976 Japanese Kana"),
2729 build_string ("jisx0201\\.1976"),
2731 MIN_CHAR_HALFWIDTH_KATAKANA,
2732 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2733 Vcharset_latin_jisx0201 =
2734 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2735 CHARSET_TYPE_94, 1, 0, 'J',
2736 CHARSET_LEFT_TO_RIGHT,
2737 build_string ("JISX0201 Roman"),
2738 build_string ("JISX0201.1976 (Japanese Roman)"),
2739 build_string ("JISX0201.1976 Japanese Roman"),
2740 build_string ("jisx0201\\.1976"),
2742 Vcharset_cyrillic_iso8859_5 =
2743 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2744 CHARSET_TYPE_96, 1, 1, 'L',
2745 CHARSET_LEFT_TO_RIGHT,
2746 build_string ("ISO8859-5"),
2747 build_string ("ISO8859-5 (Cyrillic)"),
2748 build_string ("ISO8859-5 (Cyrillic)"),
2749 build_string ("iso8859-5"),
2750 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2751 Vcharset_latin_iso8859_9 =
2752 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2753 CHARSET_TYPE_96, 1, 1, 'M',
2754 CHARSET_LEFT_TO_RIGHT,
2755 build_string ("Latin-5"),
2756 build_string ("ISO8859-9 (Latin-5)"),
2757 build_string ("ISO8859-9 (Latin-5)"),
2758 build_string ("iso8859-9"),
2760 Vcharset_japanese_jisx0208_1978 =
2761 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2762 CHARSET_TYPE_94X94, 2, 0, '@',
2763 CHARSET_LEFT_TO_RIGHT,
2764 build_string ("JIS X0208:1978"),
2765 build_string ("JIS X0208:1978 (Japanese)"),
2767 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2768 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2770 Vcharset_chinese_gb2312 =
2771 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2772 CHARSET_TYPE_94X94, 2, 0, 'A',
2773 CHARSET_LEFT_TO_RIGHT,
2774 build_string ("GB2312"),
2775 build_string ("GB2312)"),
2776 build_string ("GB2312 Chinese simplified"),
2777 build_string ("gb2312"),
2779 Vcharset_japanese_jisx0208 =
2780 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2781 CHARSET_TYPE_94X94, 2, 0, 'B',
2782 CHARSET_LEFT_TO_RIGHT,
2783 build_string ("JISX0208"),
2784 build_string ("JIS X0208:1983 (Japanese)"),
2785 build_string ("JIS X0208:1983 Japanese Kanji"),
2786 build_string ("jisx0208\\.1983"),
2788 Vcharset_korean_ksc5601 =
2789 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2790 CHARSET_TYPE_94X94, 2, 0, 'C',
2791 CHARSET_LEFT_TO_RIGHT,
2792 build_string ("KSC5601"),
2793 build_string ("KSC5601 (Korean"),
2794 build_string ("KSC5601 Korean Hangul and Hanja"),
2795 build_string ("ksc5601"),
2797 Vcharset_japanese_jisx0212 =
2798 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2799 CHARSET_TYPE_94X94, 2, 0, 'D',
2800 CHARSET_LEFT_TO_RIGHT,
2801 build_string ("JISX0212"),
2802 build_string ("JISX0212 (Japanese)"),
2803 build_string ("JISX0212 Japanese Supplement"),
2804 build_string ("jisx0212"),
2807 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2808 Vcharset_chinese_cns11643_1 =
2809 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2810 CHARSET_TYPE_94X94, 2, 0, 'G',
2811 CHARSET_LEFT_TO_RIGHT,
2812 build_string ("CNS11643-1"),
2813 build_string ("CNS11643-1 (Chinese traditional)"),
2815 ("CNS 11643 Plane 1 Chinese traditional"),
2816 build_string (CHINESE_CNS_PLANE_RE("1")),
2818 Vcharset_chinese_cns11643_2 =
2819 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2820 CHARSET_TYPE_94X94, 2, 0, 'H',
2821 CHARSET_LEFT_TO_RIGHT,
2822 build_string ("CNS11643-2"),
2823 build_string ("CNS11643-2 (Chinese traditional)"),
2825 ("CNS 11643 Plane 2 Chinese traditional"),
2826 build_string (CHINESE_CNS_PLANE_RE("2")),
2829 Vcharset_latin_viscii_lower =
2830 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2831 CHARSET_TYPE_96, 1, 1, '1',
2832 CHARSET_LEFT_TO_RIGHT,
2833 build_string ("VISCII lower"),
2834 build_string ("VISCII lower (Vietnamese)"),
2835 build_string ("VISCII lower (Vietnamese)"),
2836 build_string ("MULEVISCII-LOWER"),
2838 Vcharset_latin_viscii_upper =
2839 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2840 CHARSET_TYPE_96, 1, 1, '2',
2841 CHARSET_LEFT_TO_RIGHT,
2842 build_string ("VISCII upper"),
2843 build_string ("VISCII upper (Vietnamese)"),
2844 build_string ("VISCII upper (Vietnamese)"),
2845 build_string ("MULEVISCII-UPPER"),
2847 Vcharset_latin_viscii =
2848 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2849 CHARSET_TYPE_256, 1, 2, 0,
2850 CHARSET_LEFT_TO_RIGHT,
2851 build_string ("VISCII"),
2852 build_string ("VISCII 1.1 (Vietnamese)"),
2853 build_string ("VISCII 1.1 (Vietnamese)"),
2854 build_string ("VISCII1\\.1"),
2856 Vcharset_ethiopic_ucs =
2857 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs,
2858 CHARSET_TYPE_256X256, 2, 2, 0,
2859 CHARSET_LEFT_TO_RIGHT,
2860 build_string ("Ethiopic (UCS)"),
2861 build_string ("Ethiopic (UCS)"),
2862 build_string ("Ethiopic of UCS"),
2863 build_string ("Ethiopic-Unicode"),
2864 Qnil, 0x1200, 0x137F, 0x1200, 0);
2865 Vcharset_hiragana_jisx0208 =
2866 make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
2867 CHARSET_TYPE_94X94, 2, 0, 'B',
2868 CHARSET_LEFT_TO_RIGHT,
2869 build_string ("Hiragana"),
2870 build_string ("Hiragana of JIS X0208"),
2871 build_string ("Japanese Hiragana of JIS X0208"),
2872 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2873 Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA,
2874 (0x24 - 33) * 94 + (0x21 - 33), 33);
2875 Vcharset_katakana_jisx0208 =
2876 make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208,
2877 CHARSET_TYPE_94X94, 2, 0, 'B',
2878 CHARSET_LEFT_TO_RIGHT,
2879 build_string ("Katakana"),
2880 build_string ("Katakana of JIS X0208"),
2881 build_string ("Japanese Katakana of JIS X0208"),
2882 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2883 Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA,
2884 (0x25 - 33) * 94 + (0x21 - 33), 33);
2886 Vcharset_chinese_big5_1 =
2887 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2888 CHARSET_TYPE_94X94, 2, 0, '0',
2889 CHARSET_LEFT_TO_RIGHT,
2890 build_string ("Big5"),
2891 build_string ("Big5 (Level-1)"),
2893 ("Big5 Level-1 Chinese traditional"),
2894 build_string ("big5"),
2896 Vcharset_chinese_big5_2 =
2897 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2898 CHARSET_TYPE_94X94, 2, 0, '1',
2899 CHARSET_LEFT_TO_RIGHT,
2900 build_string ("Big5"),
2901 build_string ("Big5 (Level-2)"),
2903 ("Big5 Level-2 Chinese traditional"),
2904 build_string ("big5"),
2907 #ifdef ENABLE_COMPOSITE_CHARS
2908 /* #### For simplicity, we put composite chars into a 96x96 charset.
2909 This is going to lead to problems because you can run out of
2910 room, esp. as we don't yet recycle numbers. */
2911 Vcharset_composite =
2912 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2913 CHARSET_TYPE_96X96, 2, 0, 0,
2914 CHARSET_LEFT_TO_RIGHT,
2915 build_string ("Composite"),
2916 build_string ("Composite characters"),
2917 build_string ("Composite characters"),
2920 composite_char_row_next = 32;
2921 composite_char_col_next = 32;
2923 Vcomposite_char_string2char_hash_table =
2924 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2925 Vcomposite_char_char2string_hash_table =
2926 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2927 staticpro (&Vcomposite_char_string2char_hash_table);
2928 staticpro (&Vcomposite_char_char2string_hash_table);
2929 #endif /* ENABLE_COMPOSITE_CHARS */