1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 20.3. Not in FSF. */
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_japanese_jisx0208;
60 Lisp_Object Vcharset_japanese_jisx0208_1990;
61 Lisp_Object Vcharset_korean_ksc5601;
62 Lisp_Object Vcharset_japanese_jisx0212;
63 Lisp_Object Vcharset_chinese_cns11643_1;
64 Lisp_Object Vcharset_chinese_cns11643_2;
66 Lisp_Object Vcharset_ucs;
67 Lisp_Object Vcharset_ucs_bmp;
68 Lisp_Object Vcharset_ucs_cns;
69 Lisp_Object Vcharset_latin_viscii;
70 Lisp_Object Vcharset_latin_tcvn5712;
71 Lisp_Object Vcharset_latin_viscii_lower;
72 Lisp_Object Vcharset_latin_viscii_upper;
73 Lisp_Object Vcharset_chinese_big5;
74 Lisp_Object Vcharset_ideograph_daikanwa;
75 Lisp_Object Vcharset_mojikyo;
76 Lisp_Object Vcharset_mojikyo_2022_1;
77 Lisp_Object Vcharset_mojikyo_pj_1;
78 Lisp_Object Vcharset_mojikyo_pj_2;
79 Lisp_Object Vcharset_mojikyo_pj_3;
80 Lisp_Object Vcharset_mojikyo_pj_4;
81 Lisp_Object Vcharset_mojikyo_pj_5;
82 Lisp_Object Vcharset_mojikyo_pj_6;
83 Lisp_Object Vcharset_mojikyo_pj_7;
84 Lisp_Object Vcharset_mojikyo_pj_8;
85 Lisp_Object Vcharset_mojikyo_pj_9;
86 Lisp_Object Vcharset_mojikyo_pj_10;
87 Lisp_Object Vcharset_mojikyo_pj_11;
88 Lisp_Object Vcharset_mojikyo_pj_12;
89 Lisp_Object Vcharset_mojikyo_pj_13;
90 Lisp_Object Vcharset_mojikyo_pj_14;
91 Lisp_Object Vcharset_mojikyo_pj_15;
92 Lisp_Object Vcharset_mojikyo_pj_16;
93 Lisp_Object Vcharset_mojikyo_pj_17;
94 Lisp_Object Vcharset_mojikyo_pj_18;
95 Lisp_Object Vcharset_mojikyo_pj_19;
96 Lisp_Object Vcharset_mojikyo_pj_20;
97 Lisp_Object Vcharset_mojikyo_pj_21;
98 Lisp_Object Vcharset_ethiopic_ucs;
100 Lisp_Object Vcharset_chinese_big5_1;
101 Lisp_Object Vcharset_chinese_big5_2;
103 #ifdef ENABLE_COMPOSITE_CHARS
104 Lisp_Object Vcharset_composite;
106 /* Hash tables for composite chars. One maps string representing
107 composed chars to their equivalent chars; one goes the
109 Lisp_Object Vcomposite_char_char2string_hash_table;
110 Lisp_Object Vcomposite_char_string2char_hash_table;
112 static int composite_char_row_next;
113 static int composite_char_col_next;
115 #endif /* ENABLE_COMPOSITE_CHARS */
117 struct charset_lookup *chlook;
119 static const struct lrecord_description charset_lookup_description_1[] = {
120 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
129 static const struct struct_description charset_lookup_description = {
130 sizeof (struct charset_lookup),
131 charset_lookup_description_1
135 /* Table of number of bytes in the string representation of a character
136 indexed by the first byte of that representation.
138 rep_bytes_by_first_byte(c) is more efficient than the equivalent
139 canonical computation:
141 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
143 const Bytecount rep_bytes_by_first_byte[0xA0] =
144 { /* 0x00 - 0x7f are for straight ASCII */
145 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
146 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
147 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
148 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
149 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
150 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
151 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
152 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
153 /* 0x80 - 0x8f are for Dimension-1 official charsets */
155 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
157 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
159 /* 0x90 - 0x9d are for Dimension-2 official charsets */
160 /* 0x9e is for Dimension-1 private charsets */
161 /* 0x9f is for Dimension-2 private charsets */
162 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
168 #define BT_UINT8_MIN 0
169 #define BT_UINT8_MAX (UCHAR_MAX - 3)
170 #define BT_UINT8_t (UCHAR_MAX - 2)
171 #define BT_UINT8_nil (UCHAR_MAX - 1)
172 #define BT_UINT8_unbound UCHAR_MAX
174 INLINE_HEADER int INT_UINT8_P (Lisp_Object obj);
175 INLINE_HEADER int UINT8_VALUE_P (Lisp_Object obj);
176 INLINE_HEADER unsigned char UINT8_ENCODE (Lisp_Object obj);
177 INLINE_HEADER Lisp_Object UINT8_DECODE (unsigned char n);
178 INLINE_HEADER unsigned short UINT8_TO_UINT16 (unsigned char n);
181 INT_UINT8_P (Lisp_Object obj)
185 int num = XINT (obj);
187 return (BT_UINT8_MIN <= num) && (num <= BT_UINT8_MAX);
194 UINT8_VALUE_P (Lisp_Object obj)
196 return EQ (obj, Qunbound)
197 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT8_P (obj);
200 INLINE_HEADER unsigned char
201 UINT8_ENCODE (Lisp_Object obj)
203 if (EQ (obj, Qunbound))
204 return BT_UINT8_unbound;
205 else if (EQ (obj, Qnil))
207 else if (EQ (obj, Qt))
213 INLINE_HEADER Lisp_Object
214 UINT8_DECODE (unsigned char n)
216 if (n == BT_UINT8_unbound)
218 else if (n == BT_UINT8_nil)
220 else if (n == BT_UINT8_t)
227 mark_uint8_byte_table (Lisp_Object obj)
233 print_uint8_byte_table (Lisp_Object obj,
234 Lisp_Object printcharfun, int escapeflag)
236 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
238 struct gcpro gcpro1, gcpro2;
239 GCPRO2 (obj, printcharfun);
241 write_c_string ("\n#<uint8-byte-table", printcharfun);
242 for (i = 0; i < 256; i++)
244 unsigned char n = bte->property[i];
246 write_c_string ("\n ", printcharfun);
247 write_c_string (" ", printcharfun);
248 if (n == BT_UINT8_unbound)
249 write_c_string ("void", printcharfun);
250 else if (n == BT_UINT8_nil)
251 write_c_string ("nil", printcharfun);
252 else if (n == BT_UINT8_t)
253 write_c_string ("t", printcharfun);
258 sprintf (buf, "%hd", n);
259 write_c_string (buf, printcharfun);
263 write_c_string (">", printcharfun);
267 uint8_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
269 Lisp_Uint8_Byte_Table *te1 = XUINT8_BYTE_TABLE (obj1);
270 Lisp_Uint8_Byte_Table *te2 = XUINT8_BYTE_TABLE (obj2);
273 for (i = 0; i < 256; i++)
274 if (te1->property[i] != te2->property[i])
280 uint8_byte_table_hash (Lisp_Object obj, int depth)
282 Lisp_Uint8_Byte_Table *te = XUINT8_BYTE_TABLE (obj);
286 for (i = 0; i < 256; i++)
287 hash = HASH2 (hash, te->property[i]);
291 DEFINE_LRECORD_IMPLEMENTATION ("uint8-byte-table", uint8_byte_table,
292 mark_uint8_byte_table,
293 print_uint8_byte_table,
294 0, uint8_byte_table_equal,
295 uint8_byte_table_hash,
296 0 /* uint8_byte_table_description */,
297 Lisp_Uint8_Byte_Table);
300 make_uint8_byte_table (unsigned char initval)
304 Lisp_Uint8_Byte_Table *cte;
306 cte = alloc_lcrecord_type (Lisp_Uint8_Byte_Table,
307 &lrecord_uint8_byte_table);
309 for (i = 0; i < 256; i++)
310 cte->property[i] = initval;
312 XSETUINT8_BYTE_TABLE (obj, cte);
317 uint8_byte_table_same_value_p (Lisp_Object obj)
319 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
320 unsigned char v0 = bte->property[0];
323 for (i = 1; i < 256; i++)
325 if (bte->property[i] != v0)
332 #define BT_UINT16_MIN 0
333 #define BT_UINT16_MAX (USHRT_MAX - 3)
334 #define BT_UINT16_t (USHRT_MAX - 2)
335 #define BT_UINT16_nil (USHRT_MAX - 1)
336 #define BT_UINT16_unbound USHRT_MAX
338 INLINE_HEADER int INT_UINT16_P (Lisp_Object obj);
339 INLINE_HEADER int UINT16_VALUE_P (Lisp_Object obj);
340 INLINE_HEADER unsigned short UINT16_ENCODE (Lisp_Object obj);
341 INLINE_HEADER Lisp_Object UINT16_DECODE (unsigned short us);
344 INT_UINT16_P (Lisp_Object obj)
348 int num = XINT (obj);
350 return (BT_UINT16_MIN <= num) && (num <= BT_UINT16_MAX);
357 UINT16_VALUE_P (Lisp_Object obj)
359 return EQ (obj, Qunbound)
360 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT16_P (obj);
363 INLINE_HEADER unsigned short
364 UINT16_ENCODE (Lisp_Object obj)
366 if (EQ (obj, Qunbound))
367 return BT_UINT16_unbound;
368 else if (EQ (obj, Qnil))
369 return BT_UINT16_nil;
370 else if (EQ (obj, Qt))
376 INLINE_HEADER Lisp_Object
377 UINT16_DECODE (unsigned short n)
379 if (n == BT_UINT16_unbound)
381 else if (n == BT_UINT16_nil)
383 else if (n == BT_UINT16_t)
389 INLINE_HEADER unsigned short
390 UINT8_TO_UINT16 (unsigned char n)
392 if (n == BT_UINT8_unbound)
393 return BT_UINT16_unbound;
394 else if (n == BT_UINT8_nil)
395 return BT_UINT16_nil;
396 else if (n == BT_UINT8_t)
403 mark_uint16_byte_table (Lisp_Object obj)
409 print_uint16_byte_table (Lisp_Object obj,
410 Lisp_Object printcharfun, int escapeflag)
412 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
414 struct gcpro gcpro1, gcpro2;
415 GCPRO2 (obj, printcharfun);
417 write_c_string ("\n#<uint16-byte-table", printcharfun);
418 for (i = 0; i < 256; i++)
420 unsigned short n = bte->property[i];
422 write_c_string ("\n ", printcharfun);
423 write_c_string (" ", printcharfun);
424 if (n == BT_UINT16_unbound)
425 write_c_string ("void", printcharfun);
426 else if (n == BT_UINT16_nil)
427 write_c_string ("nil", printcharfun);
428 else if (n == BT_UINT16_t)
429 write_c_string ("t", printcharfun);
434 sprintf (buf, "%hd", n);
435 write_c_string (buf, printcharfun);
439 write_c_string (">", printcharfun);
443 uint16_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
445 Lisp_Uint16_Byte_Table *te1 = XUINT16_BYTE_TABLE (obj1);
446 Lisp_Uint16_Byte_Table *te2 = XUINT16_BYTE_TABLE (obj2);
449 for (i = 0; i < 256; i++)
450 if (te1->property[i] != te2->property[i])
456 uint16_byte_table_hash (Lisp_Object obj, int depth)
458 Lisp_Uint16_Byte_Table *te = XUINT16_BYTE_TABLE (obj);
462 for (i = 0; i < 256; i++)
463 hash = HASH2 (hash, te->property[i]);
467 DEFINE_LRECORD_IMPLEMENTATION ("uint16-byte-table", uint16_byte_table,
468 mark_uint16_byte_table,
469 print_uint16_byte_table,
470 0, uint16_byte_table_equal,
471 uint16_byte_table_hash,
472 0 /* uint16_byte_table_description */,
473 Lisp_Uint16_Byte_Table);
476 make_uint16_byte_table (unsigned short initval)
480 Lisp_Uint16_Byte_Table *cte;
482 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
483 &lrecord_uint16_byte_table);
485 for (i = 0; i < 256; i++)
486 cte->property[i] = initval;
488 XSETUINT16_BYTE_TABLE (obj, cte);
493 expand_uint8_byte_table_to_uint16 (Lisp_Object table)
497 Lisp_Uint8_Byte_Table* bte = XUINT8_BYTE_TABLE(table);
498 Lisp_Uint16_Byte_Table* cte;
500 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
501 &lrecord_uint16_byte_table);
502 for (i = 0; i < 256; i++)
504 cte->property[i] = UINT8_TO_UINT16 (bte->property[i]);
506 XSETUINT16_BYTE_TABLE (obj, cte);
511 uint16_byte_table_same_value_p (Lisp_Object obj)
513 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
514 unsigned short v0 = bte->property[0];
517 for (i = 1; i < 256; i++)
519 if (bte->property[i] != v0)
527 mark_byte_table (Lisp_Object obj)
529 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
532 for (i = 0; i < 256; i++)
534 mark_object (cte->property[i]);
540 print_byte_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
542 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
544 struct gcpro gcpro1, gcpro2;
545 GCPRO2 (obj, printcharfun);
547 write_c_string ("\n#<byte-table", printcharfun);
548 for (i = 0; i < 256; i++)
550 Lisp_Object elt = bte->property[i];
552 write_c_string ("\n ", printcharfun);
553 write_c_string (" ", printcharfun);
554 if (EQ (elt, Qunbound))
555 write_c_string ("void", printcharfun);
557 print_internal (elt, printcharfun, escapeflag);
560 write_c_string (">", printcharfun);
564 byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
566 Lisp_Byte_Table *cte1 = XBYTE_TABLE (obj1);
567 Lisp_Byte_Table *cte2 = XBYTE_TABLE (obj2);
570 for (i = 0; i < 256; i++)
571 if (BYTE_TABLE_P (cte1->property[i]))
573 if (BYTE_TABLE_P (cte2->property[i]))
575 if (!byte_table_equal (cte1->property[i],
576 cte2->property[i], depth + 1))
583 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
589 byte_table_hash (Lisp_Object obj, int depth)
591 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
593 return internal_array_hash (cte->property, 256, depth);
596 static const struct lrecord_description byte_table_description[] = {
597 { XD_LISP_OBJECT_ARRAY, offsetof(Lisp_Byte_Table, property), 256 },
601 DEFINE_LRECORD_IMPLEMENTATION ("byte-table", byte_table,
606 byte_table_description,
610 make_byte_table (Lisp_Object initval)
614 Lisp_Byte_Table *cte;
616 cte = alloc_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
618 for (i = 0; i < 256; i++)
619 cte->property[i] = initval;
621 XSETBYTE_TABLE (obj, cte);
626 byte_table_same_value_p (Lisp_Object obj)
628 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
629 Lisp_Object v0 = bte->property[0];
632 for (i = 1; i < 256; i++)
634 if (!internal_equal (bte->property[i], v0, 0))
641 Lisp_Object get_byte_table (Lisp_Object table, unsigned char idx);
642 Lisp_Object put_byte_table (Lisp_Object table, unsigned char idx,
646 get_byte_table (Lisp_Object table, unsigned char idx)
648 if (UINT8_BYTE_TABLE_P (table))
649 return UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[idx]);
650 else if (UINT16_BYTE_TABLE_P (table))
651 return UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[idx]);
652 else if (BYTE_TABLE_P (table))
653 return XBYTE_TABLE(table)->property[idx];
659 put_byte_table (Lisp_Object table, unsigned char idx, Lisp_Object value)
661 if (UINT8_BYTE_TABLE_P (table))
663 if (UINT8_VALUE_P (value))
665 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
666 if (!UINT8_BYTE_TABLE_P (value) &&
667 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
668 && uint8_byte_table_same_value_p (table))
673 else if (UINT16_VALUE_P (value))
675 Lisp_Object new = expand_uint8_byte_table_to_uint16 (table);
677 XUINT16_BYTE_TABLE(new)->property[idx] = UINT16_ENCODE (value);
682 Lisp_Object new = make_byte_table (Qnil);
685 for (i = 0; i < 256; i++)
687 XBYTE_TABLE(new)->property[i]
688 = UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[i]);
690 XBYTE_TABLE(new)->property[idx] = value;
694 else if (UINT16_BYTE_TABLE_P (table))
696 if (UINT16_VALUE_P (value))
698 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
699 if (!UINT8_BYTE_TABLE_P (value) &&
700 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
701 && uint16_byte_table_same_value_p (table))
708 Lisp_Object new = make_byte_table (Qnil);
711 for (i = 0; i < 256; i++)
713 XBYTE_TABLE(new)->property[i]
714 = UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[i]);
716 XBYTE_TABLE(new)->property[idx] = value;
720 else if (BYTE_TABLE_P (table))
722 XBYTE_TABLE(table)->property[idx] = value;
723 if (!UINT8_BYTE_TABLE_P (value) &&
724 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
725 && byte_table_same_value_p (table))
730 else if (!internal_equal (table, value, 0))
732 if (UINT8_VALUE_P (table) && UINT8_VALUE_P (value))
734 table = make_uint8_byte_table (UINT8_ENCODE (table));
735 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
737 else if (UINT16_VALUE_P (table) && UINT16_VALUE_P (value))
739 table = make_uint16_byte_table (UINT16_ENCODE (table));
740 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
744 table = make_byte_table (table);
745 XBYTE_TABLE(table)->property[idx] = value;
752 mark_char_id_table (Lisp_Object obj)
754 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
760 print_char_id_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
762 Lisp_Object table = XCHAR_ID_TABLE (obj)->table;
764 struct gcpro gcpro1, gcpro2;
765 GCPRO2 (obj, printcharfun);
767 write_c_string ("#<char-id-table ", printcharfun);
768 for (i = 0; i < 256; i++)
770 Lisp_Object elt = get_byte_table (table, i);
771 if (i != 0) write_c_string ("\n ", printcharfun);
772 if (EQ (elt, Qunbound))
773 write_c_string ("void", printcharfun);
775 print_internal (elt, printcharfun, escapeflag);
778 write_c_string (">", printcharfun);
782 char_id_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
784 Lisp_Object table1 = XCHAR_ID_TABLE (obj1)->table;
785 Lisp_Object table2 = XCHAR_ID_TABLE (obj2)->table;
788 for (i = 0; i < 256; i++)
790 if (!internal_equal (get_byte_table (table1, i),
791 get_byte_table (table2, i), 0))
798 char_id_table_hash (Lisp_Object obj, int depth)
800 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
802 return char_id_table_hash (cte->table, depth + 1);
805 static const struct lrecord_description char_id_table_description[] = {
806 { XD_LISP_OBJECT, offsetof(Lisp_Char_ID_Table, table) },
810 DEFINE_LRECORD_IMPLEMENTATION ("char-id-table", char_id_table,
813 0, char_id_table_equal,
815 char_id_table_description,
819 make_char_id_table (Lisp_Object initval)
822 Lisp_Char_ID_Table *cte;
824 cte = alloc_lcrecord_type (Lisp_Char_ID_Table, &lrecord_char_id_table);
826 cte->table = make_byte_table (initval);
828 XSETCHAR_ID_TABLE (obj, cte);
834 get_char_id_table (Emchar ch, Lisp_Object table)
836 unsigned int code = ch;
843 (XCHAR_ID_TABLE (table)->table,
844 (unsigned char)(code >> 24)),
845 (unsigned char) (code >> 16)),
846 (unsigned char) (code >> 8)),
847 (unsigned char) code);
850 void put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table);
852 put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table)
854 unsigned int code = ch;
855 Lisp_Object table1, table2, table3, table4;
857 table1 = XCHAR_ID_TABLE (table)->table;
858 table2 = get_byte_table (table1, (unsigned char)(code >> 24));
859 table3 = get_byte_table (table2, (unsigned char)(code >> 16));
860 table4 = get_byte_table (table3, (unsigned char)(code >> 8));
862 table4 = put_byte_table (table4, (unsigned char)code, value);
863 table3 = put_byte_table (table3, (unsigned char)(code >> 8), table4);
864 table2 = put_byte_table (table2, (unsigned char)(code >> 16), table3);
865 XCHAR_ID_TABLE (table)->table
866 = put_byte_table (table1, (unsigned char)(code >> 24), table2);
870 Lisp_Object Vchar_attribute_hash_table;
871 Lisp_Object Vcharacter_composition_table;
872 Lisp_Object Vcharacter_variant_table;
874 Lisp_Object Qideograph_daikanwa;
875 Lisp_Object Q_decomposition;
879 Lisp_Object Qisolated;
880 Lisp_Object Qinitial;
883 Lisp_Object Qvertical;
884 Lisp_Object QnoBreak;
885 Lisp_Object Qfraction;
895 Emchar to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg);
897 Lisp_Object put_char_ccs_code_point (Lisp_Object character,
898 Lisp_Object ccs, Lisp_Object value);
899 Lisp_Object remove_char_ccs (Lisp_Object character, Lisp_Object ccs);
902 to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
908 else if (EQ (v, Qcompat))
910 else if (EQ (v, Qisolated))
912 else if (EQ (v, Qinitial))
914 else if (EQ (v, Qmedial))
916 else if (EQ (v, Qfinal))
918 else if (EQ (v, Qvertical))
920 else if (EQ (v, QnoBreak))
922 else if (EQ (v, Qfraction))
924 else if (EQ (v, Qsuper))
926 else if (EQ (v, Qsub))
928 else if (EQ (v, Qcircle))
930 else if (EQ (v, Qsquare))
932 else if (EQ (v, Qwide))
934 else if (EQ (v, Qnarrow))
936 else if (EQ (v, Qsmall))
938 else if (EQ (v, Qfont))
941 signal_simple_error (err_msg, err_arg);
944 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
945 Return character corresponding with list.
949 Lisp_Object table = Vcharacter_composition_table;
950 Lisp_Object rest = list;
954 Lisp_Object v = Fcar (rest);
956 Emchar c = to_char_id (v, "Invalid value for composition", list);
958 ret = get_char_id_table (c, table);
963 if (!CHAR_ID_TABLE_P (ret))
968 else if (!CONSP (rest))
970 else if (CHAR_ID_TABLE_P (ret))
973 signal_simple_error ("Invalid table is found with", list);
975 signal_simple_error ("Invalid value for composition", list);
978 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
979 Return variants of CHARACTER.
983 CHECK_CHAR (character);
984 return Fcopy_list (get_char_id_table (XCHAR (character),
985 Vcharacter_variant_table));
989 /* We store the char-attributes in hash tables with the names as the
990 key and the actual char-id-table object as the value. Occasionally
991 we need to use them in a list format. These routines provide us
993 struct char_attribute_list_closure
995 Lisp_Object *char_attribute_list;
999 add_char_attribute_to_list_mapper (Lisp_Object key, Lisp_Object value,
1000 void *char_attribute_list_closure)
1002 /* This function can GC */
1003 struct char_attribute_list_closure *calcl
1004 = (struct char_attribute_list_closure*) char_attribute_list_closure;
1005 Lisp_Object *char_attribute_list = calcl->char_attribute_list;
1007 *char_attribute_list = Fcons (key, *char_attribute_list);
1011 DEFUN ("char-attribute-list", Fchar_attribute_list, 0, 0, 0, /*
1012 Return the list of all existing character attributes except coded-charsets.
1016 Lisp_Object char_attribute_list = Qnil;
1017 struct gcpro gcpro1;
1018 struct char_attribute_list_closure char_attribute_list_closure;
1020 GCPRO1 (char_attribute_list);
1021 char_attribute_list_closure.char_attribute_list = &char_attribute_list;
1022 elisp_maphash (add_char_attribute_to_list_mapper,
1023 Vchar_attribute_hash_table,
1024 &char_attribute_list_closure);
1026 return char_attribute_list;
1029 DEFUN ("find-char-attribute-table", Ffind_char_attribute_table, 1, 1, 0, /*
1030 Return char-id-table corresponding to ATTRIBUTE.
1034 return Fgethash (attribute, Vchar_attribute_hash_table, Qnil);
1038 /* We store the char-id-tables in hash tables with the attributes as
1039 the key and the actual char-id-table object as the value. Each
1040 char-id-table stores values of an attribute corresponding with
1041 characters. Occasionally we need to get attributes of a character
1042 in a association-list format. These routines provide us with
1044 struct char_attribute_alist_closure
1047 Lisp_Object *char_attribute_alist;
1051 add_char_attribute_alist_mapper (Lisp_Object key, Lisp_Object value,
1052 void *char_attribute_alist_closure)
1054 /* This function can GC */
1055 struct char_attribute_alist_closure *caacl =
1056 (struct char_attribute_alist_closure*) char_attribute_alist_closure;
1057 Lisp_Object ret = get_char_id_table (caacl->char_id, value);
1058 if (!UNBOUNDP (ret))
1060 Lisp_Object *char_attribute_alist = caacl->char_attribute_alist;
1061 *char_attribute_alist
1062 = Fcons (Fcons (key, ret), *char_attribute_alist);
1067 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
1068 Return the alist of attributes of CHARACTER.
1072 Lisp_Object alist = Qnil;
1075 CHECK_CHAR (character);
1077 struct gcpro gcpro1;
1078 struct char_attribute_alist_closure char_attribute_alist_closure;
1081 char_attribute_alist_closure.char_id = XCHAR (character);
1082 char_attribute_alist_closure.char_attribute_alist = &alist;
1083 elisp_maphash (add_char_attribute_alist_mapper,
1084 Vchar_attribute_hash_table,
1085 &char_attribute_alist_closure);
1089 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
1091 Lisp_Object ccs = chlook->charset_by_leading_byte[i];
1095 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1098 if ( CHAR_ID_TABLE_P (encoding_table)
1099 && INTP (cpos = get_char_id_table (XCHAR (character),
1102 alist = Fcons (Fcons (ccs, cpos), alist);
1109 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 3, 0, /*
1110 Return the value of CHARACTER's ATTRIBUTE.
1111 Return DEFAULT-VALUE if the value is not exist.
1113 (character, attribute, default_value))
1117 CHECK_CHAR (character);
1118 if (!NILP (ccs = Ffind_charset (attribute)))
1120 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1122 if (CHAR_ID_TABLE_P (encoding_table))
1123 return get_char_id_table (XCHAR (character), encoding_table);
1127 Lisp_Object table = Fgethash (attribute,
1128 Vchar_attribute_hash_table,
1130 if (!UNBOUNDP (table))
1132 Lisp_Object ret = get_char_id_table (XCHAR (character), table);
1133 if (!UNBOUNDP (ret))
1137 return default_value;
1140 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
1141 Store CHARACTER's ATTRIBUTE with VALUE.
1143 (character, attribute, value))
1147 CHECK_CHAR (character);
1148 ccs = Ffind_charset (attribute);
1151 return put_char_ccs_code_point (character, ccs, value);
1153 else if (EQ (attribute, Q_decomposition))
1158 signal_simple_error ("Invalid value for ->decomposition",
1161 if (CONSP (Fcdr (value)))
1163 Lisp_Object rest = value;
1164 Lisp_Object table = Vcharacter_composition_table;
1168 GET_EXTERNAL_LIST_LENGTH (rest, len);
1169 seq = make_vector (len, Qnil);
1171 while (CONSP (rest))
1173 Lisp_Object v = Fcar (rest);
1176 = to_char_id (v, "Invalid value for ->decomposition", value);
1179 XVECTOR_DATA(seq)[i++] = v;
1181 XVECTOR_DATA(seq)[i++] = make_char (c);
1185 put_char_id_table (c, character, table);
1190 ntable = get_char_id_table (c, table);
1191 if (!CHAR_ID_TABLE_P (ntable))
1193 ntable = make_char_id_table (Qnil);
1194 put_char_id_table (c, ntable, table);
1202 Lisp_Object v = Fcar (value);
1206 Emchar c = XINT (v);
1208 = get_char_id_table (c, Vcharacter_variant_table);
1210 if (NILP (Fmemq (v, ret)))
1212 put_char_id_table (c, Fcons (character, ret),
1213 Vcharacter_variant_table);
1216 seq = make_vector (1, v);
1220 else if (EQ (attribute, Q_ucs))
1226 signal_simple_error ("Invalid value for ->ucs", value);
1230 ret = get_char_id_table (c, Vcharacter_variant_table);
1231 if (NILP (Fmemq (character, ret)))
1233 put_char_id_table (c, Fcons (character, ret),
1234 Vcharacter_variant_table);
1238 Lisp_Object table = Fgethash (attribute,
1239 Vchar_attribute_hash_table,
1244 table = make_char_id_table (Qunbound);
1245 Fputhash (attribute, table, Vchar_attribute_hash_table);
1247 put_char_id_table (XCHAR (character), value, table);
1252 DEFUN ("remove-char-attribute", Fremove_char_attribute, 2, 2, 0, /*
1253 Remove CHARACTER's ATTRIBUTE.
1255 (character, attribute))
1259 CHECK_CHAR (character);
1260 ccs = Ffind_charset (attribute);
1263 return remove_char_ccs (character, ccs);
1267 Lisp_Object table = Fgethash (attribute,
1268 Vchar_attribute_hash_table,
1270 if (!UNBOUNDP (table))
1272 put_char_id_table (XCHAR (character), Qunbound, table);
1279 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
1281 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
1283 /* ad-hoc method for `ascii' */
1284 if ((CHARSET_CHARS (cs) == 94) &&
1285 (CHARSET_BYTE_OFFSET (cs) != 33))
1286 return 128 - CHARSET_BYTE_OFFSET (cs);
1288 return CHARSET_CHARS (cs);
1291 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
1293 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
1295 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
1299 if (XVECTOR_LENGTH (v) > ccs_len)
1302 for (i = 0; i < XVECTOR_LENGTH (v); i++)
1304 Lisp_Object c = XVECTOR_DATA(v)[i];
1306 if (!NILP (c) && !CHARP (c))
1310 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
1322 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1325 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1335 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1336 nv = XVECTOR_DATA(v)[i];
1342 XVECTOR_DATA(v)[i] = Qnil;
1346 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1347 int code_point, Lisp_Object character);
1349 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1350 int code_point, Lisp_Object character)
1354 int ccs_len = XVECTOR_LENGTH (v);
1359 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1360 nv = XVECTOR_DATA(v)[i];
1364 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
1370 XVECTOR_DATA(v)[i] = character;
1374 put_char_ccs_code_point (Lisp_Object character,
1375 Lisp_Object ccs, Lisp_Object value)
1377 Lisp_Object encoding_table;
1379 if (!EQ (XCHARSET_NAME (ccs), Qucs)
1380 || (XCHAR (character) != XINT (value)))
1382 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
1383 int dim = XCHARSET_DIMENSION (ccs);
1384 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
1385 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
1389 { /* obsolete representation: value must be a list of bytes */
1390 Lisp_Object ret = Fcar (value);
1394 signal_simple_error ("Invalid value for coded-charset", value);
1395 code_point = XINT (ret);
1396 if (XCHARSET_GRAPHIC (ccs) == 1)
1398 rest = Fcdr (value);
1399 while (!NILP (rest))
1404 signal_simple_error ("Invalid value for coded-charset",
1408 signal_simple_error ("Invalid value for coded-charset",
1411 if (XCHARSET_GRAPHIC (ccs) == 1)
1413 code_point = (code_point << 8) | j;
1416 value = make_int (code_point);
1418 else if (INTP (value))
1420 code_point = XINT (value);
1421 if (XCHARSET_GRAPHIC (ccs) == 1)
1423 code_point &= 0x7F7F7F7F;
1424 value = make_int (code_point);
1428 signal_simple_error ("Invalid value for coded-charset", value);
1432 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
1435 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
1440 XCHARSET_DECODING_TABLE (ccs)
1441 = v = make_older_vector (ccs_len, Qnil);
1444 decoding_table_put_char (v, dim, byte_offset, code_point, character);
1446 if (NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
1448 XCHARSET_ENCODING_TABLE (ccs)
1449 = encoding_table = make_char_id_table (Qnil);
1451 put_char_id_table (XCHAR (character), value, encoding_table);
1456 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
1458 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
1459 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1461 if (VECTORP (decoding_table))
1463 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
1467 decoding_table_remove_char (decoding_table,
1468 XCHARSET_DIMENSION (ccs),
1469 XCHARSET_BYTE_OFFSET (ccs),
1473 if (CHAR_ID_TABLE_P (encoding_table))
1475 put_char_id_table (XCHAR (character), Qnil, encoding_table);
1480 EXFUN (Fmake_char, 3);
1481 EXFUN (Fdecode_char, 2);
1483 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
1484 Store character's ATTRIBUTES.
1488 Lisp_Object rest = attributes;
1489 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
1490 Lisp_Object character;
1494 while (CONSP (rest))
1496 Lisp_Object cell = Fcar (rest);
1500 signal_simple_error ("Invalid argument", attributes);
1501 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
1502 && ((XCHARSET_FINAL (ccs) != 0) ||
1503 (XCHARSET_UCS_MAX (ccs) > 0)) )
1507 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
1509 character = Fdecode_char (ccs, cell);
1510 goto setup_attributes;
1514 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
1517 signal_simple_error ("Invalid argument", attributes);
1519 character = make_char (XINT (code) + 0x100000);
1520 goto setup_attributes;
1524 else if (!INTP (code))
1525 signal_simple_error ("Invalid argument", attributes);
1527 character = make_char (XINT (code));
1531 while (CONSP (rest))
1533 Lisp_Object cell = Fcar (rest);
1536 signal_simple_error ("Invalid argument", attributes);
1538 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
1544 Lisp_Object Vutf_2000_version;
1548 int leading_code_private_11;
1551 Lisp_Object Qcharsetp;
1553 /* Qdoc_string, Qdimension, Qchars defined in general.c */
1554 Lisp_Object Qregistry, Qfinal, Qgraphic;
1555 Lisp_Object Qdirection;
1556 Lisp_Object Qreverse_direction_charset;
1557 Lisp_Object Qleading_byte;
1558 Lisp_Object Qshort_name, Qlong_name;
1572 Qcyrillic_iso8859_5,
1574 Qjapanese_jisx0208_1978,
1577 Qjapanese_jisx0208_1990,
1580 Qchinese_cns11643_1,
1581 Qchinese_cns11643_2,
1587 Qlatin_viscii_lower,
1588 Qlatin_viscii_upper,
1589 Qvietnamese_viscii_lower,
1590 Qvietnamese_viscii_upper,
1621 Lisp_Object Ql2r, Qr2l;
1623 Lisp_Object Vcharset_hash_table;
1625 /* Composite characters are characters constructed by overstriking two
1626 or more regular characters.
1628 1) The old Mule implementation involves storing composite characters
1629 in a buffer as a tag followed by all of the actual characters
1630 used to make up the composite character. I think this is a bad
1631 idea; it greatly complicates code that wants to handle strings
1632 one character at a time because it has to deal with the possibility
1633 of great big ungainly characters. It's much more reasonable to
1634 simply store an index into a table of composite characters.
1636 2) The current implementation only allows for 16,384 separate
1637 composite characters over the lifetime of the XEmacs process.
1638 This could become a potential problem if the user
1639 edited lots of different files that use composite characters.
1640 Due to FSF bogosity, increasing the number of allowable
1641 composite characters under Mule would decrease the number
1642 of possible faces that can exist. Mule already has shrunk
1643 this to 2048, and further shrinkage would become uncomfortable.
1644 No such problems exist in XEmacs.
1646 Composite characters could be represented as 0x80 C1 C2 C3,
1647 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1648 for slightly under 2^20 (one million) composite characters
1649 over the XEmacs process lifetime, and you only need to
1650 increase the size of a Mule character from 19 to 21 bits.
1651 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1652 85 million (slightly over 2^26) composite characters. */
1655 /************************************************************************/
1656 /* Basic Emchar functions */
1657 /************************************************************************/
1659 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1660 string in STR. Returns the number of bytes stored.
1661 Do not call this directly. Use the macro set_charptr_emchar() instead.
1665 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1671 Lisp_Object charset;
1680 else if ( c <= 0x7ff )
1682 *p++ = (c >> 6) | 0xc0;
1683 *p++ = (c & 0x3f) | 0x80;
1685 else if ( c <= 0xffff )
1687 *p++ = (c >> 12) | 0xe0;
1688 *p++ = ((c >> 6) & 0x3f) | 0x80;
1689 *p++ = (c & 0x3f) | 0x80;
1691 else if ( c <= 0x1fffff )
1693 *p++ = (c >> 18) | 0xf0;
1694 *p++ = ((c >> 12) & 0x3f) | 0x80;
1695 *p++ = ((c >> 6) & 0x3f) | 0x80;
1696 *p++ = (c & 0x3f) | 0x80;
1698 else if ( c <= 0x3ffffff )
1700 *p++ = (c >> 24) | 0xf8;
1701 *p++ = ((c >> 18) & 0x3f) | 0x80;
1702 *p++ = ((c >> 12) & 0x3f) | 0x80;
1703 *p++ = ((c >> 6) & 0x3f) | 0x80;
1704 *p++ = (c & 0x3f) | 0x80;
1708 *p++ = (c >> 30) | 0xfc;
1709 *p++ = ((c >> 24) & 0x3f) | 0x80;
1710 *p++ = ((c >> 18) & 0x3f) | 0x80;
1711 *p++ = ((c >> 12) & 0x3f) | 0x80;
1712 *p++ = ((c >> 6) & 0x3f) | 0x80;
1713 *p++ = (c & 0x3f) | 0x80;
1716 BREAKUP_CHAR (c, charset, c1, c2);
1717 lb = CHAR_LEADING_BYTE (c);
1718 if (LEADING_BYTE_PRIVATE_P (lb))
1719 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1721 if (EQ (charset, Vcharset_control_1))
1730 /* Return the first character from a Mule-encoded string in STR,
1731 assuming it's non-ASCII. Do not call this directly.
1732 Use the macro charptr_emchar() instead. */
1735 non_ascii_charptr_emchar (const Bufbyte *str)
1748 else if ( b >= 0xf8 )
1753 else if ( b >= 0xf0 )
1758 else if ( b >= 0xe0 )
1763 else if ( b >= 0xc0 )
1773 for( ; len > 0; len-- )
1776 ch = ( ch << 6 ) | ( b & 0x3f );
1780 Bufbyte i0 = *str, i1, i2 = 0;
1781 Lisp_Object charset;
1783 if (i0 == LEADING_BYTE_CONTROL_1)
1784 return (Emchar) (*++str - 0x20);
1786 if (LEADING_BYTE_PREFIX_P (i0))
1791 charset = CHARSET_BY_LEADING_BYTE (i0);
1792 if (XCHARSET_DIMENSION (charset) == 2)
1795 return MAKE_CHAR (charset, i1, i2);
1799 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1800 Do not call this directly. Use the macro valid_char_p() instead. */
1804 non_ascii_valid_char_p (Emchar ch)
1808 /* Must have only lowest 19 bits set */
1812 f1 = CHAR_FIELD1 (ch);
1813 f2 = CHAR_FIELD2 (ch);
1814 f3 = CHAR_FIELD3 (ch);
1818 Lisp_Object charset;
1820 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1821 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1822 f2 > MAX_CHAR_FIELD2_PRIVATE)
1827 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1828 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1832 NOTE: This takes advantage of the fact that
1833 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1834 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1836 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1837 if (EQ (charset, Qnil))
1839 return (XCHARSET_CHARS (charset) == 96);
1843 Lisp_Object charset;
1845 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1846 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1847 f1 > MAX_CHAR_FIELD1_PRIVATE)
1849 if (f2 < 0x20 || f3 < 0x20)
1852 #ifdef ENABLE_COMPOSITE_CHARS
1853 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1855 if (UNBOUNDP (Fgethash (make_int (ch),
1856 Vcomposite_char_char2string_hash_table,
1861 #endif /* ENABLE_COMPOSITE_CHARS */
1863 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1864 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1867 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1869 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1872 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1874 if (EQ (charset, Qnil))
1876 return (XCHARSET_CHARS (charset) == 96);
1882 /************************************************************************/
1883 /* Basic string functions */
1884 /************************************************************************/
1886 /* Copy the character pointed to by SRC into DST. Do not call this
1887 directly. Use the macro charptr_copy_char() instead.
1888 Return the number of bytes copied. */
1891 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
1893 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
1895 for (i = bytes; i; i--, dst++, src++)
1901 /************************************************************************/
1902 /* streams of Emchars */
1903 /************************************************************************/
1905 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1906 The functions below are not meant to be called directly; use
1907 the macros in insdel.h. */
1910 Lstream_get_emchar_1 (Lstream *stream, int ch)
1912 Bufbyte str[MAX_EMCHAR_LEN];
1913 Bufbyte *strptr = str;
1916 str[0] = (Bufbyte) ch;
1918 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
1920 int c = Lstream_getc (stream);
1921 bufpos_checking_assert (c >= 0);
1922 *++strptr = (Bufbyte) c;
1924 return charptr_emchar (str);
1928 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1930 Bufbyte str[MAX_EMCHAR_LEN];
1931 Bytecount len = set_charptr_emchar (str, ch);
1932 return Lstream_write (stream, str, len);
1936 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1938 Bufbyte str[MAX_EMCHAR_LEN];
1939 Bytecount len = set_charptr_emchar (str, ch);
1940 Lstream_unread (stream, str, len);
1944 /************************************************************************/
1945 /* charset object */
1946 /************************************************************************/
1949 mark_charset (Lisp_Object obj)
1951 Lisp_Charset *cs = XCHARSET (obj);
1953 mark_object (cs->short_name);
1954 mark_object (cs->long_name);
1955 mark_object (cs->doc_string);
1956 mark_object (cs->registry);
1957 mark_object (cs->ccl_program);
1959 mark_object (cs->encoding_table);
1960 /* mark_object (cs->decoding_table); */
1966 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1968 Lisp_Charset *cs = XCHARSET (obj);
1972 error ("printing unreadable object #<charset %s 0x%x>",
1973 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1976 write_c_string ("#<charset ", printcharfun);
1977 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1978 write_c_string (" ", printcharfun);
1979 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1980 write_c_string (" ", printcharfun);
1981 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1982 write_c_string (" ", printcharfun);
1983 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1984 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
1986 CHARSET_DIMENSION (cs),
1987 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1988 CHARSET_COLUMNS (cs),
1989 CHARSET_GRAPHIC (cs),
1990 CHARSET_FINAL (cs));
1991 write_c_string (buf, printcharfun);
1992 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1993 sprintf (buf, " 0x%x>", cs->header.uid);
1994 write_c_string (buf, printcharfun);
1997 static const struct lrecord_description charset_description[] = {
1998 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
1999 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
2000 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
2001 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
2002 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
2003 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
2004 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
2006 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
2007 { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
2012 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
2013 mark_charset, print_charset, 0, 0, 0,
2014 charset_description,
2017 /* Make a new charset. */
2018 /* #### SJT Should generic properties be allowed? */
2020 make_charset (Charset_ID id, Lisp_Object name,
2021 unsigned short chars, unsigned char dimension,
2022 unsigned char columns, unsigned char graphic,
2023 Bufbyte final, unsigned char direction, Lisp_Object short_name,
2024 Lisp_Object long_name, Lisp_Object doc,
2026 Lisp_Object decoding_table,
2027 Emchar ucs_min, Emchar ucs_max,
2028 Emchar code_offset, unsigned char byte_offset)
2031 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
2035 XSETCHARSET (obj, cs);
2037 CHARSET_ID (cs) = id;
2038 CHARSET_NAME (cs) = name;
2039 CHARSET_SHORT_NAME (cs) = short_name;
2040 CHARSET_LONG_NAME (cs) = long_name;
2041 CHARSET_CHARS (cs) = chars;
2042 CHARSET_DIMENSION (cs) = dimension;
2043 CHARSET_DIRECTION (cs) = direction;
2044 CHARSET_COLUMNS (cs) = columns;
2045 CHARSET_GRAPHIC (cs) = graphic;
2046 CHARSET_FINAL (cs) = final;
2047 CHARSET_DOC_STRING (cs) = doc;
2048 CHARSET_REGISTRY (cs) = reg;
2049 CHARSET_CCL_PROGRAM (cs) = Qnil;
2050 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
2052 CHARSET_DECODING_TABLE(cs) = Qnil;
2053 CHARSET_ENCODING_TABLE(cs) = Qnil;
2054 CHARSET_UCS_MIN(cs) = ucs_min;
2055 CHARSET_UCS_MAX(cs) = ucs_max;
2056 CHARSET_CODE_OFFSET(cs) = code_offset;
2057 CHARSET_BYTE_OFFSET(cs) = byte_offset;
2061 if (id == LEADING_BYTE_ASCII)
2062 CHARSET_REP_BYTES (cs) = 1;
2064 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
2066 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
2071 /* some charsets do not have final characters. This includes
2072 ASCII, Control-1, Composite, and the two faux private
2074 unsigned char iso2022_type
2075 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
2077 if (code_offset == 0)
2079 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
2080 chlook->charset_by_attributes[iso2022_type][final] = obj;
2084 (chlook->charset_by_attributes[iso2022_type][final][direction]));
2085 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
2089 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
2090 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
2092 /* Some charsets are "faux" and don't have names or really exist at
2093 all except in the leading-byte table. */
2095 Fputhash (name, obj, Vcharset_hash_table);
2100 get_unallocated_leading_byte (int dimension)
2105 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
2108 lb = chlook->next_allocated_leading_byte++;
2112 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
2115 lb = chlook->next_allocated_1_byte_leading_byte++;
2119 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
2122 lb = chlook->next_allocated_2_byte_leading_byte++;
2128 ("No more character sets free for this dimension",
2129 make_int (dimension));
2135 /* Number of Big5 characters which have the same code in 1st byte. */
2137 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
2140 make_builtin_char (Lisp_Object charset, int c1, int c2)
2142 if (XCHARSET_UCS_MAX (charset))
2145 = (XCHARSET_DIMENSION (charset) == 1
2147 c1 - XCHARSET_BYTE_OFFSET (charset)
2149 (c1 - XCHARSET_BYTE_OFFSET (charset)) * XCHARSET_CHARS (charset)
2150 + c2 - XCHARSET_BYTE_OFFSET (charset))
2151 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
2152 if ((code < XCHARSET_UCS_MIN (charset))
2153 || (XCHARSET_UCS_MAX (charset) < code))
2154 signal_simple_error ("Arguments makes invalid character",
2158 else if (XCHARSET_DIMENSION (charset) == 1)
2160 switch (XCHARSET_CHARS (charset))
2164 + (XCHARSET_FINAL (charset) - '0') * 94 + (c1 - 33);
2167 + (XCHARSET_FINAL (charset) - '0') * 96 + (c1 - 32);
2174 if (EQ (charset, Vcharset_chinese_big5))
2176 int B1 = c1, B2 = c2;
2178 = (B1 - 0xA1) * BIG5_SAME_ROW
2179 + B2 - (B2 < 0x7F ? 0x40 : 0x62);
2183 charset = Vcharset_chinese_big5_1;
2187 charset = Vcharset_chinese_big5_2;
2188 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
2193 switch (XCHARSET_CHARS (charset))
2196 return MIN_CHAR_94x94
2197 + (XCHARSET_FINAL (charset) - '0') * 94 * 94
2198 + (c1 - 33) * 94 + (c2 - 33);
2200 return MIN_CHAR_96x96
2201 + (XCHARSET_FINAL (charset) - '0') * 96 * 96
2202 + (c1 - 32) * 96 + (c2 - 32);
2210 range_charset_code_point (Lisp_Object charset, Emchar ch)
2214 if ((XCHARSET_UCS_MIN (charset) <= ch)
2215 && (ch <= XCHARSET_UCS_MAX (charset)))
2217 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
2219 if (XCHARSET_CHARS (charset) == 256)
2221 else if (XCHARSET_DIMENSION (charset) == 1)
2222 return d + XCHARSET_BYTE_OFFSET (charset);
2223 else if (XCHARSET_DIMENSION (charset) == 2)
2225 ((d / XCHARSET_CHARS (charset)
2226 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2227 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2228 else if (XCHARSET_DIMENSION (charset) == 3)
2230 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2231 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2232 | ((d / XCHARSET_CHARS (charset)
2233 % XCHARSET_CHARS (charset)
2234 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2235 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2236 else /* if (XCHARSET_DIMENSION (charset) == 4) */
2238 ((d / (XCHARSET_CHARS (charset)
2239 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2240 + XCHARSET_BYTE_OFFSET (charset)) << 24)
2241 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2242 % XCHARSET_CHARS (charset)
2243 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2244 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
2245 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2246 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2248 else if (XCHARSET_CODE_OFFSET (charset) == 0)
2250 if (XCHARSET_DIMENSION (charset) == 1)
2252 if (XCHARSET_CHARS (charset) == 94)
2254 if (((d = ch - (MIN_CHAR_94
2255 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
2259 else if (XCHARSET_CHARS (charset) == 96)
2261 if (((d = ch - (MIN_CHAR_96
2262 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
2269 else if (XCHARSET_DIMENSION (charset) == 2)
2271 if (XCHARSET_CHARS (charset) == 94)
2273 if (((d = ch - (MIN_CHAR_94x94
2274 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
2277 return (((d / 94) + 33) << 8) | (d % 94 + 33);
2279 else if (XCHARSET_CHARS (charset) == 96)
2281 if (((d = ch - (MIN_CHAR_96x96
2282 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
2285 return (((d / 96) + 32) << 8) | (d % 96 + 32);
2291 if (EQ (charset, Vcharset_mojikyo_2022_1)
2292 && (MIN_CHAR_MOJIKYO < ch) && (ch < MIN_CHAR_MOJIKYO + 94 * 60 * 94))
2294 int m = ch - MIN_CHAR_MOJIKYO - 1;
2295 int byte1 = m / (94 * 60) + 33;
2296 int byte2 = (m % (94 * 60)) / 94;
2297 int byte3 = m % 94 + 33;
2303 return (byte1 << 16) | (byte2 << 8) | byte3;
2309 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
2311 if (c <= MAX_CHAR_BASIC_LATIN)
2313 *charset = Vcharset_ascii;
2318 *charset = Vcharset_control_1;
2323 *charset = Vcharset_latin_iso8859_1;
2327 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
2329 *charset = Vcharset_greek_iso8859_7;
2330 return c - MIN_CHAR_GREEK + 0x20;
2332 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
2334 *charset = Vcharset_cyrillic_iso8859_5;
2335 return c - MIN_CHAR_CYRILLIC + 0x20;
2338 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
2340 *charset = Vcharset_hebrew_iso8859_8;
2341 return c - MIN_CHAR_HEBREW + 0x20;
2343 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
2345 *charset = Vcharset_thai_tis620;
2346 return c - MIN_CHAR_THAI + 0x20;
2349 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
2350 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
2352 return list2 (Vcharset_katakana_jisx0201,
2353 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
2356 else if (c <= MAX_CHAR_BMP)
2358 *charset = Vcharset_ucs_bmp;
2361 else if (c < MIN_CHAR_DAIKANWA)
2363 *charset = Vcharset_ucs;
2366 else if (c <= MAX_CHAR_DAIKANWA)
2368 *charset = Vcharset_ideograph_daikanwa;
2369 return c - MIN_CHAR_DAIKANWA;
2371 else if (c <= MAX_CHAR_MOJIKYO_0)
2373 *charset = Vcharset_mojikyo;
2374 return c - MIN_CHAR_MOJIKYO_0;
2376 else if (c < MIN_CHAR_94)
2378 *charset = Vcharset_ucs;
2381 else if (c <= MAX_CHAR_94)
2383 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
2384 ((c - MIN_CHAR_94) / 94) + '0',
2385 CHARSET_LEFT_TO_RIGHT);
2386 if (!NILP (*charset))
2387 return ((c - MIN_CHAR_94) % 94) + 33;
2390 *charset = Vcharset_ucs;
2394 else if (c <= MAX_CHAR_96)
2396 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
2397 ((c - MIN_CHAR_96) / 96) + '0',
2398 CHARSET_LEFT_TO_RIGHT);
2399 if (!NILP (*charset))
2400 return ((c - MIN_CHAR_96) % 96) + 32;
2403 *charset = Vcharset_ucs;
2407 else if (c <= MAX_CHAR_94x94)
2410 = CHARSET_BY_ATTRIBUTES (94, 2,
2411 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
2412 CHARSET_LEFT_TO_RIGHT);
2413 if (!NILP (*charset))
2414 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
2415 | (((c - MIN_CHAR_94x94) % 94) + 33);
2418 *charset = Vcharset_ucs;
2422 else if (c <= MAX_CHAR_96x96)
2425 = CHARSET_BY_ATTRIBUTES (96, 2,
2426 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
2427 CHARSET_LEFT_TO_RIGHT);
2428 if (!NILP (*charset))
2429 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
2430 | (((c - MIN_CHAR_96x96) % 96) + 32);
2433 *charset = Vcharset_ucs;
2437 else if (c < MIN_CHAR_MOJIKYO)
2439 *charset = Vcharset_ucs;
2442 else if (c <= MAX_CHAR_MOJIKYO)
2444 *charset = Vcharset_mojikyo;
2445 return c - MIN_CHAR_MOJIKYO;
2449 *charset = Vcharset_ucs;
2454 Lisp_Object Vdefault_coded_charset_priority_list;
2458 /************************************************************************/
2459 /* Basic charset Lisp functions */
2460 /************************************************************************/
2462 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
2463 Return non-nil if OBJECT is a charset.
2467 return CHARSETP (object) ? Qt : Qnil;
2470 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
2471 Retrieve the charset of the given name.
2472 If CHARSET-OR-NAME is a charset object, it is simply returned.
2473 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
2474 nil is returned. Otherwise the associated charset object is returned.
2478 if (CHARSETP (charset_or_name))
2479 return charset_or_name;
2481 CHECK_SYMBOL (charset_or_name);
2482 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
2485 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
2486 Retrieve the charset of the given name.
2487 Same as `find-charset' except an error is signalled if there is no such
2488 charset instead of returning nil.
2492 Lisp_Object charset = Ffind_charset (name);
2495 signal_simple_error ("No such charset", name);
2499 /* We store the charsets in hash tables with the names as the key and the
2500 actual charset object as the value. Occasionally we need to use them
2501 in a list format. These routines provide us with that. */
2502 struct charset_list_closure
2504 Lisp_Object *charset_list;
2508 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
2509 void *charset_list_closure)
2511 /* This function can GC */
2512 struct charset_list_closure *chcl =
2513 (struct charset_list_closure*) charset_list_closure;
2514 Lisp_Object *charset_list = chcl->charset_list;
2516 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
2520 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
2521 Return a list of the names of all defined charsets.
2525 Lisp_Object charset_list = Qnil;
2526 struct gcpro gcpro1;
2527 struct charset_list_closure charset_list_closure;
2529 GCPRO1 (charset_list);
2530 charset_list_closure.charset_list = &charset_list;
2531 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
2532 &charset_list_closure);
2535 return charset_list;
2538 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
2539 Return the name of charset CHARSET.
2543 return XCHARSET_NAME (Fget_charset (charset));
2546 /* #### SJT Should generic properties be allowed? */
2547 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
2548 Define a new character set.
2549 This function is for use with Mule support.
2550 NAME is a symbol, the name by which the character set is normally referred.
2551 DOC-STRING is a string describing the character set.
2552 PROPS is a property list, describing the specific nature of the
2553 character set. Recognized properties are:
2555 'short-name Short version of the charset name (ex: Latin-1)
2556 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
2557 'registry A regular expression matching the font registry field for
2559 'dimension Number of octets used to index a character in this charset.
2560 Either 1 or 2. Defaults to 1.
2561 'columns Number of columns used to display a character in this charset.
2562 Only used in TTY mode. (Under X, the actual width of a
2563 character can be derived from the font used to display the
2564 characters.) If unspecified, defaults to the dimension
2565 (this is almost always the correct value).
2566 'chars Number of characters in each dimension (94 or 96).
2567 Defaults to 94. Note that if the dimension is 2, the
2568 character set thus described is 94x94 or 96x96.
2569 'final Final byte of ISO 2022 escape sequence. Must be
2570 supplied. Each combination of (DIMENSION, CHARS) defines a
2571 separate namespace for final bytes. Note that ISO
2572 2022 restricts the final byte to the range
2573 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
2574 dimension == 2. Note also that final bytes in the range
2575 0x30 - 0x3F are reserved for user-defined (not official)
2577 'graphic 0 (use left half of font on output) or 1 (use right half
2578 of font on output). Defaults to 0. For example, for
2579 a font whose registry is ISO8859-1, the left half
2580 (octets 0x20 - 0x7F) is the `ascii' character set, while
2581 the right half (octets 0xA0 - 0xFF) is the `latin-1'
2582 character set. With 'graphic set to 0, the octets
2583 will have their high bit cleared; with it set to 1,
2584 the octets will have their high bit set.
2585 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
2587 'ccl-program A compiled CCL program used to convert a character in
2588 this charset into an index into the font. This is in
2589 addition to the 'graphic property. The CCL program
2590 is passed the octets of the character, with the high
2591 bit cleared and set depending upon whether the value
2592 of the 'graphic property is 0 or 1.
2594 (name, doc_string, props))
2596 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
2597 int direction = CHARSET_LEFT_TO_RIGHT;
2598 Lisp_Object registry = Qnil;
2599 Lisp_Object charset;
2600 Lisp_Object ccl_program = Qnil;
2601 Lisp_Object short_name = Qnil, long_name = Qnil;
2602 int byte_offset = -1;
2604 CHECK_SYMBOL (name);
2605 if (!NILP (doc_string))
2606 CHECK_STRING (doc_string);
2608 charset = Ffind_charset (name);
2609 if (!NILP (charset))
2610 signal_simple_error ("Cannot redefine existing charset", name);
2613 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
2615 if (EQ (keyword, Qshort_name))
2617 CHECK_STRING (value);
2621 if (EQ (keyword, Qlong_name))
2623 CHECK_STRING (value);
2627 else if (EQ (keyword, Qdimension))
2630 dimension = XINT (value);
2631 if (dimension < 1 || dimension > 2)
2632 signal_simple_error ("Invalid value for 'dimension", value);
2635 else if (EQ (keyword, Qchars))
2638 chars = XINT (value);
2639 if (chars != 94 && chars != 96)
2640 signal_simple_error ("Invalid value for 'chars", value);
2643 else if (EQ (keyword, Qcolumns))
2646 columns = XINT (value);
2647 if (columns != 1 && columns != 2)
2648 signal_simple_error ("Invalid value for 'columns", value);
2651 else if (EQ (keyword, Qgraphic))
2654 graphic = XINT (value);
2656 if (graphic < 0 || graphic > 2)
2658 if (graphic < 0 || graphic > 1)
2660 signal_simple_error ("Invalid value for 'graphic", value);
2663 else if (EQ (keyword, Qregistry))
2665 CHECK_STRING (value);
2669 else if (EQ (keyword, Qdirection))
2671 if (EQ (value, Ql2r))
2672 direction = CHARSET_LEFT_TO_RIGHT;
2673 else if (EQ (value, Qr2l))
2674 direction = CHARSET_RIGHT_TO_LEFT;
2676 signal_simple_error ("Invalid value for 'direction", value);
2679 else if (EQ (keyword, Qfinal))
2681 CHECK_CHAR_COERCE_INT (value);
2682 final = XCHAR (value);
2683 if (final < '0' || final > '~')
2684 signal_simple_error ("Invalid value for 'final", value);
2687 else if (EQ (keyword, Qccl_program))
2689 struct ccl_program test_ccl;
2691 if (setup_ccl_program (&test_ccl, value) < 0)
2692 signal_simple_error ("Invalid value for 'ccl-program", value);
2693 ccl_program = value;
2697 signal_simple_error ("Unrecognized property", keyword);
2702 error ("'final must be specified");
2703 if (dimension == 2 && final > 0x5F)
2705 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2708 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2709 CHARSET_LEFT_TO_RIGHT)) ||
2710 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2711 CHARSET_RIGHT_TO_LEFT)))
2713 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2715 id = get_unallocated_leading_byte (dimension);
2717 if (NILP (doc_string))
2718 doc_string = build_string ("");
2720 if (NILP (registry))
2721 registry = build_string ("");
2723 if (NILP (short_name))
2724 XSETSTRING (short_name, XSYMBOL (name)->name);
2726 if (NILP (long_name))
2727 long_name = doc_string;
2730 columns = dimension;
2732 if (byte_offset < 0)
2736 else if (chars == 96)
2742 charset = make_charset (id, name, chars, dimension, columns, graphic,
2743 final, direction, short_name, long_name,
2744 doc_string, registry,
2745 Qnil, 0, 0, 0, byte_offset);
2746 if (!NILP (ccl_program))
2747 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2751 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2753 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2754 NEW-NAME is the name of the new charset. Return the new charset.
2756 (charset, new_name))
2758 Lisp_Object new_charset = Qnil;
2759 int id, chars, dimension, columns, graphic, final;
2761 Lisp_Object registry, doc_string, short_name, long_name;
2764 charset = Fget_charset (charset);
2765 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2766 signal_simple_error ("Charset already has reverse-direction charset",
2769 CHECK_SYMBOL (new_name);
2770 if (!NILP (Ffind_charset (new_name)))
2771 signal_simple_error ("Cannot redefine existing charset", new_name);
2773 cs = XCHARSET (charset);
2775 chars = CHARSET_CHARS (cs);
2776 dimension = CHARSET_DIMENSION (cs);
2777 columns = CHARSET_COLUMNS (cs);
2778 id = get_unallocated_leading_byte (dimension);
2780 graphic = CHARSET_GRAPHIC (cs);
2781 final = CHARSET_FINAL (cs);
2782 direction = CHARSET_RIGHT_TO_LEFT;
2783 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2784 direction = CHARSET_LEFT_TO_RIGHT;
2785 doc_string = CHARSET_DOC_STRING (cs);
2786 short_name = CHARSET_SHORT_NAME (cs);
2787 long_name = CHARSET_LONG_NAME (cs);
2788 registry = CHARSET_REGISTRY (cs);
2790 new_charset = make_charset (id, new_name, chars, dimension, columns,
2791 graphic, final, direction, short_name, long_name,
2792 doc_string, registry,
2794 CHARSET_DECODING_TABLE(cs),
2795 CHARSET_UCS_MIN(cs),
2796 CHARSET_UCS_MAX(cs),
2797 CHARSET_CODE_OFFSET(cs),
2798 CHARSET_BYTE_OFFSET(cs)
2804 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2805 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
2810 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2811 Define symbol ALIAS as an alias for CHARSET.
2815 CHECK_SYMBOL (alias);
2816 charset = Fget_charset (charset);
2817 return Fputhash (alias, charset, Vcharset_hash_table);
2820 /* #### Reverse direction charsets not yet implemented. */
2822 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2824 Return the reverse-direction charset parallel to CHARSET, if any.
2825 This is the charset with the same properties (in particular, the same
2826 dimension, number of characters per dimension, and final byte) as
2827 CHARSET but whose characters are displayed in the opposite direction.
2831 charset = Fget_charset (charset);
2832 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
2836 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2837 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2838 If DIRECTION is omitted, both directions will be checked (left-to-right
2839 will be returned if character sets exist for both directions).
2841 (dimension, chars, final, direction))
2843 int dm, ch, fi, di = -1;
2844 Lisp_Object obj = Qnil;
2846 CHECK_INT (dimension);
2847 dm = XINT (dimension);
2848 if (dm < 1 || dm > 2)
2849 signal_simple_error ("Invalid value for DIMENSION", dimension);
2853 if (ch != 94 && ch != 96)
2854 signal_simple_error ("Invalid value for CHARS", chars);
2856 CHECK_CHAR_COERCE_INT (final);
2858 if (fi < '0' || fi > '~')
2859 signal_simple_error ("Invalid value for FINAL", final);
2861 if (EQ (direction, Ql2r))
2862 di = CHARSET_LEFT_TO_RIGHT;
2863 else if (EQ (direction, Qr2l))
2864 di = CHARSET_RIGHT_TO_LEFT;
2865 else if (!NILP (direction))
2866 signal_simple_error ("Invalid value for DIRECTION", direction);
2868 if (dm == 2 && fi > 0x5F)
2870 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2874 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
2876 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
2879 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
2882 return XCHARSET_NAME (obj);
2886 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2887 Return short name of CHARSET.
2891 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2894 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2895 Return long name of CHARSET.
2899 return XCHARSET_LONG_NAME (Fget_charset (charset));
2902 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2903 Return description of CHARSET.
2907 return XCHARSET_DOC_STRING (Fget_charset (charset));
2910 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2911 Return dimension of CHARSET.
2915 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2918 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2919 Return property PROP of CHARSET, a charset object or symbol naming a charset.
2920 Recognized properties are those listed in `make-charset', as well as
2921 'name and 'doc-string.
2927 charset = Fget_charset (charset);
2928 cs = XCHARSET (charset);
2930 CHECK_SYMBOL (prop);
2931 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2932 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2933 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2934 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2935 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2936 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2937 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2938 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2939 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2940 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2941 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2942 if (EQ (prop, Qdirection))
2943 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2944 if (EQ (prop, Qreverse_direction_charset))
2946 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2947 /* #### Is this translation OK? If so, error checking sufficient? */
2948 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
2950 signal_simple_error ("Unrecognized charset property name", prop);
2951 return Qnil; /* not reached */
2954 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2955 Return charset identification number of CHARSET.
2959 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2962 /* #### We need to figure out which properties we really want to
2965 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2966 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2968 (charset, ccl_program))
2970 struct ccl_program test_ccl;
2972 charset = Fget_charset (charset);
2973 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
2974 signal_simple_error ("Invalid ccl-program", ccl_program);
2975 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2980 invalidate_charset_font_caches (Lisp_Object charset)
2982 /* Invalidate font cache entries for charset on all devices. */
2983 Lisp_Object devcons, concons, hash_table;
2984 DEVICE_LOOP_NO_BREAK (devcons, concons)
2986 struct device *d = XDEVICE (XCAR (devcons));
2987 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2988 if (!UNBOUNDP (hash_table))
2989 Fclrhash (hash_table);
2993 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
2994 Set the 'registry property of CHARSET to REGISTRY.
2996 (charset, registry))
2998 charset = Fget_charset (charset);
2999 CHECK_STRING (registry);
3000 XCHARSET_REGISTRY (charset) = registry;
3001 invalidate_charset_font_caches (charset);
3002 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
3007 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
3008 Return mapping-table of CHARSET.
3012 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
3015 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
3016 Set mapping-table of CHARSET to TABLE.
3020 struct Lisp_Charset *cs;
3024 charset = Fget_charset (charset);
3025 cs = XCHARSET (charset);
3029 if (VECTORP (CHARSET_DECODING_TABLE(cs)))
3030 make_vector_newer (CHARSET_DECODING_TABLE(cs));
3031 CHARSET_DECODING_TABLE(cs) = Qnil;
3034 else if (VECTORP (table))
3036 int ccs_len = CHARSET_BYTE_SIZE (cs);
3037 int ret = decoding_table_check_elements (table,
3038 CHARSET_DIMENSION (cs),
3043 signal_simple_error ("Too big table", table);
3045 signal_simple_error ("Invalid element is found", table);
3047 signal_simple_error ("Something wrong", table);
3049 CHARSET_DECODING_TABLE(cs) = Qnil;
3052 signal_error (Qwrong_type_argument,
3053 list2 (build_translated_string ("vector-or-nil-p"),
3056 byte_offset = CHARSET_BYTE_OFFSET (cs);
3057 switch (CHARSET_DIMENSION (cs))
3060 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3062 Lisp_Object c = XVECTOR_DATA(table)[i];
3065 put_char_ccs_code_point (c, charset,
3066 make_int (i + byte_offset));
3070 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3072 Lisp_Object v = XVECTOR_DATA(table)[i];
3078 for (j = 0; j < XVECTOR_LENGTH (v); j++)
3080 Lisp_Object c = XVECTOR_DATA(v)[j];
3083 put_char_ccs_code_point
3085 make_int ( ( (i + byte_offset) << 8 )
3091 put_char_ccs_code_point (v, charset,
3092 make_int (i + byte_offset));
3101 /************************************************************************/
3102 /* Lisp primitives for working with characters */
3103 /************************************************************************/
3106 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
3107 Make a character from CHARSET and code-point CODE.
3113 charset = Fget_charset (charset);
3116 if (XCHARSET_GRAPHIC (charset) == 1)
3118 return make_char (DECODE_CHAR (charset, c));
3121 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
3122 Make a builtin character from CHARSET and code-point CODE.
3129 charset = Fget_charset (charset);
3133 if ((final = XCHARSET_FINAL (charset)) >= '0')
3135 if (XCHARSET_DIMENSION (charset) == 1)
3137 switch (XCHARSET_CHARS (charset))
3141 make_char (MIN_CHAR_94 + (final - '0') * 94
3142 + ((c & 0x7F) - 33));
3145 make_char (MIN_CHAR_96 + (final - '0') * 96
3146 + ((c & 0x7F) - 32));
3148 return Fdecode_char (charset, code);
3153 switch (XCHARSET_CHARS (charset))
3157 make_char (MIN_CHAR_94x94
3158 + (final - '0') * 94 * 94
3159 + (((c >> 8) & 0x7F) - 33) * 94
3160 + ((c & 0x7F) - 33));
3163 make_char (MIN_CHAR_96x96
3164 + (final - '0') * 96 * 96
3165 + (((c >> 8) & 0x7F) - 32) * 96
3166 + ((c & 0x7F) - 32));
3168 return Fdecode_char (charset, code);
3172 else if (XCHARSET_UCS_MAX (charset))
3175 = (XCHARSET_DIMENSION (charset) == 1
3177 c - XCHARSET_BYTE_OFFSET (charset)
3179 ((c >> 8) - XCHARSET_BYTE_OFFSET (charset))
3180 * XCHARSET_CHARS (charset)
3181 + (c & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
3182 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
3183 if ((cid < XCHARSET_UCS_MIN (charset))
3184 || (XCHARSET_UCS_MAX (charset) < cid))
3185 return Fdecode_char (charset, code);
3186 return make_char (cid);
3189 return Fdecode_char (charset, code);
3193 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
3194 Make a character from CHARSET and octets ARG1 and ARG2.
3195 ARG2 is required only for characters from two-dimensional charsets.
3196 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
3197 character s with caron.
3199 (charset, arg1, arg2))
3203 int lowlim, highlim;
3205 charset = Fget_charset (charset);
3206 cs = XCHARSET (charset);
3208 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
3209 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
3211 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
3213 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
3214 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
3217 /* It is useful (and safe, according to Olivier Galibert) to strip
3218 the 8th bit off ARG1 and ARG2 because it allows programmers to
3219 write (make-char 'latin-iso8859-2 CODE) where code is the actual
3220 Latin 2 code of the character. */
3228 if (a1 < lowlim || a1 > highlim)
3229 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
3231 if (CHARSET_DIMENSION (cs) == 1)
3235 ("Charset is of dimension one; second octet must be nil", arg2);
3236 return make_char (MAKE_CHAR (charset, a1, 0));
3245 a2 = XINT (arg2) & 0x7f;
3247 if (a2 < lowlim || a2 > highlim)
3248 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
3250 return make_char (MAKE_CHAR (charset, a1, a2));
3253 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
3254 Return the character set of CHARACTER.
3258 CHECK_CHAR_COERCE_INT (character);
3260 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
3263 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
3264 Return the octet numbered N (should be 0 or 1) of CHARACTER.
3265 N defaults to 0 if omitted.
3269 Lisp_Object charset;
3272 CHECK_CHAR_COERCE_INT (character);
3274 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
3276 if (NILP (n) || EQ (n, Qzero))
3277 return make_int (octet0);
3278 else if (EQ (n, make_int (1)))
3279 return make_int (octet1);
3281 signal_simple_error ("Octet number must be 0 or 1", n);
3284 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
3285 Return list of charset and one or two position-codes of CHARACTER.
3289 /* This function can GC */
3290 struct gcpro gcpro1, gcpro2;
3291 Lisp_Object charset = Qnil;
3292 Lisp_Object rc = Qnil;
3300 GCPRO2 (charset, rc);
3301 CHECK_CHAR_COERCE_INT (character);
3304 code_point = ENCODE_CHAR (XCHAR (character), charset);
3305 dimension = XCHARSET_DIMENSION (charset);
3306 while (dimension > 0)
3308 rc = Fcons (make_int (code_point & 255), rc);
3312 rc = Fcons (XCHARSET_NAME (charset), rc);
3314 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
3316 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
3318 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
3322 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
3331 #ifdef ENABLE_COMPOSITE_CHARS
3332 /************************************************************************/
3333 /* composite character functions */
3334 /************************************************************************/
3337 lookup_composite_char (Bufbyte *str, int len)
3339 Lisp_Object lispstr = make_string (str, len);
3340 Lisp_Object ch = Fgethash (lispstr,
3341 Vcomposite_char_string2char_hash_table,
3347 if (composite_char_row_next >= 128)
3348 signal_simple_error ("No more composite chars available", lispstr);
3349 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
3350 composite_char_col_next);
3351 Fputhash (make_char (emch), lispstr,
3352 Vcomposite_char_char2string_hash_table);
3353 Fputhash (lispstr, make_char (emch),
3354 Vcomposite_char_string2char_hash_table);
3355 composite_char_col_next++;
3356 if (composite_char_col_next >= 128)
3358 composite_char_col_next = 32;
3359 composite_char_row_next++;
3368 composite_char_string (Emchar ch)
3370 Lisp_Object str = Fgethash (make_char (ch),
3371 Vcomposite_char_char2string_hash_table,
3373 assert (!UNBOUNDP (str));
3377 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
3378 Convert a string into a single composite character.
3379 The character is the result of overstriking all the characters in
3384 CHECK_STRING (string);
3385 return make_char (lookup_composite_char (XSTRING_DATA (string),
3386 XSTRING_LENGTH (string)));
3389 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
3390 Return a string of the characters comprising a composite character.
3398 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
3399 signal_simple_error ("Must be composite char", ch);
3400 return composite_char_string (emch);
3402 #endif /* ENABLE_COMPOSITE_CHARS */
3405 /************************************************************************/
3406 /* initialization */
3407 /************************************************************************/
3410 syms_of_mule_charset (void)
3413 INIT_LRECORD_IMPLEMENTATION (uint8_byte_table);
3414 INIT_LRECORD_IMPLEMENTATION (uint16_byte_table);
3415 INIT_LRECORD_IMPLEMENTATION (byte_table);
3416 INIT_LRECORD_IMPLEMENTATION (char_id_table);
3418 INIT_LRECORD_IMPLEMENTATION (charset);
3420 DEFSUBR (Fcharsetp);
3421 DEFSUBR (Ffind_charset);
3422 DEFSUBR (Fget_charset);
3423 DEFSUBR (Fcharset_list);
3424 DEFSUBR (Fcharset_name);
3425 DEFSUBR (Fmake_charset);
3426 DEFSUBR (Fmake_reverse_direction_charset);
3427 /* DEFSUBR (Freverse_direction_charset); */
3428 DEFSUBR (Fdefine_charset_alias);
3429 DEFSUBR (Fcharset_from_attributes);
3430 DEFSUBR (Fcharset_short_name);
3431 DEFSUBR (Fcharset_long_name);
3432 DEFSUBR (Fcharset_description);
3433 DEFSUBR (Fcharset_dimension);
3434 DEFSUBR (Fcharset_property);
3435 DEFSUBR (Fcharset_id);
3436 DEFSUBR (Fset_charset_ccl_program);
3437 DEFSUBR (Fset_charset_registry);
3439 DEFSUBR (Fchar_attribute_list);
3440 DEFSUBR (Ffind_char_attribute_table);
3441 DEFSUBR (Fchar_attribute_alist);
3442 DEFSUBR (Fget_char_attribute);
3443 DEFSUBR (Fput_char_attribute);
3444 DEFSUBR (Fremove_char_attribute);
3445 DEFSUBR (Fdefine_char);
3446 DEFSUBR (Fchar_variants);
3447 DEFSUBR (Fget_composite_char);
3448 DEFSUBR (Fcharset_mapping_table);
3449 DEFSUBR (Fset_charset_mapping_table);
3453 DEFSUBR (Fdecode_char);
3454 DEFSUBR (Fdecode_builtin_char);
3456 DEFSUBR (Fmake_char);
3457 DEFSUBR (Fchar_charset);
3458 DEFSUBR (Fchar_octet);
3459 DEFSUBR (Fsplit_char);
3461 #ifdef ENABLE_COMPOSITE_CHARS
3462 DEFSUBR (Fmake_composite_char);
3463 DEFSUBR (Fcomposite_char_string);
3466 defsymbol (&Qcharsetp, "charsetp");
3467 defsymbol (&Qregistry, "registry");
3468 defsymbol (&Qfinal, "final");
3469 defsymbol (&Qgraphic, "graphic");
3470 defsymbol (&Qdirection, "direction");
3471 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
3472 defsymbol (&Qshort_name, "short-name");
3473 defsymbol (&Qlong_name, "long-name");
3475 defsymbol (&Ql2r, "l2r");
3476 defsymbol (&Qr2l, "r2l");
3478 /* Charsets, compatible with FSF 20.3
3479 Naming convention is Script-Charset[-Edition] */
3480 defsymbol (&Qascii, "ascii");
3481 defsymbol (&Qcontrol_1, "control-1");
3482 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
3483 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
3484 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
3485 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
3486 defsymbol (&Qthai_tis620, "thai-tis620");
3487 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
3488 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
3489 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
3490 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
3491 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
3492 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
3493 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
3494 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
3495 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
3496 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
3497 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
3498 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
3499 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
3500 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
3501 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
3503 defsymbol (&Q_ucs, "->ucs");
3504 defsymbol (&Q_decomposition, "->decomposition");
3505 defsymbol (&Qcompat, "compat");
3506 defsymbol (&Qisolated, "isolated");
3507 defsymbol (&Qinitial, "initial");
3508 defsymbol (&Qmedial, "medial");
3509 defsymbol (&Qfinal, "final");
3510 defsymbol (&Qvertical, "vertical");
3511 defsymbol (&QnoBreak, "noBreak");
3512 defsymbol (&Qfraction, "fraction");
3513 defsymbol (&Qsuper, "super");
3514 defsymbol (&Qsub, "sub");
3515 defsymbol (&Qcircle, "circle");
3516 defsymbol (&Qsquare, "square");
3517 defsymbol (&Qwide, "wide");
3518 defsymbol (&Qnarrow, "narrow");
3519 defsymbol (&Qsmall, "small");
3520 defsymbol (&Qfont, "font");
3521 defsymbol (&Qucs, "ucs");
3522 defsymbol (&Qucs_bmp, "ucs-bmp");
3523 defsymbol (&Qucs_cns, "ucs-cns");
3524 defsymbol (&Qlatin_viscii, "latin-viscii");
3525 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
3526 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
3527 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
3528 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
3529 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
3530 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
3531 defsymbol (&Qchinese_big5, "chinese-big5");
3532 defsymbol (&Qmojikyo, "mojikyo");
3533 defsymbol (&Qmojikyo_2022_1, "mojikyo-2022-1");
3534 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
3535 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
3536 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
3537 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
3538 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
3539 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
3540 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
3541 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
3542 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
3543 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
3544 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
3545 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
3546 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
3547 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
3548 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
3549 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
3550 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
3551 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
3552 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
3553 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
3554 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
3555 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
3557 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
3558 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
3560 defsymbol (&Qcomposite, "composite");
3564 vars_of_mule_charset (void)
3571 chlook = xnew (struct charset_lookup);
3572 dumpstruct (&chlook, &charset_lookup_description);
3574 /* Table of charsets indexed by leading byte. */
3575 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
3576 chlook->charset_by_leading_byte[i] = Qnil;
3579 /* Table of charsets indexed by type/final-byte. */
3580 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3581 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3582 chlook->charset_by_attributes[i][j] = Qnil;
3584 /* Table of charsets indexed by type/final-byte/direction. */
3585 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3586 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3587 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
3588 chlook->charset_by_attributes[i][j][k] = Qnil;
3592 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
3594 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
3595 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
3599 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3600 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
3601 Leading-code of private TYPE9N charset of column-width 1.
3603 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3607 Vutf_2000_version = build_string("0.17 (Hōryūji)");
3608 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
3609 Version number of UTF-2000.
3612 staticpro (&Vcharacter_composition_table);
3613 Vcharacter_composition_table = make_char_id_table (Qnil);
3615 staticpro (&Vcharacter_variant_table);
3616 Vcharacter_variant_table = make_char_id_table (Qnil);
3618 Vdefault_coded_charset_priority_list = Qnil;
3619 DEFVAR_LISP ("default-coded-charset-priority-list",
3620 &Vdefault_coded_charset_priority_list /*
3621 Default order of preferred coded-character-sets.
3627 complex_vars_of_mule_charset (void)
3629 staticpro (&Vcharset_hash_table);
3630 Vcharset_hash_table =
3631 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3633 /* Predefined character sets. We store them into variables for
3637 staticpro (&Vchar_attribute_hash_table);
3638 Vchar_attribute_hash_table
3639 = make_lisp_hash_table (16, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3641 staticpro (&Vcharset_ucs);
3643 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
3644 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3645 build_string ("UCS"),
3646 build_string ("UCS"),
3647 build_string ("ISO/IEC 10646"),
3649 Qnil, 0, 0xFFFFFFF, 0, 0);
3650 staticpro (&Vcharset_ucs_bmp);
3652 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
3653 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3654 build_string ("BMP"),
3655 build_string ("BMP"),
3656 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
3657 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
3658 Qnil, 0, 0xFFFF, 0, 0);
3659 staticpro (&Vcharset_ucs_cns);
3661 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 4,
3662 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3663 build_string ("UCS for CNS"),
3664 build_string ("UCS for CNS 11643"),
3665 build_string ("ISO/IEC 10646 for CNS 11643"),
3667 Qnil, 0, 0xFFFFFFF, 0, 0);
3669 # define MIN_CHAR_THAI 0
3670 # define MAX_CHAR_THAI 0
3671 # define MIN_CHAR_HEBREW 0
3672 # define MAX_CHAR_HEBREW 0
3673 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3674 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
3676 staticpro (&Vcharset_ascii);
3678 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3679 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3680 build_string ("ASCII"),
3681 build_string ("ASCII)"),
3682 build_string ("ASCII (ISO646 IRV)"),
3683 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3684 Qnil, 0, 0x7F, 0, 0);
3685 staticpro (&Vcharset_control_1);
3686 Vcharset_control_1 =
3687 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3688 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3689 build_string ("C1"),
3690 build_string ("Control characters"),
3691 build_string ("Control characters 128-191"),
3693 Qnil, 0x80, 0x9F, 0, 0);
3694 staticpro (&Vcharset_latin_iso8859_1);
3695 Vcharset_latin_iso8859_1 =
3696 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3697 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3698 build_string ("Latin-1"),
3699 build_string ("ISO8859-1 (Latin-1)"),
3700 build_string ("ISO8859-1 (Latin-1)"),
3701 build_string ("iso8859-1"),
3702 Qnil, 0xA0, 0xFF, 0, 32);
3703 staticpro (&Vcharset_latin_iso8859_2);
3704 Vcharset_latin_iso8859_2 =
3705 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3706 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3707 build_string ("Latin-2"),
3708 build_string ("ISO8859-2 (Latin-2)"),
3709 build_string ("ISO8859-2 (Latin-2)"),
3710 build_string ("iso8859-2"),
3712 staticpro (&Vcharset_latin_iso8859_3);
3713 Vcharset_latin_iso8859_3 =
3714 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3715 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3716 build_string ("Latin-3"),
3717 build_string ("ISO8859-3 (Latin-3)"),
3718 build_string ("ISO8859-3 (Latin-3)"),
3719 build_string ("iso8859-3"),
3721 staticpro (&Vcharset_latin_iso8859_4);
3722 Vcharset_latin_iso8859_4 =
3723 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3724 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3725 build_string ("Latin-4"),
3726 build_string ("ISO8859-4 (Latin-4)"),
3727 build_string ("ISO8859-4 (Latin-4)"),
3728 build_string ("iso8859-4"),
3730 staticpro (&Vcharset_thai_tis620);
3731 Vcharset_thai_tis620 =
3732 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3733 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3734 build_string ("TIS620"),
3735 build_string ("TIS620 (Thai)"),
3736 build_string ("TIS620.2529 (Thai)"),
3737 build_string ("tis620"),
3738 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
3739 staticpro (&Vcharset_greek_iso8859_7);
3740 Vcharset_greek_iso8859_7 =
3741 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3742 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3743 build_string ("ISO8859-7"),
3744 build_string ("ISO8859-7 (Greek)"),
3745 build_string ("ISO8859-7 (Greek)"),
3746 build_string ("iso8859-7"),
3748 0 /* MIN_CHAR_GREEK */,
3749 0 /* MAX_CHAR_GREEK */, 0, 32);
3750 staticpro (&Vcharset_arabic_iso8859_6);
3751 Vcharset_arabic_iso8859_6 =
3752 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3753 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3754 build_string ("ISO8859-6"),
3755 build_string ("ISO8859-6 (Arabic)"),
3756 build_string ("ISO8859-6 (Arabic)"),
3757 build_string ("iso8859-6"),
3759 staticpro (&Vcharset_hebrew_iso8859_8);
3760 Vcharset_hebrew_iso8859_8 =
3761 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3762 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3763 build_string ("ISO8859-8"),
3764 build_string ("ISO8859-8 (Hebrew)"),
3765 build_string ("ISO8859-8 (Hebrew)"),
3766 build_string ("iso8859-8"),
3767 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
3768 staticpro (&Vcharset_katakana_jisx0201);
3769 Vcharset_katakana_jisx0201 =
3770 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3771 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3772 build_string ("JISX0201 Kana"),
3773 build_string ("JISX0201.1976 (Japanese Kana)"),
3774 build_string ("JISX0201.1976 Japanese Kana"),
3775 build_string ("jisx0201\\.1976"),
3777 staticpro (&Vcharset_latin_jisx0201);
3778 Vcharset_latin_jisx0201 =
3779 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3780 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3781 build_string ("JISX0201 Roman"),
3782 build_string ("JISX0201.1976 (Japanese Roman)"),
3783 build_string ("JISX0201.1976 Japanese Roman"),
3784 build_string ("jisx0201\\.1976"),
3786 staticpro (&Vcharset_cyrillic_iso8859_5);
3787 Vcharset_cyrillic_iso8859_5 =
3788 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3789 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3790 build_string ("ISO8859-5"),
3791 build_string ("ISO8859-5 (Cyrillic)"),
3792 build_string ("ISO8859-5 (Cyrillic)"),
3793 build_string ("iso8859-5"),
3795 0 /* MIN_CHAR_CYRILLIC */,
3796 0 /* MAX_CHAR_CYRILLIC */, 0, 32);
3797 staticpro (&Vcharset_latin_iso8859_9);
3798 Vcharset_latin_iso8859_9 =
3799 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3800 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3801 build_string ("Latin-5"),
3802 build_string ("ISO8859-9 (Latin-5)"),
3803 build_string ("ISO8859-9 (Latin-5)"),
3804 build_string ("iso8859-9"),
3806 staticpro (&Vcharset_japanese_jisx0208_1978);
3807 Vcharset_japanese_jisx0208_1978 =
3808 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3809 Qjapanese_jisx0208_1978, 94, 2,
3810 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3811 build_string ("JIS X0208:1978"),
3812 build_string ("JIS X0208:1978 (Japanese)"),
3814 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3815 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3817 staticpro (&Vcharset_chinese_gb2312);
3818 Vcharset_chinese_gb2312 =
3819 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3820 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3821 build_string ("GB2312"),
3822 build_string ("GB2312)"),
3823 build_string ("GB2312 Chinese simplified"),
3824 build_string ("gb2312"),
3826 staticpro (&Vcharset_japanese_jisx0208);
3827 Vcharset_japanese_jisx0208 =
3828 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3829 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3830 build_string ("JISX0208"),
3831 build_string ("JIS X0208:1983 (Japanese)"),
3832 build_string ("JIS X0208:1983 Japanese Kanji"),
3833 build_string ("jisx0208\\.1983"),
3836 staticpro (&Vcharset_japanese_jisx0208_1990);
3837 Vcharset_japanese_jisx0208_1990 =
3838 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3839 Qjapanese_jisx0208_1990, 94, 2,
3840 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3841 build_string ("JISX0208-1990"),
3842 build_string ("JIS X0208:1990 (Japanese)"),
3843 build_string ("JIS X0208:1990 Japanese Kanji"),
3844 build_string ("jisx0208\\.1990"),
3846 MIN_CHAR_JIS_X0208_1990,
3847 MAX_CHAR_JIS_X0208_1990, 0, 33);
3849 staticpro (&Vcharset_korean_ksc5601);
3850 Vcharset_korean_ksc5601 =
3851 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3852 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3853 build_string ("KSC5601"),
3854 build_string ("KSC5601 (Korean"),
3855 build_string ("KSC5601 Korean Hangul and Hanja"),
3856 build_string ("ksc5601"),
3858 staticpro (&Vcharset_japanese_jisx0212);
3859 Vcharset_japanese_jisx0212 =
3860 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3861 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3862 build_string ("JISX0212"),
3863 build_string ("JISX0212 (Japanese)"),
3864 build_string ("JISX0212 Japanese Supplement"),
3865 build_string ("jisx0212"),
3868 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3869 staticpro (&Vcharset_chinese_cns11643_1);
3870 Vcharset_chinese_cns11643_1 =
3871 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3872 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3873 build_string ("CNS11643-1"),
3874 build_string ("CNS11643-1 (Chinese traditional)"),
3876 ("CNS 11643 Plane 1 Chinese traditional"),
3877 build_string (CHINESE_CNS_PLANE_RE("1")),
3879 staticpro (&Vcharset_chinese_cns11643_2);
3880 Vcharset_chinese_cns11643_2 =
3881 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3882 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3883 build_string ("CNS11643-2"),
3884 build_string ("CNS11643-2 (Chinese traditional)"),
3886 ("CNS 11643 Plane 2 Chinese traditional"),
3887 build_string (CHINESE_CNS_PLANE_RE("2")),
3890 staticpro (&Vcharset_latin_tcvn5712);
3891 Vcharset_latin_tcvn5712 =
3892 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3893 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3894 build_string ("TCVN 5712"),
3895 build_string ("TCVN 5712 (VSCII-2)"),
3896 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3897 build_string ("tcvn5712-1"),
3899 staticpro (&Vcharset_latin_viscii_lower);
3900 Vcharset_latin_viscii_lower =
3901 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3902 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3903 build_string ("VISCII lower"),
3904 build_string ("VISCII lower (Vietnamese)"),
3905 build_string ("VISCII lower (Vietnamese)"),
3906 build_string ("MULEVISCII-LOWER"),
3908 staticpro (&Vcharset_latin_viscii_upper);
3909 Vcharset_latin_viscii_upper =
3910 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3911 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3912 build_string ("VISCII upper"),
3913 build_string ("VISCII upper (Vietnamese)"),
3914 build_string ("VISCII upper (Vietnamese)"),
3915 build_string ("MULEVISCII-UPPER"),
3917 staticpro (&Vcharset_latin_viscii);
3918 Vcharset_latin_viscii =
3919 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3920 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3921 build_string ("VISCII"),
3922 build_string ("VISCII 1.1 (Vietnamese)"),
3923 build_string ("VISCII 1.1 (Vietnamese)"),
3924 build_string ("VISCII1\\.1"),
3926 staticpro (&Vcharset_chinese_big5);
3927 Vcharset_chinese_big5 =
3928 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
3929 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3930 build_string ("Big5"),
3931 build_string ("Big5"),
3932 build_string ("Big5 Chinese traditional"),
3933 build_string ("big5"),
3935 staticpro (&Vcharset_ideograph_daikanwa);
3936 Vcharset_ideograph_daikanwa =
3937 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
3938 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3939 build_string ("Daikanwa"),
3940 build_string ("Morohashi's Daikanwa"),
3941 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
3942 build_string ("Daikanwa"),
3943 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
3944 staticpro (&Vcharset_mojikyo);
3946 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
3947 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3948 build_string ("Mojikyo"),
3949 build_string ("Mojikyo"),
3950 build_string ("Konjaku-Mojikyo"),
3952 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
3953 staticpro (&Vcharset_mojikyo_2022_1);
3954 Vcharset_mojikyo_2022_1 =
3955 make_charset (LEADING_BYTE_MOJIKYO_2022_1, Qmojikyo_2022_1, 94, 3,
3956 2, 2, ':', CHARSET_LEFT_TO_RIGHT,
3957 build_string ("Mojikyo-2022-1"),
3958 build_string ("Mojikyo ISO-2022 Part 1"),
3959 build_string ("Konjaku-Mojikyo for ISO/IEC 2022 Part 1"),
3963 #define DEF_MOJIKYO_PJ(n) \
3964 staticpro (&Vcharset_mojikyo_pj_##n); \
3965 Vcharset_mojikyo_pj_##n = \
3966 make_charset (LEADING_BYTE_MOJIKYO_PJ_##n, Qmojikyo_pj_##n, 94, 2, \
3967 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
3968 build_string ("Mojikyo-PJ-"#n), \
3969 build_string ("Mojikyo (pseudo JIS encoding) part "#n), \
3971 ("Konjaku-Mojikyo (pseudo JIS encoding) part "#n), \
3973 ("\\(MojikyoPJ-"#n "\\|jisx0208\\.Mojikyo-"#n "\\)$"), \
3985 DEF_MOJIKYO_PJ (10);
3986 DEF_MOJIKYO_PJ (11);
3987 DEF_MOJIKYO_PJ (12);
3988 DEF_MOJIKYO_PJ (13);
3989 DEF_MOJIKYO_PJ (14);
3990 DEF_MOJIKYO_PJ (15);
3991 DEF_MOJIKYO_PJ (16);
3992 DEF_MOJIKYO_PJ (17);
3993 DEF_MOJIKYO_PJ (18);
3994 DEF_MOJIKYO_PJ (19);
3995 DEF_MOJIKYO_PJ (20);
3996 DEF_MOJIKYO_PJ (21);
3998 staticpro (&Vcharset_ethiopic_ucs);
3999 Vcharset_ethiopic_ucs =
4000 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
4001 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4002 build_string ("Ethiopic (UCS)"),
4003 build_string ("Ethiopic (UCS)"),
4004 build_string ("Ethiopic of UCS"),
4005 build_string ("Ethiopic-Unicode"),
4006 Qnil, 0x1200, 0x137F, 0x1200, 0);
4008 staticpro (&Vcharset_chinese_big5_1);
4009 Vcharset_chinese_big5_1 =
4010 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
4011 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
4012 build_string ("Big5"),
4013 build_string ("Big5 (Level-1)"),
4015 ("Big5 Level-1 Chinese traditional"),
4016 build_string ("big5"),
4018 staticpro (&Vcharset_chinese_big5_2);
4019 Vcharset_chinese_big5_2 =
4020 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
4021 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
4022 build_string ("Big5"),
4023 build_string ("Big5 (Level-2)"),
4025 ("Big5 Level-2 Chinese traditional"),
4026 build_string ("big5"),
4029 #ifdef ENABLE_COMPOSITE_CHARS
4030 /* #### For simplicity, we put composite chars into a 96x96 charset.
4031 This is going to lead to problems because you can run out of
4032 room, esp. as we don't yet recycle numbers. */
4033 staticpro (&Vcharset_composite);
4034 Vcharset_composite =
4035 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
4036 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4037 build_string ("Composite"),
4038 build_string ("Composite characters"),
4039 build_string ("Composite characters"),
4042 /* #### not dumped properly */
4043 composite_char_row_next = 32;
4044 composite_char_col_next = 32;
4046 Vcomposite_char_string2char_hash_table =
4047 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
4048 Vcomposite_char_char2string_hash_table =
4049 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
4050 staticpro (&Vcomposite_char_string2char_hash_table);
4051 staticpro (&Vcomposite_char_char2string_hash_table);
4052 #endif /* ENABLE_COMPOSITE_CHARS */