1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000 MORIOKA Tomohiko
6 This file is part of XEmacs.
XEmacs is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

XEmacs is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 20.3. Not in FSF. */
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_japanese_jisx0208;
60 Lisp_Object Vcharset_japanese_jisx0208_1990;
61 Lisp_Object Vcharset_korean_ksc5601;
62 Lisp_Object Vcharset_japanese_jisx0212;
63 Lisp_Object Vcharset_chinese_cns11643_1;
64 Lisp_Object Vcharset_chinese_cns11643_2;
66 Lisp_Object Vcharset_ucs;
67 Lisp_Object Vcharset_ucs_bmp;
68 Lisp_Object Vcharset_ucs_cns;
69 Lisp_Object Vcharset_latin_viscii;
70 Lisp_Object Vcharset_latin_tcvn5712;
71 Lisp_Object Vcharset_latin_viscii_lower;
72 Lisp_Object Vcharset_latin_viscii_upper;
73 Lisp_Object Vcharset_chinese_big5;
74 Lisp_Object Vcharset_ideograph_daikanwa;
75 Lisp_Object Vcharset_mojikyo;
76 Lisp_Object Vcharset_mojikyo_2022_1;
77 Lisp_Object Vcharset_mojikyo_pj_1;
78 Lisp_Object Vcharset_mojikyo_pj_2;
79 Lisp_Object Vcharset_mojikyo_pj_3;
80 Lisp_Object Vcharset_mojikyo_pj_4;
81 Lisp_Object Vcharset_mojikyo_pj_5;
82 Lisp_Object Vcharset_mojikyo_pj_6;
83 Lisp_Object Vcharset_mojikyo_pj_7;
84 Lisp_Object Vcharset_mojikyo_pj_8;
85 Lisp_Object Vcharset_mojikyo_pj_9;
86 Lisp_Object Vcharset_mojikyo_pj_10;
87 Lisp_Object Vcharset_mojikyo_pj_11;
88 Lisp_Object Vcharset_mojikyo_pj_12;
89 Lisp_Object Vcharset_mojikyo_pj_13;
90 Lisp_Object Vcharset_mojikyo_pj_14;
91 Lisp_Object Vcharset_mojikyo_pj_15;
92 Lisp_Object Vcharset_mojikyo_pj_16;
93 Lisp_Object Vcharset_mojikyo_pj_17;
94 Lisp_Object Vcharset_mojikyo_pj_18;
95 Lisp_Object Vcharset_mojikyo_pj_19;
96 Lisp_Object Vcharset_mojikyo_pj_20;
97 Lisp_Object Vcharset_mojikyo_pj_21;
98 Lisp_Object Vcharset_ethiopic_ucs;
100 Lisp_Object Vcharset_chinese_big5_1;
101 Lisp_Object Vcharset_chinese_big5_2;
103 #ifdef ENABLE_COMPOSITE_CHARS
104 Lisp_Object Vcharset_composite;
/* Hash tables for composite chars.  One maps strings representing
   composed chars to their equivalent chars; one goes the
   other way. */
109 Lisp_Object Vcomposite_char_char2string_hash_table;
110 Lisp_Object Vcomposite_char_string2char_hash_table;
112 static int composite_char_row_next;
113 static int composite_char_col_next;
115 #endif /* ENABLE_COMPOSITE_CHARS */
117 struct charset_lookup *chlook;
119 static const struct lrecord_description charset_lookup_description_1[] = {
120 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
129 static const struct struct_description charset_lookup_description = {
130 sizeof (struct charset_lookup),
131 charset_lookup_description_1
135 /* Table of number of bytes in the string representation of a character
136 indexed by the first byte of that representation.
138 rep_bytes_by_first_byte(c) is more efficient than the equivalent
139 canonical computation:
141 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
143 const Bytecount rep_bytes_by_first_byte[0xA0] =
144 { /* 0x00 - 0x7f are for straight ASCII */
145 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
146 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
147 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
148 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
149 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
150 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
151 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
152 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
153 /* 0x80 - 0x8f are for Dimension-1 official charsets */
155 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
157 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
159 /* 0x90 - 0x9d are for Dimension-2 official charsets */
160 /* 0x9e is for Dimension-1 private charsets */
161 /* 0x9f is for Dimension-2 private charsets */
162 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
168 #define BT_UINT8_MIN 0
169 #define BT_UINT8_MAX (UCHAR_MAX - 3)
170 #define BT_UINT8_t (UCHAR_MAX - 2)
171 #define BT_UINT8_nil (UCHAR_MAX - 1)
172 #define BT_UINT8_unbound UCHAR_MAX
174 INLINE_HEADER int INT_UINT8_P (Lisp_Object obj);
175 INLINE_HEADER int UINT8_VALUE_P (Lisp_Object obj);
176 INLINE_HEADER unsigned char UINT8_ENCODE (Lisp_Object obj);
177 INLINE_HEADER Lisp_Object UINT8_DECODE (unsigned char n);
178 INLINE_HEADER unsigned short UINT8_TO_UINT16 (unsigned char n);
181 INT_UINT8_P (Lisp_Object obj)
185 int num = XINT (obj);
187 return (BT_UINT8_MIN <= num) && (num <= BT_UINT8_MAX);
194 UINT8_VALUE_P (Lisp_Object obj)
196 return EQ (obj, Qunbound)
197 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT8_P (obj);
200 INLINE_HEADER unsigned char
201 UINT8_ENCODE (Lisp_Object obj)
203 if (EQ (obj, Qunbound))
204 return BT_UINT8_unbound;
205 else if (EQ (obj, Qnil))
207 else if (EQ (obj, Qt))
213 INLINE_HEADER Lisp_Object
214 UINT8_DECODE (unsigned char n)
216 if (n == BT_UINT8_unbound)
218 else if (n == BT_UINT8_nil)
220 else if (n == BT_UINT8_t)
227 mark_uint8_byte_table (Lisp_Object obj)
233 print_uint8_byte_table (Lisp_Object obj,
234 Lisp_Object printcharfun, int escapeflag)
236 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
238 struct gcpro gcpro1, gcpro2;
239 GCPRO2 (obj, printcharfun);
241 write_c_string ("\n#<uint8-byte-table", printcharfun);
242 for (i = 0; i < 256; i++)
244 unsigned char n = bte->property[i];
246 write_c_string ("\n ", printcharfun);
247 write_c_string (" ", printcharfun);
248 if (n == BT_UINT8_unbound)
249 write_c_string ("void", printcharfun);
250 else if (n == BT_UINT8_nil)
251 write_c_string ("nil", printcharfun);
252 else if (n == BT_UINT8_t)
253 write_c_string ("t", printcharfun);
258 sprintf (buf, "%hd", n);
259 write_c_string (buf, printcharfun);
263 write_c_string (">", printcharfun);
267 uint8_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
269 Lisp_Uint8_Byte_Table *te1 = XUINT8_BYTE_TABLE (obj1);
270 Lisp_Uint8_Byte_Table *te2 = XUINT8_BYTE_TABLE (obj2);
273 for (i = 0; i < 256; i++)
274 if (te1->property[i] != te2->property[i])
280 uint8_byte_table_hash (Lisp_Object obj, int depth)
282 Lisp_Uint8_Byte_Table *te = XUINT8_BYTE_TABLE (obj);
286 for (i = 0; i < 256; i++)
287 hash = HASH2 (hash, te->property[i]);
291 DEFINE_LRECORD_IMPLEMENTATION ("uint8-byte-table", uint8_byte_table,
292 mark_uint8_byte_table,
293 print_uint8_byte_table,
294 0, uint8_byte_table_equal,
295 uint8_byte_table_hash,
296 0 /* uint8_byte_table_description */,
297 Lisp_Uint8_Byte_Table);
300 make_uint8_byte_table (unsigned char initval)
304 Lisp_Uint8_Byte_Table *cte;
306 cte = alloc_lcrecord_type (Lisp_Uint8_Byte_Table,
307 &lrecord_uint8_byte_table);
309 for (i = 0; i < 256; i++)
310 cte->property[i] = initval;
312 XSETUINT8_BYTE_TABLE (obj, cte);
317 uint8_byte_table_same_value_p (Lisp_Object obj)
319 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
320 unsigned char v0 = bte->property[0];
323 for (i = 1; i < 256; i++)
325 if (bte->property[i] != v0)
332 #define BT_UINT16_MIN 0
333 #define BT_UINT16_MAX (USHRT_MAX - 3)
334 #define BT_UINT16_t (USHRT_MAX - 2)
335 #define BT_UINT16_nil (USHRT_MAX - 1)
336 #define BT_UINT16_unbound USHRT_MAX
338 INLINE_HEADER int INT_UINT16_P (Lisp_Object obj);
339 INLINE_HEADER int UINT16_VALUE_P (Lisp_Object obj);
340 INLINE_HEADER unsigned short UINT16_ENCODE (Lisp_Object obj);
341 INLINE_HEADER Lisp_Object UINT16_DECODE (unsigned short us);
344 INT_UINT16_P (Lisp_Object obj)
348 int num = XINT (obj);
350 return (BT_UINT16_MIN <= num) && (num <= BT_UINT16_MAX);
357 UINT16_VALUE_P (Lisp_Object obj)
359 return EQ (obj, Qunbound)
360 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT16_P (obj);
363 INLINE_HEADER unsigned short
364 UINT16_ENCODE (Lisp_Object obj)
366 if (EQ (obj, Qunbound))
367 return BT_UINT16_unbound;
368 else if (EQ (obj, Qnil))
369 return BT_UINT16_nil;
370 else if (EQ (obj, Qt))
376 INLINE_HEADER Lisp_Object
377 UINT16_DECODE (unsigned short n)
379 if (n == BT_UINT16_unbound)
381 else if (n == BT_UINT16_nil)
383 else if (n == BT_UINT16_t)
389 INLINE_HEADER unsigned short
390 UINT8_TO_UINT16 (unsigned char n)
392 if (n == BT_UINT8_unbound)
393 return BT_UINT16_unbound;
394 else if (n == BT_UINT8_nil)
395 return BT_UINT16_nil;
396 else if (n == BT_UINT8_t)
403 mark_uint16_byte_table (Lisp_Object obj)
409 print_uint16_byte_table (Lisp_Object obj,
410 Lisp_Object printcharfun, int escapeflag)
412 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
414 struct gcpro gcpro1, gcpro2;
415 GCPRO2 (obj, printcharfun);
417 write_c_string ("\n#<uint16-byte-table", printcharfun);
418 for (i = 0; i < 256; i++)
420 unsigned short n = bte->property[i];
422 write_c_string ("\n ", printcharfun);
423 write_c_string (" ", printcharfun);
424 if (n == BT_UINT16_unbound)
425 write_c_string ("void", printcharfun);
426 else if (n == BT_UINT16_nil)
427 write_c_string ("nil", printcharfun);
428 else if (n == BT_UINT16_t)
429 write_c_string ("t", printcharfun);
434 sprintf (buf, "%hd", n);
435 write_c_string (buf, printcharfun);
439 write_c_string (">", printcharfun);
443 uint16_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
445 Lisp_Uint16_Byte_Table *te1 = XUINT16_BYTE_TABLE (obj1);
446 Lisp_Uint16_Byte_Table *te2 = XUINT16_BYTE_TABLE (obj2);
449 for (i = 0; i < 256; i++)
450 if (te1->property[i] != te2->property[i])
456 uint16_byte_table_hash (Lisp_Object obj, int depth)
458 Lisp_Uint16_Byte_Table *te = XUINT16_BYTE_TABLE (obj);
462 for (i = 0; i < 256; i++)
463 hash = HASH2 (hash, te->property[i]);
467 DEFINE_LRECORD_IMPLEMENTATION ("uint16-byte-table", uint16_byte_table,
468 mark_uint16_byte_table,
469 print_uint16_byte_table,
470 0, uint16_byte_table_equal,
471 uint16_byte_table_hash,
472 0 /* uint16_byte_table_description */,
473 Lisp_Uint16_Byte_Table);
476 make_uint16_byte_table (unsigned short initval)
480 Lisp_Uint16_Byte_Table *cte;
482 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
483 &lrecord_uint16_byte_table);
485 for (i = 0; i < 256; i++)
486 cte->property[i] = initval;
488 XSETUINT16_BYTE_TABLE (obj, cte);
493 expand_uint8_byte_table_to_uint16 (Lisp_Object table)
497 Lisp_Uint8_Byte_Table* bte = XUINT8_BYTE_TABLE(table);
498 Lisp_Uint16_Byte_Table* cte;
500 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
501 &lrecord_uint16_byte_table);
502 for (i = 0; i < 256; i++)
504 cte->property[i] = UINT8_TO_UINT16 (bte->property[i]);
506 XSETUINT16_BYTE_TABLE (obj, cte);
511 uint16_byte_table_same_value_p (Lisp_Object obj)
513 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
514 unsigned short v0 = bte->property[0];
517 for (i = 1; i < 256; i++)
519 if (bte->property[i] != v0)
527 mark_byte_table (Lisp_Object obj)
529 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
532 for (i = 0; i < 256; i++)
534 mark_object (cte->property[i]);
540 print_byte_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
542 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
544 struct gcpro gcpro1, gcpro2;
545 GCPRO2 (obj, printcharfun);
547 write_c_string ("\n#<byte-table", printcharfun);
548 for (i = 0; i < 256; i++)
550 Lisp_Object elt = bte->property[i];
552 write_c_string ("\n ", printcharfun);
553 write_c_string (" ", printcharfun);
554 if (EQ (elt, Qunbound))
555 write_c_string ("void", printcharfun);
557 print_internal (elt, printcharfun, escapeflag);
560 write_c_string (">", printcharfun);
564 byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
566 Lisp_Byte_Table *cte1 = XBYTE_TABLE (obj1);
567 Lisp_Byte_Table *cte2 = XBYTE_TABLE (obj2);
570 for (i = 0; i < 256; i++)
571 if (BYTE_TABLE_P (cte1->property[i]))
573 if (BYTE_TABLE_P (cte2->property[i]))
575 if (!byte_table_equal (cte1->property[i],
576 cte2->property[i], depth + 1))
583 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
589 byte_table_hash (Lisp_Object obj, int depth)
591 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
593 return internal_array_hash (cte->property, 256, depth);
596 static const struct lrecord_description byte_table_description[] = {
597 { XD_LISP_OBJECT_ARRAY, offsetof(Lisp_Byte_Table, property), 256 },
601 DEFINE_LRECORD_IMPLEMENTATION ("byte-table", byte_table,
606 byte_table_description,
610 make_byte_table (Lisp_Object initval)
614 Lisp_Byte_Table *cte;
616 cte = alloc_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
618 for (i = 0; i < 256; i++)
619 cte->property[i] = initval;
621 XSETBYTE_TABLE (obj, cte);
626 byte_table_same_value_p (Lisp_Object obj)
628 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
629 Lisp_Object v0 = bte->property[0];
632 for (i = 1; i < 256; i++)
634 if (!internal_equal (bte->property[i], v0, 0))
641 Lisp_Object get_byte_table (Lisp_Object table, unsigned char idx);
642 Lisp_Object put_byte_table (Lisp_Object table, unsigned char idx,
646 get_byte_table (Lisp_Object table, unsigned char idx)
648 if (UINT8_BYTE_TABLE_P (table))
649 return UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[idx]);
650 else if (UINT16_BYTE_TABLE_P (table))
651 return UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[idx]);
652 else if (BYTE_TABLE_P (table))
653 return XBYTE_TABLE(table)->property[idx];
659 put_byte_table (Lisp_Object table, unsigned char idx, Lisp_Object value)
661 if (UINT8_BYTE_TABLE_P (table))
663 if (UINT8_VALUE_P (value))
665 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
666 if (!UINT8_BYTE_TABLE_P (value) &&
667 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
668 && uint8_byte_table_same_value_p (table))
673 else if (UINT16_VALUE_P (value))
675 Lisp_Object new = expand_uint8_byte_table_to_uint16 (table);
677 XUINT16_BYTE_TABLE(new)->property[idx] = UINT16_ENCODE (value);
682 Lisp_Object new = make_byte_table (Qnil);
685 for (i = 0; i < 256; i++)
687 XBYTE_TABLE(new)->property[i]
688 = UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[i]);
690 XBYTE_TABLE(new)->property[idx] = value;
694 else if (UINT16_BYTE_TABLE_P (table))
696 if (UINT16_VALUE_P (value))
698 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
699 if (!UINT8_BYTE_TABLE_P (value) &&
700 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
701 && uint16_byte_table_same_value_p (table))
708 Lisp_Object new = make_byte_table (Qnil);
711 for (i = 0; i < 256; i++)
713 XBYTE_TABLE(new)->property[i]
714 = UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[i]);
716 XBYTE_TABLE(new)->property[idx] = value;
720 else if (BYTE_TABLE_P (table))
722 XBYTE_TABLE(table)->property[idx] = value;
723 if (!UINT8_BYTE_TABLE_P (value) &&
724 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
725 && byte_table_same_value_p (table))
730 else if (!internal_equal (table, value, 0))
732 if (UINT8_VALUE_P (table) && UINT8_VALUE_P (value))
734 table = make_uint8_byte_table (UINT8_ENCODE (table));
735 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
737 else if (UINT16_VALUE_P (table) && UINT16_VALUE_P (value))
739 table = make_uint16_byte_table (UINT16_ENCODE (table));
740 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
744 table = make_byte_table (table);
745 XBYTE_TABLE(table)->property[idx] = value;
752 mark_char_id_table (Lisp_Object obj)
754 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
760 print_char_id_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
762 Lisp_Object table = XCHAR_ID_TABLE (obj)->table;
764 struct gcpro gcpro1, gcpro2;
765 GCPRO2 (obj, printcharfun);
767 write_c_string ("#<char-id-table ", printcharfun);
768 for (i = 0; i < 256; i++)
770 Lisp_Object elt = get_byte_table (table, i);
771 if (i != 0) write_c_string ("\n ", printcharfun);
772 if (EQ (elt, Qunbound))
773 write_c_string ("void", printcharfun);
775 print_internal (elt, printcharfun, escapeflag);
778 write_c_string (">", printcharfun);
782 char_id_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
784 Lisp_Object table1 = XCHAR_ID_TABLE (obj1)->table;
785 Lisp_Object table2 = XCHAR_ID_TABLE (obj2)->table;
788 for (i = 0; i < 256; i++)
790 if (!internal_equal (get_byte_table (table1, i),
791 get_byte_table (table2, i), 0))
798 char_id_table_hash (Lisp_Object obj, int depth)
800 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
802 return char_id_table_hash (cte->table, depth + 1);
805 static const struct lrecord_description char_id_table_description[] = {
806 { XD_LISP_OBJECT, offsetof(Lisp_Char_ID_Table, table) },
810 DEFINE_LRECORD_IMPLEMENTATION ("char-id-table", char_id_table,
813 0, char_id_table_equal,
815 char_id_table_description,
819 make_char_id_table (Lisp_Object initval)
822 Lisp_Char_ID_Table *cte;
824 cte = alloc_lcrecord_type (Lisp_Char_ID_Table, &lrecord_char_id_table);
826 cte->table = make_byte_table (initval);
828 XSETCHAR_ID_TABLE (obj, cte);
834 get_char_id_table (Emchar ch, Lisp_Object table)
836 unsigned int code = ch;
843 (XCHAR_ID_TABLE (table)->table,
844 (unsigned char)(code >> 24)),
845 (unsigned char) (code >> 16)),
846 (unsigned char) (code >> 8)),
847 (unsigned char) code);
850 void put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table);
852 put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table)
854 unsigned int code = ch;
855 Lisp_Object table1, table2, table3, table4;
857 table1 = XCHAR_ID_TABLE (table)->table;
858 table2 = get_byte_table (table1, (unsigned char)(code >> 24));
859 table3 = get_byte_table (table2, (unsigned char)(code >> 16));
860 table4 = get_byte_table (table3, (unsigned char)(code >> 8));
862 table4 = put_byte_table (table4, (unsigned char)code, value);
863 table3 = put_byte_table (table3, (unsigned char)(code >> 8), table4);
864 table2 = put_byte_table (table2, (unsigned char)(code >> 16), table3);
865 XCHAR_ID_TABLE (table)->table
866 = put_byte_table (table1, (unsigned char)(code >> 24), table2);
870 Lisp_Object Vchar_attribute_hash_table;
871 Lisp_Object Vcharacter_composition_table;
872 Lisp_Object Vcharacter_variant_table;
874 Lisp_Object Qideograph_daikanwa;
875 Lisp_Object Q_decomposition;
879 Lisp_Object Qisolated;
880 Lisp_Object Qinitial;
883 Lisp_Object Qvertical;
884 Lisp_Object QnoBreak;
885 Lisp_Object Qfraction;
895 Emchar to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg);
897 Lisp_Object put_char_ccs_code_point (Lisp_Object character,
898 Lisp_Object ccs, Lisp_Object value);
899 Lisp_Object remove_char_ccs (Lisp_Object character, Lisp_Object ccs);
902 to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
908 else if (EQ (v, Qcompat))
910 else if (EQ (v, Qisolated))
912 else if (EQ (v, Qinitial))
914 else if (EQ (v, Qmedial))
916 else if (EQ (v, Qfinal))
918 else if (EQ (v, Qvertical))
920 else if (EQ (v, QnoBreak))
922 else if (EQ (v, Qfraction))
924 else if (EQ (v, Qsuper))
926 else if (EQ (v, Qsub))
928 else if (EQ (v, Qcircle))
930 else if (EQ (v, Qsquare))
932 else if (EQ (v, Qwide))
934 else if (EQ (v, Qnarrow))
936 else if (EQ (v, Qsmall))
938 else if (EQ (v, Qfont))
941 signal_simple_error (err_msg, err_arg);
944 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
945 Return character corresponding with list.
949 Lisp_Object table = Vcharacter_composition_table;
950 Lisp_Object rest = list;
954 Lisp_Object v = Fcar (rest);
956 Emchar c = to_char_id (v, "Invalid value for composition", list);
958 ret = get_char_id_table (c, table);
963 if (!CHAR_ID_TABLE_P (ret))
968 else if (!CONSP (rest))
970 else if (CHAR_ID_TABLE_P (ret))
973 signal_simple_error ("Invalid table is found with", list);
975 signal_simple_error ("Invalid value for composition", list);
978 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
979 Return variants of CHARACTER.
983 CHECK_CHAR (character);
984 return Fcopy_list (get_char_id_table (XCHAR (character),
985 Vcharacter_variant_table));
/* We store the char-attributes in hash tables with the names as the
   key and the actual char-id-table object as the value.  Occasionally
   we need to use them in a list format.  These routines provide us
   with that. */
993 struct char_attribute_list_closure
995 Lisp_Object *char_attribute_list;
999 add_char_attribute_to_list_mapper (Lisp_Object key, Lisp_Object value,
1000 void *char_attribute_list_closure)
1002 /* This function can GC */
1003 struct char_attribute_list_closure *calcl
1004 = (struct char_attribute_list_closure*) char_attribute_list_closure;
1005 Lisp_Object *char_attribute_list = calcl->char_attribute_list;
1007 *char_attribute_list = Fcons (key, *char_attribute_list);
1011 DEFUN ("char-attribute-list", Fchar_attribute_list, 0, 0, 0, /*
1012 Return the list of all existing character attributes except coded-charsets.
1016 Lisp_Object char_attribute_list = Qnil;
1017 struct gcpro gcpro1;
1018 struct char_attribute_list_closure char_attribute_list_closure;
1020 GCPRO1 (char_attribute_list);
1021 char_attribute_list_closure.char_attribute_list = &char_attribute_list;
1022 elisp_maphash (add_char_attribute_to_list_mapper,
1023 Vchar_attribute_hash_table,
1024 &char_attribute_list_closure);
1026 return char_attribute_list;
1029 DEFUN ("find-char-attribute-table", Ffind_char_attribute_table, 1, 1, 0, /*
1030 Return char-id-table corresponding to ATTRIBUTE.
1034 return Fgethash (attribute, Vchar_attribute_hash_table, Qnil);
/* We store the char-id-tables in hash tables with the attributes as
   the key and the actual char-id-table object as the value.  Each
   char-id-table stores the values of one attribute for the
   characters.  Occasionally we need to get the attributes of a
   character in an association-list format.  These routines provide
   us with that. */
1044 struct char_attribute_alist_closure
1047 Lisp_Object *char_attribute_alist;
1051 add_char_attribute_alist_mapper (Lisp_Object key, Lisp_Object value,
1052 void *char_attribute_alist_closure)
1054 /* This function can GC */
1055 struct char_attribute_alist_closure *caacl =
1056 (struct char_attribute_alist_closure*) char_attribute_alist_closure;
1057 Lisp_Object ret = get_char_id_table (caacl->char_id, value);
1058 if (!UNBOUNDP (ret))
1060 Lisp_Object *char_attribute_alist = caacl->char_attribute_alist;
1061 *char_attribute_alist
1062 = Fcons (Fcons (key, ret), *char_attribute_alist);
1067 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
1068 Return the alist of attributes of CHARACTER.
1072 Lisp_Object alist = Qnil;
1075 CHECK_CHAR (character);
1077 struct gcpro gcpro1;
1078 struct char_attribute_alist_closure char_attribute_alist_closure;
1081 char_attribute_alist_closure.char_id = XCHAR (character);
1082 char_attribute_alist_closure.char_attribute_alist = &alist;
1083 elisp_maphash (add_char_attribute_alist_mapper,
1084 Vchar_attribute_hash_table,
1085 &char_attribute_alist_closure);
1089 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
1091 Lisp_Object ccs = chlook->charset_by_leading_byte[i];
1095 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1098 if ( CHAR_ID_TABLE_P (encoding_table)
1099 && INTP (cpos = get_char_id_table (XCHAR (character),
1102 alist = Fcons (Fcons (ccs, cpos), alist);
1109 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 3, 0, /*
1110 Return the value of CHARACTER's ATTRIBUTE.
Return DEFAULT-VALUE if the value does not exist.
1113 (character, attribute, default_value))
1117 CHECK_CHAR (character);
1118 if (!NILP (ccs = Ffind_charset (attribute)))
1120 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1122 if (CHAR_ID_TABLE_P (encoding_table))
1123 return get_char_id_table (XCHAR (character), encoding_table);
1127 Lisp_Object table = Fgethash (attribute,
1128 Vchar_attribute_hash_table,
1130 if (!UNBOUNDP (table))
1132 Lisp_Object ret = get_char_id_table (XCHAR (character), table);
1133 if (!UNBOUNDP (ret))
1137 return default_value;
1140 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
1141 Store CHARACTER's ATTRIBUTE with VALUE.
1143 (character, attribute, value))
1147 CHECK_CHAR (character);
1148 ccs = Ffind_charset (attribute);
1151 return put_char_ccs_code_point (character, ccs, value);
1153 else if (EQ (attribute, Q_decomposition))
1158 signal_simple_error ("Invalid value for ->decomposition",
1161 if (CONSP (Fcdr (value)))
1163 Lisp_Object rest = value;
1164 Lisp_Object table = Vcharacter_composition_table;
1168 GET_EXTERNAL_LIST_LENGTH (rest, len);
1169 seq = make_vector (len, Qnil);
1171 while (CONSP (rest))
1173 Lisp_Object v = Fcar (rest);
1176 = to_char_id (v, "Invalid value for ->decomposition", value);
1179 XVECTOR_DATA(seq)[i++] = v;
1181 XVECTOR_DATA(seq)[i++] = make_char (c);
1185 put_char_id_table (c, character, table);
1190 ntable = get_char_id_table (c, table);
1191 if (!CHAR_ID_TABLE_P (ntable))
1193 ntable = make_char_id_table (Qnil);
1194 put_char_id_table (c, ntable, table);
1202 Lisp_Object v = Fcar (value);
1206 Emchar c = XINT (v);
1208 = get_char_id_table (c, Vcharacter_variant_table);
1210 if (NILP (Fmemq (v, ret)))
1212 put_char_id_table (c, Fcons (character, ret),
1213 Vcharacter_variant_table);
1216 seq = make_vector (1, v);
1220 else if (EQ (attribute, Q_ucs))
1226 signal_simple_error ("Invalid value for ->ucs", value);
1230 ret = get_char_id_table (c, Vcharacter_variant_table);
1231 if (NILP (Fmemq (character, ret)))
1233 put_char_id_table (c, Fcons (character, ret),
1234 Vcharacter_variant_table);
1238 Lisp_Object table = Fgethash (attribute,
1239 Vchar_attribute_hash_table,
1244 table = make_char_id_table (Qunbound);
1245 Fputhash (attribute, table, Vchar_attribute_hash_table);
1247 put_char_id_table (XCHAR (character), value, table);
1252 DEFUN ("remove-char-attribute", Fremove_char_attribute, 2, 2, 0, /*
1253 Remove CHARACTER's ATTRIBUTE.
1255 (character, attribute))
1259 CHECK_CHAR (character);
1260 ccs = Ffind_charset (attribute);
1263 return remove_char_ccs (character, ccs);
1267 Lisp_Object table = Fgethash (attribute,
1268 Vchar_attribute_hash_table,
1270 if (!UNBOUNDP (table))
1272 put_char_id_table (XCHAR (character), Qunbound, table);
1279 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
1281 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
1283 /* ad-hoc method for `ascii' */
1284 if ((CHARSET_CHARS (cs) == 94) &&
1285 (CHARSET_BYTE_OFFSET (cs) != 33))
1286 return 128 - CHARSET_BYTE_OFFSET (cs);
1288 return CHARSET_CHARS (cs);
1291 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
1293 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
1295 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
1299 if (XVECTOR_LENGTH (v) > ccs_len)
1302 for (i = 0; i < XVECTOR_LENGTH (v); i++)
1304 Lisp_Object c = XVECTOR_DATA(v)[i];
1306 if (!NILP (c) && !CHARP (c))
1310 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
1322 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1325 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1335 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1336 nv = XVECTOR_DATA(v)[i];
1342 XVECTOR_DATA(v)[i] = Qnil;
1346 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1347 int code_point, Lisp_Object character);
1349 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1350 int code_point, Lisp_Object character)
1354 int ccs_len = XVECTOR_LENGTH (v);
1359 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1360 nv = XVECTOR_DATA(v)[i];
1364 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
1370 XVECTOR_DATA(v)[i] = character;
1374 put_char_ccs_code_point (Lisp_Object character,
1375 Lisp_Object ccs, Lisp_Object value)
1377 Lisp_Object encoding_table;
1379 if (!EQ (XCHARSET_NAME (ccs), Qucs)
1380 || (XCHAR (character) != XINT (value)))
1382 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
1383 int dim = XCHARSET_DIMENSION (ccs);
1384 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
1385 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
1389 { /* obsolete representation: value must be a list of bytes */
1390 Lisp_Object ret = Fcar (value);
1394 signal_simple_error ("Invalid value for coded-charset", value);
1395 code_point = XINT (ret);
1396 if (XCHARSET_GRAPHIC (ccs) == 1)
1398 rest = Fcdr (value);
1399 while (!NILP (rest))
1404 signal_simple_error ("Invalid value for coded-charset",
1408 signal_simple_error ("Invalid value for coded-charset",
1411 if (XCHARSET_GRAPHIC (ccs) == 1)
1413 code_point = (code_point << 8) | j;
1416 value = make_int (code_point);
1418 else if (INTP (value))
1420 code_point = XINT (value);
1421 if (XCHARSET_GRAPHIC (ccs) == 1)
1423 code_point &= 0x7F7F7F7F;
1424 value = make_int (code_point);
1428 signal_simple_error ("Invalid value for coded-charset", value);
1432 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
1435 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
1440 XCHARSET_DECODING_TABLE (ccs)
1441 = v = make_older_vector (ccs_len, Qnil);
1444 decoding_table_put_char (v, dim, byte_offset, code_point, character);
1446 if (NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
1448 XCHARSET_ENCODING_TABLE (ccs)
1449 = encoding_table = make_char_id_table (Qnil);
1451 put_char_id_table (XCHAR (character), value, encoding_table);
1456 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
1458 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
1459 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1461 if (VECTORP (decoding_table))
1463 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
1467 decoding_table_remove_char (decoding_table,
1468 XCHARSET_DIMENSION (ccs),
1469 XCHARSET_BYTE_OFFSET (ccs),
1473 if (CHAR_ID_TABLE_P (encoding_table))
1475 put_char_id_table (XCHAR (character), Qnil, encoding_table);
1480 EXFUN (Fmake_char, 3);
1481 EXFUN (Fdecode_char, 2);
/* Lisp primitive `define-char'.
   Visible flow: ATTRIBUTES is scanned for a cell whose key names a charset
   (found with Ffind_charset) that has a final byte or a UCS range; the
   character is then built with Fmake_char or Fdecode_char.  Otherwise the
   code found under Qucs is used directly with make_char, or the code found
   under Q_ucs is used with 0x100000 added.  A second pass over ATTRIBUTES
   stores each cell with Fput_char_attribute, with special treatment of the
   Qmorohashi_daikanwa and Qideograph_daikanwa keys.
   "Invalid argument" is signaled with ATTRIBUTES as datum for malformed
   input.
   NOTE(review): braces, `else' lines and other short lines are missing
   from this excerpt, so the exact branch structure is unconfirmed. */
1483 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
1484 Store character's ATTRIBUTES.
1488 Lisp_Object rest = attributes;
1489 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
1490 Lisp_Object character;
1492 Lisp_Object daikanwa = Qnil;
1497 while (CONSP (rest))
1499 Lisp_Object cell = Fcar (rest);
1503 signal_simple_error ("Invalid argument", attributes);
1504 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
1505 && ((XCHARSET_FINAL (ccs) != 0) ||
1506 (XCHARSET_UCS_MAX (ccs) > 0)) )
1510 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
1512 character = Fdecode_char (ccs, cell);
1513 goto setup_attributes;
1517 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
1520 signal_simple_error ("Invalid argument", attributes);
1522 character = make_char (XINT (code) + 0x100000);
1523 goto setup_attributes;
1527 else if (!INTP (code))
1528 signal_simple_error ("Invalid argument", attributes);
1530 character = make_char (XINT (code));
1534 while (CONSP (rest))
1536 Lisp_Object cell = Fcar (rest);
1538 Lisp_Object key = Fcar (cell);
1539 Lisp_Object value = Fcdr (cell);
1543 signal_simple_error ("Invalid argument", attributes);
1546 if (EQ (key, Qmorohashi_daikanwa))
1549 GET_EXTERNAL_LIST_LENGTH (value, len);
1553 if (NILP (daikanwa))
1554 daikanwa = Fcdr (Fassq (Qideograph_daikanwa, rest));
1555 if (EQ (Fcar (value), daikanwa))
1559 else if (EQ (key, Qideograph_daikanwa))
1563 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
1572 Lisp_Object Vutf_2000_version;
1576 int leading_code_private_11;
1579 Lisp_Object Qcharsetp;
/* Symbols naming charset properties; several are compared against the
   keywords handled by `make-charset' and `charset-property' in this file. */
1581 /* Qdoc_string, Qdimension, Qchars defined in general.c */
1582 Lisp_Object Qregistry, Qfinal, Qgraphic;
1583 Lisp_Object Qdirection;
1584 Lisp_Object Qreverse_direction_charset;
1585 Lisp_Object Qleading_byte;
1586 Lisp_Object Qshort_name, Qlong_name;
/* Part of a longer list of symbols whose names mirror the predefined
   charsets (the start and end of the declaration are not visible here). */
1600 Qcyrillic_iso8859_5,
1602 Qjapanese_jisx0208_1978,
1605 Qjapanese_jisx0208_1990,
1608 Qchinese_cns11643_1,
1609 Qchinese_cns11643_2,
1615 Qlatin_viscii_lower,
1616 Qlatin_viscii_upper,
1617 Qvietnamese_viscii_lower,
1618 Qvietnamese_viscii_upper,
/* The two values accepted for a charset's 'direction property. */
1649 Lisp_Object Ql2r, Qr2l;
/* Hash table keyed by charset name (and alias) symbols; values are the
   charset objects.  Written by make_charset and `define-charset-alias',
   read by `find-charset' and `charset-list'. */
1651 Lisp_Object Vcharset_hash_table;
1653 /* Composite characters are characters constructed by overstriking two
1654 or more regular characters.
1656 1) The old Mule implementation involves storing composite characters
1657 in a buffer as a tag followed by all of the actual characters
1658 used to make up the composite character. I think this is a bad
1659 idea; it greatly complicates code that wants to handle strings
1660 one character at a time because it has to deal with the possibility
1661 of great big ungainly characters. It's much more reasonable to
1662 simply store an index into a table of composite characters.
1664 2) The current implementation only allows for 16,384 separate
1665 composite characters over the lifetime of the XEmacs process.
1666 This could become a potential problem if the user
1667 edited lots of different files that use composite characters.
1668 Due to FSF bogosity, increasing the number of allowable
1669 composite characters under Mule would decrease the number
1670 of possible faces that can exist. Mule already has shrunk
1671 this to 2048, and further shrinkage would become uncomfortable.
1672 No such problems exist in XEmacs.
1674 Composite characters could be represented as 0x80 C1 C2 C3,
1675 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1676 for slightly under 2^20 (one million) composite characters
1677 over the XEmacs process lifetime, and you only need to
1678 increase the size of a Mule character from 19 to 21 bits.
1679 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1680 85 million (slightly over 2^26) composite characters. */
1683 /************************************************************************/
1684 /* Basic Emchar functions */
1685 /************************************************************************/
1687 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1688 string in STR. Returns the number of bytes stored.
1689 Do not call this directly. Use the macro set_charptr_emchar() instead.
1693 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1699 Lisp_Object charset;
/* Variable-length encoding of C.  Each branch below is chosen by the
   magnitude of C and emits a lead byte tagged with 0xc0, 0xe0, 0xf0, 0xf8
   or 0xfc, followed by continuation bytes that each carry six payload bits
   tagged with 0x80.  This is the UTF-8 byte layout, including the historic
   five- and six-byte forms. */
1708 else if ( c <= 0x7ff )
1710 *p++ = (c >> 6) | 0xc0;
1711 *p++ = (c & 0x3f) | 0x80;
1713 else if ( c <= 0xffff )
1715 *p++ = (c >> 12) | 0xe0;
1716 *p++ = ((c >> 6) & 0x3f) | 0x80;
1717 *p++ = (c & 0x3f) | 0x80;
1719 else if ( c <= 0x1fffff )
1721 *p++ = (c >> 18) | 0xf0;
1722 *p++ = ((c >> 12) & 0x3f) | 0x80;
1723 *p++ = ((c >> 6) & 0x3f) | 0x80;
1724 *p++ = (c & 0x3f) | 0x80;
1726 else if ( c <= 0x3ffffff )
1728 *p++ = (c >> 24) | 0xf8;
1729 *p++ = ((c >> 18) & 0x3f) | 0x80;
1730 *p++ = ((c >> 12) & 0x3f) | 0x80;
1731 *p++ = ((c >> 6) & 0x3f) | 0x80;
1732 *p++ = (c & 0x3f) | 0x80;
/* Remaining case: six bytes, lead byte tagged with 0xfc. */
1736 *p++ = (c >> 30) | 0xfc;
1737 *p++ = ((c >> 24) & 0x3f) | 0x80;
1738 *p++ = ((c >> 18) & 0x3f) | 0x80;
1739 *p++ = ((c >> 12) & 0x3f) | 0x80;
1740 *p++ = ((c >> 6) & 0x3f) | 0x80;
1741 *p++ = (c & 0x3f) | 0x80;
/* Leading-byte encoding: C is split into its charset and position bytes,
   and a prefix byte is emitted first when the leading byte is a private
   one.  NOTE(review): this section and the UTF-8-style section above are
   presumably alternatives selected by preprocessor lines that are not
   visible in this excerpt -- confirm. */
1744 BREAKUP_CHAR (c, charset, c1, c2);
1745 lb = CHAR_LEADING_BYTE (c);
1746 if (LEADING_BYTE_PRIVATE_P (lb))
1747 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1749 if (EQ (charset, Vcharset_control_1))
1758 /* Return the first character from a Mule-encoded string in STR,
1759 assuming it's non-ASCII. Do not call this directly.
1760 Use the macro charptr_emchar() instead. */
1763 non_ascii_charptr_emchar (const Bufbyte *str)
/* The lead byte B is classified by descending thresholds (0xf8, 0xf0,
   0xe0, 0xc0).  The statements inside each branch are not visible here;
   presumably they set the initial value of CH and the continuation count
   LEN -- confirm. */
1776 else if ( b >= 0xf8 )
1781 else if ( b >= 0xf0 )
1786 else if ( b >= 0xe0 )
1791 else if ( b >= 0xc0 )
/* Each iteration shifts CH left by six bits and ORs in the low six bits
   of B. */
1801 for( ; len > 0; len-- )
1804 ch = ( ch << 6 ) | ( b & 0x3f );
/* Leading-byte representation: I0 is the first byte of the sequence. */
1808 Bufbyte i0 = *str, i1, i2 = 0;
1809 Lisp_Object charset;
/* A Control-1 character is its following byte minus 0x20. */
1811 if (i0 == LEADING_BYTE_CONTROL_1)
1812 return (Emchar) (*++str - 0x20);
1814 if (LEADING_BYTE_PREFIX_P (i0))
/* The charset is looked up from the leading byte; a second position byte
   is only relevant for two-dimensional charsets. */
1819 charset = CHARSET_BY_LEADING_BYTE (i0);
1820 if (XCHARSET_DIMENSION (charset) == 2)
1823 return MAKE_CHAR (charset, i1, i2);
1827 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1828 Do not call this directly. Use the macro valid_char_p() instead. */
1832 non_ascii_valid_char_p (Emchar ch)
1836 /* Must have only lowest 19 bits set */
/* CH is split into three fields; F1 and F2 select the charset (via a
   leading byte) and the remaining field(s) hold position codes. */
1840 f1 = CHAR_FIELD1 (ch);
1841 f2 = CHAR_FIELD2 (ch);
1842 f3 = CHAR_FIELD3 (ch);
/* First case: the charset is selected by F2, which must lie in the
   official or the private range.  The final test returns whether the
   charset has 96 characters per dimension.  NOTE(review): the condition
   selecting this case and the early returns are not visible here. */
1846 Lisp_Object charset;
1848 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1849 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1850 f2 > MAX_CHAR_FIELD2_PRIVATE)
1855 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1856 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1860 NOTE: This takes advantage of the fact that
1861 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1862 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1864 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1865 if (EQ (charset, Qnil))
1867 return (XCHARSET_CHARS (charset) == 96);
/* Second case: the charset is selected by F1, which must lie in the
   official or the private range; F2 and F3 are both position codes and
   are tested against 0x20. */
1871 Lisp_Object charset;
1873 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1874 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1875 f1 > MAX_CHAR_FIELD1_PRIVATE)
1877 if (f2 < 0x20 || f3 < 0x20)
/* Composite characters are checked by looking CH up in the
   char-to-string hash table. */
1880 #ifdef ENABLE_COMPOSITE_CHARS
1881 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1883 if (UNBOUNDP (Fgethash (make_int (ch),
1884 Vcomposite_char_char2string_hash_table,
1889 #endif /* ENABLE_COMPOSITE_CHARS */
1891 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1892 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
/* Official and private F1 values use different offsets to reach the
   leading byte. */
1895 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1897 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1900 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1902 if (EQ (charset, Qnil))
1904 return (XCHARSET_CHARS (charset) == 96);
1910 /************************************************************************/
1911 /* Basic string functions */
1912 /************************************************************************/
1914 /* Copy the character pointed to by SRC into DST. Do not call this
1915 directly. Use the macro charptr_copy_char() instead.
1916 Return the number of bytes copied. */
1919 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
/* The length of the character is determined from its first byte alone. */
1921 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
/* Counts down from BYTES while advancing both pointers; the copy
   statement in the loop body is not visible in this excerpt. */
1923 for (i = bytes; i; i--, dst++, src++)
1929 /************************************************************************/
1930 /* streams of Emchars */
1931 /************************************************************************/
1933 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1934 The functions below are not meant to be called directly; use
1935 the macros in insdel.h. */
/* Finish reading one character from STREAM whose first byte CH has
   already been obtained.  The first byte is stored in a local buffer, the
   remaining REP_BYTES_BY_FIRST_BYTE (ch) - 1 bytes are fetched with
   Lstream_getc (each asserted to be non-negative), and the buffer is
   decoded with charptr_emchar. */
1938 Lstream_get_emchar_1 (Lstream *stream, int ch)
1940 Bufbyte str[MAX_EMCHAR_LEN];
1941 Bufbyte *strptr = str;
1944 str[0] = (Bufbyte) ch;
1946 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
1948 int c = Lstream_getc (stream);
1949 bufpos_checking_assert (c >= 0);
1950 *++strptr = (Bufbyte) c;
1952 return charptr_emchar (str);
/* Encode CH into a local buffer with set_charptr_emchar and write the
   resulting bytes to STREAM.  Returns the value of Lstream_write. */
1956 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1958 Bufbyte str[MAX_EMCHAR_LEN];
1959 Bytecount len = set_charptr_emchar (str, ch);
1960 return Lstream_write (stream, str, len);
/* Encode CH into a local buffer with set_charptr_emchar and push the
   resulting bytes back onto STREAM with Lstream_unread. */
1964 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1966 Bufbyte str[MAX_EMCHAR_LEN];
1967 Bytecount len = set_charptr_emchar (str, ch);
1968 Lstream_unread (stream, str, len);
1972 /************************************************************************/
1973 /* charset object */
1974 /************************************************************************/
/* Mark method for charset objects: marks the Lisp_Object members of the
   charset reachable from OBJ.
   NOTE(review): marking of decoding_table is commented out below, although
   charset_description lists that field -- confirm this is intentional.
   The return statement is not visible in this excerpt. */
1977 mark_charset (Lisp_Object obj)
1979 Lisp_Charset *cs = XCHARSET (obj);
1981 mark_object (cs->short_name);
1982 mark_object (cs->long_name);
1983 mark_object (cs->doc_string);
1984 mark_object (cs->registry);
1985 mark_object (cs->ccl_program);
1987 mark_object (cs->encoding_table);
1988 /* mark_object (cs->decoding_table); */
/* Print method for charset objects.  Output written to PRINTCHARFUN has
   the shape
     #<charset NAME SHORT-NAME LONG-NAME DOC-STRING ... reg=REGISTRY 0xUID>
   where the elided part is produced by the sprintf format below
   (chars^dimension, direction, columns, graphic, final byte).
   The error call reports that a charset has no readable printed form; its
   guarding condition is not visible in this excerpt. */
1994 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1996 Lisp_Charset *cs = XCHARSET (obj);
2000 error ("printing unreadable object #<charset %s 0x%x>",
2001 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
2004 write_c_string ("#<charset ", printcharfun);
/* The name is printed with escape flag 0, the three strings with 1. */
2005 print_internal (CHARSET_NAME (cs), printcharfun, 0);
2006 write_c_string (" ", printcharfun);
2007 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
2008 write_c_string (" ", printcharfun);
2009 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
2010 write_c_string (" ", printcharfun);
2011 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
2012 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
2014 CHARSET_DIMENSION (cs),
2015 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
2016 CHARSET_COLUMNS (cs),
2017 CHARSET_GRAPHIC (cs),
2018 CHARSET_FINAL (cs));
2019 write_c_string (buf, printcharfun);
2020 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
/* BUF is reused for the trailing uid. */
2021 sprintf (buf, " 0x%x>", cs->header.uid);
2022 write_c_string (buf, printcharfun);
/* Description table listing, by offset, the Lisp_Object members of
   Lisp_Charset.  It is passed to DEFINE_LRECORD_IMPLEMENTATION below.
   The terminating entry is not visible in this excerpt. */
2025 static const struct lrecord_description charset_description[] = {
2026 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
2027 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
2028 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
2029 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
2030 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
2031 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
2032 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
2034 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
2035 { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
/* Defines the "charset" lrecord type, supplying mark_charset,
   print_charset and charset_description; the three 0 arguments leave the
   remaining method slots empty (presumably finalize/equal/hash -- confirm
   against the macro definition). */
2040 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
2041 mark_charset, print_charset, 0, 0, 0,
2042 charset_description,
2045 /* Make a new charset. */
2046 /* #### SJT Should generic properties be allowed? */
/* Allocates a Lisp_Charset, fills its fields from the arguments, and
   registers the object in the lookup tables: by (type, final[, direction])
   in charset_by_attributes, by leading byte in charset_by_leading_byte,
   and by NAME in Vcharset_hash_table.  Each table slot is asserted to be
   empty before it is filled.
   NOTE(review): the visible code stores Qnil in the decoding table rather
   than the DECODING_TABLE argument -- confirm whether a line not visible
   here uses the argument. */
2048 make_charset (Charset_ID id, Lisp_Object name,
2049 unsigned short chars, unsigned char dimension,
2050 unsigned char columns, unsigned char graphic,
2051 Bufbyte final, unsigned char direction, Lisp_Object short_name,
2052 Lisp_Object long_name, Lisp_Object doc,
2054 Lisp_Object decoding_table,
2055 Emchar ucs_min, Emchar ucs_max,
2056 Emchar code_offset, unsigned char byte_offset)
2059 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
2063 XSETCHARSET (obj, cs);
2065 CHARSET_ID (cs) = id;
2066 CHARSET_NAME (cs) = name;
2067 CHARSET_SHORT_NAME (cs) = short_name;
2068 CHARSET_LONG_NAME (cs) = long_name;
2069 CHARSET_CHARS (cs) = chars;
2070 CHARSET_DIMENSION (cs) = dimension;
2071 CHARSET_DIRECTION (cs) = direction;
2072 CHARSET_COLUMNS (cs) = columns;
2073 CHARSET_GRAPHIC (cs) = graphic;
2074 CHARSET_FINAL (cs) = final;
2075 CHARSET_DOC_STRING (cs) = doc;
2076 CHARSET_REGISTRY (cs) = reg;
/* A new charset starts without a CCL program and without a
   reverse-direction partner; callers set those afterwards. */
2077 CHARSET_CCL_PROGRAM (cs) = Qnil;
2078 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
2080 CHARSET_DECODING_TABLE(cs) = Qnil;
2081 CHARSET_ENCODING_TABLE(cs) = Qnil;
2082 CHARSET_UCS_MIN(cs) = ucs_min;
2083 CHARSET_UCS_MAX(cs) = ucs_max;
2084 CHARSET_CODE_OFFSET(cs) = code_offset;
2085 CHARSET_BYTE_OFFSET(cs) = byte_offset;
/* Bytes per character in the internal representation: 1 for ASCII,
   otherwise dimension + 1 or dimension + 2.  The test choosing between the
   last two is not visible here (presumably official versus private leading
   bytes -- confirm). */
2089 if (id == LEADING_BYTE_ASCII)
2090 CHARSET_REP_BYTES (cs) = 1;
2092 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
2094 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
2099 /* some charsets do not have final characters. This includes
2100 ASCII, Control-1, Composite, and the two faux private
2102 unsigned char iso2022_type
2103 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
2105 if (code_offset == 0)
2107 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
2108 chlook->charset_by_attributes[iso2022_type][final] = obj;
2112 (chlook->charset_by_attributes[iso2022_type][final][direction]));
2113 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
2117 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
2118 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
2120 /* Some charsets are "faux" and don't have names or really exist at
2121 all except in the leading-byte table. */
2123 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused private leading byte for a charset of the
   given DIMENSION by post-incrementing a counter in CHLOOK.
   Three counters are visible: a single one bounded by
   MAX_LEADING_BYTE_PRIVATE, and two per-dimension ones bounded by
   MAX_LEADING_BYTE_PRIVATE_1 and MAX_LEADING_BYTE_PRIVATE_2.  The lines
   choosing between them are not visible here (presumably build
   configuration plus DIMENSION -- confirm).
   An error carrying DIMENSION is signaled when no leading byte could be
   allocated. */
2128 get_unallocated_leading_byte (int dimension)
2133 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
2136 lb = chlook->next_allocated_leading_byte++;
2140 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
2143 lb = chlook->next_allocated_1_byte_leading_byte++;
2147 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
2150 lb = chlook->next_allocated_2_byte_leading_byte++;
2156 ("No more character sets free for this dimension",
2157 make_int (dimension));
2163 /* Number of Big5 characters which have the same code in 1st byte. */
/* (0xFF - 0xA1) + (0x7F - 0x40) = 94 + 63 = 157 second-byte values. */
2165 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* Compute the character at position (C1, C2) of CHARSET arithmetically
   from the charset's parameters.
   Visible cases:
   - CHARSET has a non-zero UCS_MAX: the position bytes are turned into a
     linear index (one or two dimensions, each byte reduced by BYTE_OFFSET),
     then shifted by CODE_OFFSET and UCS_MIN; a result outside
     [UCS_MIN, UCS_MAX] signals an error.
   - one-dimensional charset: the result lies in a 94- or 96-sized block
     selected by the final byte relative to '0'.
   - Big5: the pair is converted to a linear index I and CHARSET is
     replaced by big5-1 or big5-2.
   - two-dimensional charset: the result lies in a 94x94 or 96x96 block
     selected by the final byte relative to '0'.
   NOTE(review): `case' labels, returns and braces on short lines are not
   visible in this excerpt. */
2168 make_builtin_char (Lisp_Object charset, int c1, int c2)
2170 if (XCHARSET_UCS_MAX (charset))
2173 = (XCHARSET_DIMENSION (charset) == 1
2175 c1 - XCHARSET_BYTE_OFFSET (charset)
2177 (c1 - XCHARSET_BYTE_OFFSET (charset)) * XCHARSET_CHARS (charset)
2178 + c2 - XCHARSET_BYTE_OFFSET (charset))
2179 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
2180 if ((code < XCHARSET_UCS_MIN (charset))
2181 || (XCHARSET_UCS_MAX (charset) < code))
2182 signal_simple_error ("Arguments makes invalid character",
2186 else if (XCHARSET_DIMENSION (charset) == 1)
2188 switch (XCHARSET_CHARS (charset))
2192 + (XCHARSET_FINAL (charset) - '0') * 94 + (c1 - 33);
2195 + (XCHARSET_FINAL (charset) - '0') * 96 + (c1 - 32);
2202 if (EQ (charset, Vcharset_chinese_big5))
2204 int B1 = c1, B2 = c2;
/* Second bytes below 0x7F start at 0x40; the others are shifted down by
   0x62 so that both ranges form one contiguous row. */
2206 = (B1 - 0xA1) * BIG5_SAME_ROW
2207 + B2 - (B2 < 0x7F ? 0x40 : 0x62);
2211 charset = Vcharset_chinese_big5_1;
2215 charset = Vcharset_chinese_big5_2;
/* For big5-2 the index is rebased to the row whose first byte is 0xC9. */
2216 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
2221 switch (XCHARSET_CHARS (charset))
2224 return MIN_CHAR_94x94
2225 + (XCHARSET_FINAL (charset) - '0') * 94 * 94
2226 + (c1 - 33) * 94 + (c2 - 33);
2228 return MIN_CHAR_96x96
2229 + (XCHARSET_FINAL (charset) - '0') * 96 * 96
2230 + (c1 - 32) * 96 + (c2 - 32);
/* Compute the code point of CH in CHARSET arithmetically, for charsets
   whose characters occupy a contiguous range.  A multi-byte code point is
   returned packed into one integer, one position byte per 8 bits with the
   first byte in the most significant position.
   NOTE(review): the returns for "not in this charset" and several braces
   are on lines not visible in this excerpt. */
2238 range_charset_code_point (Lisp_Object charset, Emchar ch)
/* Case 1: CH lies inside the charset's [UCS_MIN, UCS_MAX] range.  D is the
   linear index within the charset; it is decomposed in base CHARS and
   BYTE_OFFSET is added to every digit. */
2242 if ((XCHARSET_UCS_MIN (charset) <= ch)
2243 && (ch <= XCHARSET_UCS_MAX (charset)))
2245 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
2247 if (XCHARSET_CHARS (charset) == 256)
2249 else if (XCHARSET_DIMENSION (charset) == 1)
2250 return d + XCHARSET_BYTE_OFFSET (charset);
2251 else if (XCHARSET_DIMENSION (charset) == 2)
2253 ((d / XCHARSET_CHARS (charset)
2254 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2255 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2256 else if (XCHARSET_DIMENSION (charset) == 3)
2258 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2259 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2260 | ((d / XCHARSET_CHARS (charset)
2261 % XCHARSET_CHARS (charset)
2262 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2263 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2264 else /* if (XCHARSET_DIMENSION (charset) == 4) */
2266 ((d / (XCHARSET_CHARS (charset)
2267 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2268 + XCHARSET_BYTE_OFFSET (charset)) << 24)
2269 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2270 % XCHARSET_CHARS (charset)
2271 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2272 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
2273 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2274 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
/* Case 2: charsets with CODE_OFFSET 0 map into the blocks starting at
   MIN_CHAR_94, MIN_CHAR_96, MIN_CHAR_94x94 and MIN_CHAR_96x96; the
   sub-block is selected by the final byte relative to '0'. */
2276 else if (XCHARSET_CODE_OFFSET (charset) == 0)
2278 if (XCHARSET_DIMENSION (charset) == 1)
2280 if (XCHARSET_CHARS (charset) == 94)
2282 if (((d = ch - (MIN_CHAR_94
2283 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
2287 else if (XCHARSET_CHARS (charset) == 96)
2289 if (((d = ch - (MIN_CHAR_96
2290 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
2297 else if (XCHARSET_DIMENSION (charset) == 2)
2299 if (XCHARSET_CHARS (charset) == 94)
2301 if (((d = ch - (MIN_CHAR_94x94
2302 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
2305 return (((d / 94) + 33) << 8) | (d % 94 + 33);
2307 else if (XCHARSET_CHARS (charset) == 96)
2309 if (((d = ch - (MIN_CHAR_96x96
2310 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
2313 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* Case 3: mojikyo-2022-1.  The index M (CH relative to MIN_CHAR_MOJIKYO,
   minus one) is split into three bytes; BYTE2 is presumably adjusted on
   lines not visible here before being packed -- confirm. */
2319 if (EQ (charset, Vcharset_mojikyo_2022_1)
2320 && (MIN_CHAR_MOJIKYO < ch) && (ch < MIN_CHAR_MOJIKYO + 94 * 60 * 94))
2322 int m = ch - MIN_CHAR_MOJIKYO - 1;
2323 int byte1 = m / (94 * 60) + 33;
2324 int byte2 = (m % (94 * 60)) / 94;
2325 int byte3 = m % 94 + 33;
2331 return (byte1 << 16) | (byte2 << 8) | byte3;
/* Map character C to a builtin charset, stored through CHARSET, and
   return C's code point in that charset.
   The branches test C against successive ranges: Basic Latin, then
   further ranges whose tests are not visible, Greek, Cyrillic, Hebrew,
   Thai, the BMP, Daikanwa, Mojikyo, and finally the 94, 96, 94x94 and
   96x96 blocks.  In the block ranges the charset is looked up by
   (chars, dimension, final byte, left-to-right); when no such charset is
   registered, *CHARSET is set to Vcharset_ucs instead.
   NOTE(review): the half-width katakana branch returns a list2 (...)
   value where every other visible branch returns an integer; it is
   presumably disabled code whose comment delimiters are not visible in
   this excerpt -- confirm.
   NOTE(review): return statements and braces on short lines are not
   visible in this excerpt. */
2337 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
2339 if (c <= MAX_CHAR_BASIC_LATIN)
2341 *charset = Vcharset_ascii;
2346 *charset = Vcharset_control_1;
2351 *charset = Vcharset_latin_iso8859_1;
/* For the script ranges below the code point is C's offset within the
   range plus 0x20. */
2355 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
2357 *charset = Vcharset_greek_iso8859_7;
2358 return c - MIN_CHAR_GREEK + 0x20;
2360 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
2362 *charset = Vcharset_cyrillic_iso8859_5;
2363 return c - MIN_CHAR_CYRILLIC + 0x20;
2366 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
2368 *charset = Vcharset_hebrew_iso8859_8;
2369 return c - MIN_CHAR_HEBREW + 0x20;
2371 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
2373 *charset = Vcharset_thai_tis620;
2374 return c - MIN_CHAR_THAI + 0x20;
2377 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
2378 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
2380 return list2 (Vcharset_katakana_jisx0201,
2381 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
2384 else if (c <= MAX_CHAR_BMP)
2386 *charset = Vcharset_ucs_bmp;
2389 else if (c < MIN_CHAR_DAIKANWA)
2391 *charset = Vcharset_ucs;
2395 else if (c <= MAX_CHAR_DAIKANWA)
2397 *charset = Vcharset_ideograph_daikanwa;
2398 return c - MIN_CHAR_DAIKANWA;
2401 else if (c <= MAX_CHAR_MOJIKYO)
2403 *charset = Vcharset_mojikyo;
2404 return c - MIN_CHAR_MOJIKYO;
2406 else if (c < MIN_CHAR_94)
2408 *charset = Vcharset_ucs;
/* 94-character, one-dimensional block: quotient selects the final byte,
   remainder (plus 33) is the position byte. */
2411 else if (c <= MAX_CHAR_94)
2413 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
2414 ((c - MIN_CHAR_94) / 94) + '0',
2415 CHARSET_LEFT_TO_RIGHT);
2416 if (!NILP (*charset))
2417 return ((c - MIN_CHAR_94) % 94) + 33;
2420 *charset = Vcharset_ucs;
/* 96-character, one-dimensional block: same scheme with base 32. */
2424 else if (c <= MAX_CHAR_96)
2426 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
2427 ((c - MIN_CHAR_96) / 96) + '0',
2428 CHARSET_LEFT_TO_RIGHT);
2429 if (!NILP (*charset))
2430 return ((c - MIN_CHAR_96) % 96) + 32;
2433 *charset = Vcharset_ucs;
/* Two-dimensional blocks: the two position bytes are packed as
   (first << 8) | second. */
2437 else if (c <= MAX_CHAR_94x94)
2440 = CHARSET_BY_ATTRIBUTES (94, 2,
2441 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
2442 CHARSET_LEFT_TO_RIGHT);
2443 if (!NILP (*charset))
2444 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
2445 | (((c - MIN_CHAR_94x94) % 94) + 33);
2448 *charset = Vcharset_ucs;
2452 else if (c <= MAX_CHAR_96x96)
2455 = CHARSET_BY_ATTRIBUTES (96, 2,
2456 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
2457 CHARSET_LEFT_TO_RIGHT);
2458 if (!NILP (*charset))
2459 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
2460 | (((c - MIN_CHAR_96x96) % 96) + 32);
2463 *charset = Vcharset_ucs;
2469 *charset = Vcharset_ucs;
2474 Lisp_Object Vdefault_coded_charset_priority_list;
2478 /************************************************************************/
2479 /* Basic charset Lisp functions */
2480 /************************************************************************/
/* Lisp predicate `charsetp': returns Qt when OBJECT satisfies CHARSETP and
   Qnil otherwise. */
2482 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
2483 Return non-nil if OBJECT is a charset.
2487 return CHARSETP (object) ? Qt : Qnil;
/* Lisp primitive `find-charset'.  A charset object is returned unchanged;
   anything else must be a symbol (CHECK_SYMBOL) and is looked up in
   Vcharset_hash_table, yielding Qnil when there is no entry. */
2490 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
2491 Retrieve the charset of the given name.
2492 If CHARSET-OR-NAME is a charset object, it is simply returned.
2493 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
2494 nil is returned. Otherwise the associated charset object is returned.
2498 if (CHARSETP (charset_or_name))
2499 return charset_or_name;
2501 CHECK_SYMBOL (charset_or_name);
2502 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Lisp primitive `get-charset': like Ffind_charset, but signals
   "No such charset" with NAME as datum instead of returning nil.
   The test guarding the error and the return statement are not visible in
   this excerpt. */
2505 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
2506 Retrieve the charset of the given name.
2507 Same as `find-charset' except an error is signalled if there is no such
2508 charset instead of returning nil.
2512 Lisp_Object charset = Ffind_charset (name);
2515 signal_simple_error ("No such charset", name);
2519 /* We store the charsets in hash tables with the names as the key and the
2520 actual charset object as the value. Occasionally we need to use them
2521 in a list format. These routines provide us with that. */
2522 struct charset_list_closure
/* Address of the list being accumulated; the mapper conses onto it. */
2524 Lisp_Object *charset_list;
/* Hash-table mapping callback used by Fcharset_list.  The opaque third
   argument is a struct charset_list_closure; KEY (the hash key, not the
   charset's own name) is pushed onto the front of the list it points to.
   The return statement is not visible in this excerpt. */
2528 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
2529 void *charset_list_closure)
2531 /* This function can GC */
2532 struct charset_list_closure *chcl =
2533 (struct charset_list_closure*) charset_list_closure;
2534 Lisp_Object *charset_list = chcl->charset_list;
2536 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
/* Lisp primitive `charset-list'.  Starts from an empty, GC-protected list
   and lets elisp_maphash call add_charset_to_list_mapper for every entry
   of Vcharset_hash_table, so the result holds every key of that table
   (which includes aliases added by `define-charset-alias'). */
2540 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
2541 Return a list of the names of all defined charsets.
2545 Lisp_Object charset_list = Qnil;
2546 struct gcpro gcpro1;
2547 struct charset_list_closure charset_list_closure;
2549 GCPRO1 (charset_list);
2550 charset_list_closure.charset_list = &charset_list;
2551 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
2552 &charset_list_closure);
2555 return charset_list;
/* Lisp primitive `charset-name': resolves CHARSET with Fget_charset (which
   signals an error for an unknown charset) and returns its name field. */
2558 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
2559 Return the name of charset CHARSET.
2563 return XCHARSET_NAME (Fget_charset (charset));
2566 /* #### SJT Should generic properties be allowed? */
/* Lisp primitive `make-charset'.
   Checks NAME (symbol) and DOC-STRING (string unless nil), refuses to
   redefine an existing charset, then walks PROPS validating the value of
   each recognized keyword.  After the loop it rejects a
   DIMENSION/CHARS/FINAL combination already registered in either
   direction, obtains a private leading byte with
   get_unallocated_leading_byte and builds the object with make_charset;
   a supplied CCL program is installed on the new charset afterwards.
   Defaults applied when a property is absent: doc string and registry
   become "", the short name becomes NAME's symbol name, the long name
   becomes the doc string, and columns becomes the dimension.
   NOTE(review): the test for Qlong_name is a plain `if', not `else if',
   so the else-if chain that ends in "Unrecognized property" starts there
   rather than at the Qshort_name test.  If the final branch (not visible
   here) is an `else', a 'short-name property is accepted and then still
   reported as unrecognized -- confirm and chain the two tests.
   NOTE(review): two range checks for 'graphic are visible (upper bound 2
   and upper bound 1); presumably only one is compiled in -- confirm.
   NOTE(review): assignments, braces and `else' lines shorter than a few
   tokens are not visible in this excerpt. */
2567 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
2568 Define a new character set.
2569 This function is for use with Mule support.
2570 NAME is a symbol, the name by which the character set is normally referred.
2571 DOC-STRING is a string describing the character set.
2572 PROPS is a property list, describing the specific nature of the
2573 character set. Recognized properties are:
2575 'short-name Short version of the charset name (ex: Latin-1)
2576 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
2577 'registry A regular expression matching the font registry field for
2579 'dimension Number of octets used to index a character in this charset.
2580 Either 1 or 2. Defaults to 1.
2581 'columns Number of columns used to display a character in this charset.
2582 Only used in TTY mode. (Under X, the actual width of a
2583 character can be derived from the font used to display the
2584 characters.) If unspecified, defaults to the dimension
2585 (this is almost always the correct value).
2586 'chars Number of characters in each dimension (94 or 96).
2587 Defaults to 94. Note that if the dimension is 2, the
2588 character set thus described is 94x94 or 96x96.
2589 'final Final byte of ISO 2022 escape sequence. Must be
2590 supplied. Each combination of (DIMENSION, CHARS) defines a
2591 separate namespace for final bytes. Note that ISO
2592 2022 restricts the final byte to the range
2593 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
2594 dimension == 2. Note also that final bytes in the range
2595 0x30 - 0x3F are reserved for user-defined (not official)
2597 'graphic 0 (use left half of font on output) or 1 (use right half
2598 of font on output). Defaults to 0. For example, for
2599 a font whose registry is ISO8859-1, the left half
2600 (octets 0x20 - 0x7F) is the `ascii' character set, while
2601 the right half (octets 0xA0 - 0xFF) is the `latin-1'
2602 character set. With 'graphic set to 0, the octets
2603 will have their high bit cleared; with it set to 1,
2604 the octets will have their high bit set.
2605 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
2607 'ccl-program A compiled CCL program used to convert a character in
2608 this charset into an index into the font. This is in
2609 addition to the 'graphic property. The CCL program
2610 is passed the octets of the character, with the high
2611 bit cleared and set depending upon whether the value
2612 of the 'graphic property is 0 or 1.
2614 (name, doc_string, props))
2616 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
2617 int direction = CHARSET_LEFT_TO_RIGHT;
2618 Lisp_Object registry = Qnil;
2619 Lisp_Object charset;
2620 Lisp_Object ccl_program = Qnil;
2621 Lisp_Object short_name = Qnil, long_name = Qnil;
2622 int byte_offset = -1;
2624 CHECK_SYMBOL (name);
2625 if (!NILP (doc_string))
2626 CHECK_STRING (doc_string);
2628 charset = Ffind_charset (name);
2629 if (!NILP (charset))
2630 signal_simple_error ("Cannot redefine existing charset", name);
2633 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
2635 if (EQ (keyword, Qshort_name))
2637 CHECK_STRING (value);
2641 if (EQ (keyword, Qlong_name))
2643 CHECK_STRING (value);
2647 else if (EQ (keyword, Qdimension))
2650 dimension = XINT (value);
2651 if (dimension < 1 || dimension > 2)
2652 signal_simple_error ("Invalid value for 'dimension", value);
2655 else if (EQ (keyword, Qchars))
2658 chars = XINT (value);
2659 if (chars != 94 && chars != 96)
2660 signal_simple_error ("Invalid value for 'chars", value);
2663 else if (EQ (keyword, Qcolumns))
2666 columns = XINT (value);
2667 if (columns != 1 && columns != 2)
2668 signal_simple_error ("Invalid value for 'columns", value);
2671 else if (EQ (keyword, Qgraphic))
2674 graphic = XINT (value);
2676 if (graphic < 0 || graphic > 2)
2678 if (graphic < 0 || graphic > 1)
2680 signal_simple_error ("Invalid value for 'graphic", value);
2683 else if (EQ (keyword, Qregistry))
2685 CHECK_STRING (value);
2689 else if (EQ (keyword, Qdirection))
2691 if (EQ (value, Ql2r))
2692 direction = CHARSET_LEFT_TO_RIGHT;
2693 else if (EQ (value, Qr2l))
2694 direction = CHARSET_RIGHT_TO_LEFT;
2696 signal_simple_error ("Invalid value for 'direction", value);
2699 else if (EQ (keyword, Qfinal))
2701 CHECK_CHAR_COERCE_INT (value);
2702 final = XCHAR (value);
2703 if (final < '0' || final > '~')
2704 signal_simple_error ("Invalid value for 'final", value);
2707 else if (EQ (keyword, Qccl_program))
2709 struct ccl_program test_ccl;
2711 if (setup_ccl_program (&test_ccl, value) < 0)
2712 signal_simple_error ("Invalid value for 'ccl-program", value);
2713 ccl_program = value;
2717 signal_simple_error ("Unrecognized property", keyword);
2722 error ("'final must be specified");
2723 if (dimension == 2 && final > 0x5F)
2725 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2728 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2729 CHARSET_LEFT_TO_RIGHT)) ||
2730 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2731 CHARSET_RIGHT_TO_LEFT)))
2733 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2735 id = get_unallocated_leading_byte (dimension);
2737 if (NILP (doc_string))
2738 doc_string = build_string ("");
2740 if (NILP (registry))
2741 registry = build_string ("");
2743 if (NILP (short_name))
2744 XSETSTRING (short_name, XSYMBOL (name)->name);
2746 if (NILP (long_name))
2747 long_name = doc_string;
2750 columns = dimension;
2752 if (byte_offset < 0)
2756 else if (chars == 96)
2762 charset = make_charset (id, name, chars, dimension, columns, graphic,
2763 final, direction, short_name, long_name,
2764 doc_string, registry,
2765 Qnil, 0, 0, 0, byte_offset);
2766 if (!NILP (ccl_program))
2767 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp primitive `make-reverse-direction-charset'.
   Resolves CHARSET, refuses if it already has a reverse-direction partner
   or if NEW-NAME already names a charset, then clones CHARSET's parameters
   into a new charset with a freshly allocated leading byte and the
   opposite direction.  The two charsets are finally linked to each other
   through their reverse-direction fields.
   NOTE(review): the argument-count part of the DEFUN header, some
   declarations and the return statement are not visible in this
   excerpt. */
2771 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2773 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2774 NEW-NAME is the name of the new charset. Return the new charset.
2776 (charset, new_name))
2778 Lisp_Object new_charset = Qnil;
2779 int id, chars, dimension, columns, graphic, final;
2781 Lisp_Object registry, doc_string, short_name, long_name;
2784 charset = Fget_charset (charset);
2785 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2786 signal_simple_error ("Charset already has reverse-direction charset",
2789 CHECK_SYMBOL (new_name);
2790 if (!NILP (Ffind_charset (new_name)))
2791 signal_simple_error ("Cannot redefine existing charset", new_name);
2793 cs = XCHARSET (charset);
2795 chars = CHARSET_CHARS (cs);
2796 dimension = CHARSET_DIMENSION (cs);
2797 columns = CHARSET_COLUMNS (cs);
2798 id = get_unallocated_leading_byte (dimension);
2800 graphic = CHARSET_GRAPHIC (cs);
2801 final = CHARSET_FINAL (cs);
/* Default to right-to-left, flipped back when the source charset is itself
   right-to-left. */
2802 direction = CHARSET_RIGHT_TO_LEFT;
2803 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2804 direction = CHARSET_LEFT_TO_RIGHT;
2805 doc_string = CHARSET_DOC_STRING (cs);
2806 short_name = CHARSET_SHORT_NAME (cs);
2807 long_name = CHARSET_LONG_NAME (cs);
2808 registry = CHARSET_REGISTRY (cs);
2810 new_charset = make_charset (id, new_name, chars, dimension, columns,
2811 graphic, final, direction, short_name, long_name,
2812 doc_string, registry,
2814 CHARSET_DECODING_TABLE(cs),
2815 CHARSET_UCS_MIN(cs),
2816 CHARSET_UCS_MAX(cs),
2817 CHARSET_CODE_OFFSET(cs),
2818 CHARSET_BYTE_OFFSET(cs)
2824 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2825 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Lisp primitive `define-charset-alias'.  ALIAS must be a symbol; CHARSET
   is resolved with Fget_charset and then stored in Vcharset_hash_table
   under ALIAS, so later name lookups of ALIAS find the same charset
   object.  Returns the value of Fputhash. */
2830 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2831 Define symbol ALIAS as an alias for CHARSET.
2835 CHECK_SYMBOL (alias);
2836 charset = Fget_charset (charset);
2837 return Fputhash (alias, charset, Vcharset_hash_table);
2840 /* #### Reverse direction charsets not yet implemented. */
/* Lisp primitive `charset-reverse-direction-charset': resolves CHARSET
   with Fget_charset and returns the contents of its reverse-direction
   field (Qnil unless a partner has been linked). */
2842 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2844 Return the reverse-direction charset parallel to CHARSET, if any.
2845 This is the charset with the same properties (in particular, the same
2846 dimension, number of characters per dimension, and final byte) as
2847 CHARSET but whose characters are displayed in the opposite direction.
2851 charset = Fget_charset (charset);
2852 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Lisp primitive `charset-from-attributes'.
   Validates DIMENSION (1 or 2), CHARS (94 or 96), FINAL ('0' through '~',
   and at most 0x5F when DIMENSION is 2) and DIRECTION ('l2r, 'r2l or nil),
   then looks the charset up with CHARSET_BY_ATTRIBUTES.  With DIRECTION
   omitted (DI stays -1) both directions are tried, left-to-right first.
   The visible return yields the charset's name rather than the charset
   object.
   NOTE(review): the assignments of CH and FI, the tests between the
   lookups and the not-found return are on lines not visible in this
   excerpt. */
2856 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2857 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2858 If DIRECTION is omitted, both directions will be checked (left-to-right
2859 will be returned if character sets exist for both directions).
2861 (dimension, chars, final, direction))
2863 int dm, ch, fi, di = -1;
2864 Lisp_Object obj = Qnil;
2866 CHECK_INT (dimension);
2867 dm = XINT (dimension);
2868 if (dm < 1 || dm > 2)
2869 signal_simple_error ("Invalid value for DIMENSION", dimension);
2873 if (ch != 94 && ch != 96)
2874 signal_simple_error ("Invalid value for CHARS", chars);
2876 CHECK_CHAR_COERCE_INT (final);
2878 if (fi < '0' || fi > '~')
2879 signal_simple_error ("Invalid value for FINAL", final);
2881 if (EQ (direction, Ql2r))
2882 di = CHARSET_LEFT_TO_RIGHT;
2883 else if (EQ (direction, Qr2l))
2884 di = CHARSET_RIGHT_TO_LEFT;
2885 else if (!NILP (direction))
2886 signal_simple_error ("Invalid value for DIRECTION", direction);
2888 if (dm == 2 && fi > 0x5F)
2890 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2894 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
2896 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
2899 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
2902 return XCHARSET_NAME (obj);
/* Lisp primitive `charset-short-name': resolves CHARSET with Fget_charset
   and returns its short-name field. */
2906 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2907 Return short name of CHARSET.
2911 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Lisp primitive charset-long-name.
   Resolves CHARSET with Fget_charset and returns its
   XCHARSET_LONG_NAME value unchanged.  */
2914 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2915 Return long name of CHARSET.
2919 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Lisp primitive charset-description.
   Resolves CHARSET with Fget_charset and returns its
   XCHARSET_DOC_STRING value, i.e. the same field that
   charset-property reports for the doc-string property.  */
2922 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2923 Return description of CHARSET.
2927 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Lisp primitive charset-dimension.
   Resolves CHARSET with Fget_charset and returns its
   XCHARSET_DIMENSION wrapped as a Lisp integer (make_int).  */
2930 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2931 Return dimension of CHARSET.
2935 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Lisp primitive charset-property.
   Resolves CHARSET with Fget_charset, requires PROP to be a symbol, and
   dispatches on PROP:
   - name, short-name, long-name, doc-string, registry and ccl-program
     are returned as stored in the charset;
   - dimension, columns, graphic and chars are wrapped with make_int;
   - final is wrapped with make_char;
   - direction is reported as the symbol Ql2r when the stored direction
     equals CHARSET_LEFT_TO_RIGHT and Qr2l otherwise;
   - reverse-direction-charset yields the NAME of the stored charset
     when the stored value satisfies CHARSETP, otherwise the stored
     value itself.
   Any other PROP signals "Unrecognized charset property name".  */
2938 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2939 Return property PROP of CHARSET, a charset object or symbol naming a charset.
2940 Recognized properties are those listed in `make-charset', as well as
2941 'name and 'doc-string.
2947 charset = Fget_charset (charset);
2948 cs = XCHARSET (charset);
2950 CHECK_SYMBOL (prop);
2951 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2952 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2953 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2954 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2955 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2956 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2957 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2958 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2959 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2960 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2961 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2962 if (EQ (prop, Qdirection))
2963 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2964 if (EQ (prop, Qreverse_direction_charset))
2966 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2967 /* #### Is this translation OK? If so, error checking sufficient? */
2968 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
2970 signal_simple_error ("Unrecognized charset property name", prop);
2971 return Qnil; /* not reached */
/* Lisp primitive charset-id.
   The identification number is the charset's XCHARSET_LEADING_BYTE,
   returned as a Lisp integer (make_int) after CHARSET has been resolved
   with Fget_charset.  */
2974 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2975 Return charset identification number of CHARSET.
2979 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2982 /* #### We need to figure out which properties we really want to
/* Lisp primitive set-charset-ccl-program.
   CCL-PROGRAM is validated by running setup_ccl_program on a scratch
   struct ccl_program (test_ccl); a negative result signals
   "Invalid ccl-program".  When validation passes, the CCL-PROGRAM
   object itself (not the scratch struct) is stored through
   XCHARSET_CCL_PROGRAM.  */
2985 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2986 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2988 (charset, ccl_program))
2990 struct ccl_program test_ccl;
2992 charset = Fget_charset (charset);
2993 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
2994 signal_simple_error ("Invalid ccl-program", ccl_program);
2995 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Drop cached font lookups for CHARSET.
   For every device visited by DEVICE_LOOP_NO_BREAK, CHARSET is looked
   up in that device's charset_font_cache; when the lookup returns
   something other than the Qunbound default, the hash table found there
   is emptied with Fclrhash.  */
3000 invalidate_charset_font_caches (Lisp_Object charset)
3002 /* Invalidate font cache entries for charset on all devices. */
3003 Lisp_Object devcons, concons, hash_table;
3004 DEVICE_LOOP_NO_BREAK (devcons, concons)
3006 struct device *d = XDEVICE (XCAR (devcons));
3007 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
3008 if (!UNBOUNDP (hash_table))
3009 Fclrhash (hash_table);
/* Lisp primitive set-charset-registry.
   REGISTRY must be a string (CHECK_STRING).  After it is stored through
   XCHARSET_REGISTRY, the charset's per-device font caches are
   invalidated and face_property_was_changed is called with
   (Vdefault_face, Qfont, Qglobal).
   NOTE(review): presumably that call forces fonts to be looked up again
   with the new registry -- confirm.  */
3013 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
3014 Set the 'registry property of CHARSET to REGISTRY.
3016 (charset, registry))
3018 charset = Fget_charset (charset);
3019 CHECK_STRING (registry);
3020 XCHARSET_REGISTRY (charset) = registry;
3021 invalidate_charset_font_caches (charset);
3022 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* Lisp primitive charset-mapping-table.
   The mapping table is the charset's XCHARSET_DECODING_TABLE, returned
   as stored after CHARSET has been resolved with Fget_charset.  */
3027 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
3028 Return mapping-table of CHARSET.
3032 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Lisp primitive set-charset-mapping-table.
   TABLE must be nil or a vector; any other type signals
   wrong-type-argument with "vector-or-nil-p".
   - The branch that resets CHARSET_DECODING_TABLE to Qnil first calls
     make_vector_newer on the old table when that is a vector
     (presumably this is the TABLE == nil case -- confirm).
   - A vector TABLE is first checked with decoding_table_check_elements;
     failures are reported as "Too big table", "Invalid element is
     found" or "Something wrong".
   Entries are then registered one by one with put_char_ccs_code_point.
   The code point is the index plus CHARSET_BYTE_OFFSET.  In the
   two-level loop a row that is itself a vector contributes the row code
   point shifted left by 8 (presumably combined with the column code
   point -- confirm), while a non-vector row entry is registered under
   the row code point alone.
   NOTE(review): which entries are skipped (e.g. non-characters) is
   decided by conditions not documented here -- confirm.  */
3035 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
3036 Set mapping-table of CHARSET to TABLE.
3040 struct Lisp_Charset *cs;
3044 charset = Fget_charset (charset);
3045 cs = XCHARSET (charset);
3049 if (VECTORP (CHARSET_DECODING_TABLE(cs)))
3050 make_vector_newer (CHARSET_DECODING_TABLE(cs));
3051 CHARSET_DECODING_TABLE(cs) = Qnil;
3054 else if (VECTORP (table))
3056 int ccs_len = CHARSET_BYTE_SIZE (cs);
3057 int ret = decoding_table_check_elements (table,
3058 CHARSET_DIMENSION (cs),
3063 signal_simple_error ("Too big table", table);
3065 signal_simple_error ("Invalid element is found", table);
3067 signal_simple_error ("Something wrong", table);
3069 CHARSET_DECODING_TABLE(cs) = Qnil;
3072 signal_error (Qwrong_type_argument,
3073 list2 (build_translated_string ("vector-or-nil-p"),
3076 byte_offset = CHARSET_BYTE_OFFSET (cs);
3077 switch (CHARSET_DIMENSION (cs))
3080 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3082 Lisp_Object c = XVECTOR_DATA(table)[i];
3085 put_char_ccs_code_point (c, charset,
3086 make_int (i + byte_offset));
3090 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3092 Lisp_Object v = XVECTOR_DATA(table)[i];
3098 for (j = 0; j < XVECTOR_LENGTH (v); j++)
3100 Lisp_Object c = XVECTOR_DATA(v)[j];
3103 put_char_ccs_code_point
3105 make_int ( ( (i + byte_offset) << 8 )
3111 put_char_ccs_code_point (v, charset,
3112 make_int (i + byte_offset));
3121 /************************************************************************/
3122 /* Lisp primitives for working with characters */
3123 /************************************************************************/
/* Lisp primitive decode-char.
   Resolves CHARSET with Fget_charset, decodes the code point with
   DECODE_CHAR and returns the result as a Lisp character (make_char).
   Charsets whose XCHARSET_GRAPHIC is 1 get an extra step before
   decoding.
   NOTE(review): presumably that step strips the high bit of each octet
   of the code point -- confirm.  */
3126 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
3127 Make a character from CHARSET and code-point CODE.
3133 charset = Fget_charset (charset);
3136 if (XCHARSET_GRAPHIC (charset) == 1)
3138 return make_char (DECODE_CHAR (charset, c));
/* Lisp primitive decode-builtin-char.
   Computes the character directly from the charset's attributes and
   falls back to Fdecode_char whenever no formula applies:
   - charsets with a final byte >= '0' map into blocks starting at
     MIN_CHAR_94 / MIN_CHAR_96 (dimension 1) or MIN_CHAR_94x94 /
     MIN_CHAR_96x96 (otherwise); the block is selected by final - '0'
     and each octet is masked with 0x7F and rebased by 33 (94-based
     formulas) or 32 (96-based formulas);
   - otherwise, charsets with a non-zero XCHARSET_UCS_MAX map linearly
     from the code point, using the byte and code offsets, onto
     XCHARSET_UCS_MIN and up; a result outside
     [XCHARSET_UCS_MIN, XCHARSET_UCS_MAX] falls back to Fdecode_char;
   - everything else is passed to Fdecode_char.  */
3141 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
3142 Make a builtin character from CHARSET and code-point CODE.
3149 charset = Fget_charset (charset);
3153 if ((final = XCHARSET_FINAL (charset)) >= '0')
3155 if (XCHARSET_DIMENSION (charset) == 1)
3157 switch (XCHARSET_CHARS (charset))
3161 make_char (MIN_CHAR_94 + (final - '0') * 94
3162 + ((c & 0x7F) - 33));
3165 make_char (MIN_CHAR_96 + (final - '0') * 96
3166 + ((c & 0x7F) - 32));
3168 return Fdecode_char (charset, code);
3173 switch (XCHARSET_CHARS (charset))
3177 make_char (MIN_CHAR_94x94
3178 + (final - '0') * 94 * 94
3179 + (((c >> 8) & 0x7F) - 33) * 94
3180 + ((c & 0x7F) - 33));
3183 make_char (MIN_CHAR_96x96
3184 + (final - '0') * 96 * 96
3185 + (((c >> 8) & 0x7F) - 32) * 96
3186 + ((c & 0x7F) - 32));
3188 return Fdecode_char (charset, code);
3192 else if (XCHARSET_UCS_MAX (charset))
3195 = (XCHARSET_DIMENSION (charset) == 1
3197 c - XCHARSET_BYTE_OFFSET (charset)
3199 ((c >> 8) - XCHARSET_BYTE_OFFSET (charset))
3200 * XCHARSET_CHARS (charset)
3201 + (c & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
3202 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
3203 if ((cid < XCHARSET_UCS_MIN (charset))
3204 || (XCHARSET_UCS_MAX (charset) < cid))
3205 return Fdecode_char (charset, code);
3206 return make_char (cid);
3209 return Fdecode_char (charset, code);
/* Lisp primitive make-char.
   Octet limits are chosen per charset: 0..127 for ASCII, 0..31 for
   control-1, 0..255 for 256-character sets, 33..126 for 94-character
   sets and 32..127 otherwise (96-character sets).  An octet outside
   those limits is reported with args_out_of_range_3.  ARG2 is masked
   with 0x7f before its range check.
   For a one-dimensional charset the character is built with a zero
   second octet, and a second octet is rejected with
   "Charset is of dimension one; second octet must be nil".
   NOTE(review): the condition guarding that error is presumably
   !NILP (arg2), and ARG1 is presumably masked the same way as ARG2
   where the existing comment about the 8th bit applies -- confirm.  */
3213 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
3214 Make a character from CHARSET and octets ARG1 and ARG2.
3215 ARG2 is required only for characters from two-dimensional charsets.
3216 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
3217 character s with caron.
3219 (charset, arg1, arg2))
3223 int lowlim, highlim;
3225 charset = Fget_charset (charset);
3226 cs = XCHARSET (charset);
3228 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
3229 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
3231 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
3233 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
3234 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
3237 /* It is useful (and safe, according to Olivier Galibert) to strip
3238 the 8th bit off ARG1 and ARG2 because it allows programmers to
3239 write (make-char 'latin-iso8859-2 CODE) where code is the actual
3240 Latin 2 code of the character. */
3248 if (a1 < lowlim || a1 > highlim)
3249 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
3251 if (CHARSET_DIMENSION (cs) == 1)
3255 ("Charset is of dimension one; second octet must be nil", arg2);
3256 return make_char (MAKE_CHAR (charset, a1, 0));
3265 a2 = XINT (arg2) & 0x7f;
3267 if (a2 < lowlim || a2 > highlim)
3268 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
3270 return make_char (MAKE_CHAR (charset, a1, a2));
/* Lisp primitive char-charset.
   CHARACTER is checked with CHECK_CHAR_COERCE_INT.  The result is the
   NAME of its charset (XCHARSET_NAME of CHAR_CHARSET), not the
   charset object itself.  */
3273 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
3274 Return the character set of CHARACTER.
3278 CHECK_CHAR_COERCE_INT (character);
3280 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
/* Lisp primitive char-octet.
   CHARACTER is split with BREAKUP_CHAR into its charset and two octets.
   N of nil or 0 selects the first octet, N of 1 selects the second;
   any other N signals "Octet number must be 0 or 1".  The selected
   octet is returned as a Lisp integer.  */
3283 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
3284 Return the octet numbered N (should be 0 or 1) of CHARACTER.
3285 N defaults to 0 if omitted.
3289 Lisp_Object charset;
3292 CHECK_CHAR_COERCE_INT (character);
3294 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
3296 if (NILP (n) || EQ (n, Qzero))
3297 return make_int (octet0);
3298 else if (EQ (n, make_int (1)))
3299 return make_int (octet1);
3301 signal_simple_error ("Octet number must be 0 or 1", n);
/* Lisp primitive split-char.
   Two implementations are visible below (presumably selected by a
   preprocessor conditional -- confirm):
   - one encodes CHARACTER with ENCODE_CHAR and, once per dimension of
     the resulting charset, conses the low byte (code_point & 255) onto
     the result list before pushing the charset name in front;
   - the other splits CHARACTER with BREAKUP_CHAR and builds a list of
     the charset name followed by two octets when the charset's
     dimension is 2 and by one octet otherwise.
   charset and rc are protected with GCPRO2 because, as noted below,
   this function can GC.  */
3304 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
3305 Return list of charset and one or two position-codes of CHARACTER.
3309 /* This function can GC */
3310 struct gcpro gcpro1, gcpro2;
3311 Lisp_Object charset = Qnil;
3312 Lisp_Object rc = Qnil;
3320 GCPRO2 (charset, rc);
3321 CHECK_CHAR_COERCE_INT (character);
3324 code_point = ENCODE_CHAR (XCHAR (character), charset);
3325 dimension = XCHARSET_DIMENSION (charset);
3326 while (dimension > 0)
3328 rc = Fcons (make_int (code_point & 255), rc);
3332 rc = Fcons (XCHARSET_NAME (charset), rc);
3334 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
3336 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
3338 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
3342 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
3351 #ifdef ENABLE_COMPOSITE_CHARS
3352 /************************************************************************/
3353 /* composite character functions */
3354 /************************************************************************/
/* Map the LEN bytes at STR to a composite character.
   The bytes are turned into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  The allocation path visible
   below builds a new character in Vcharset_composite from
   composite_char_row_next and composite_char_col_next, records it in
   both the char-to-string and the string-to-char hash tables, and then
   advances the column; once the column reaches 128 it is reset to 32
   and the row is advanced.  When the row counter has reached 128 the
   function signals "No more composite chars available".
   NOTE(review): presumably the allocation only happens when the lookup
   misses and a hit returns the previously assigned character --
   confirm.  */
3357 lookup_composite_char (Bufbyte *str, int len)
3359 Lisp_Object lispstr = make_string (str, len);
3360 Lisp_Object ch = Fgethash (lispstr,
3361 Vcomposite_char_string2char_hash_table,
3367 if (composite_char_row_next >= 128)
3368 signal_simple_error ("No more composite chars available", lispstr);
3369 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
3370 composite_char_col_next);
3371 Fputhash (make_char (emch), lispstr,
3372 Vcomposite_char_char2string_hash_table);
3373 Fputhash (lispstr, make_char (emch),
3374 Vcomposite_char_string2char_hash_table);
3375 composite_char_col_next++;
3376 if (composite_char_col_next >= 128)
3378 composite_char_col_next = 32;
3379 composite_char_row_next++;
/* Look up the string recorded for composite character CH in
   Vcomposite_char_char2string_hash_table.  CH is expected to be
   registered already: a missing entry trips the assert below.  */
3388 composite_char_string (Emchar ch)
3390 Lisp_Object str = Fgethash (make_char (ch),
3391 Vcomposite_char_char2string_hash_table,
3393 assert (!UNBOUNDP (str));
/* Lisp primitive make-composite-char.
   STRING must be a Lisp string; its data and length are handed to
   lookup_composite_char and the resulting character is returned as a
   Lisp character (make_char).
   NOTE(review): this is declared with xxDEFUN rather than DEFUN; the
   reason is not stated here -- confirm.  */
3397 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
3398 Convert a string into a single composite character.
3399 The character is the result of overstriking all the characters in
3404 CHECK_STRING (string);
3405 return make_char (lookup_composite_char (XSTRING_DATA (string),
3406 XSTRING_LENGTH (string)));
/* Lisp primitive composite-char-string.
   Signals "Must be composite char" unless the character's leading byte
   (CHAR_LEADING_BYTE) is LEADING_BYTE_COMPOSITE; otherwise returns the
   result of composite_char_string for it.
   NOTE(review): this is declared with xxDEFUN rather than DEFUN; the
   reason is not stated here -- confirm.  */
3409 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
3410 Return a string of the characters comprising a composite character.
3418 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
3419 signal_simple_error ("Must be composite char", ch);
3420 return composite_char_string (emch);
3422 #endif /* ENABLE_COMPOSITE_CHARS */
3425 /************************************************************************/
3426 /* initialization */
3427 /************************************************************************/
/* Register this module's lrecord implementations
   (INIT_LRECORD_IMPLEMENTATION), its Lisp primitives (DEFSUBR) and the
   symbols it uses (defsymbol), including one symbol per predefined
   charset name.
   NOTE(review): Fcharset_reverse_direction_charset has no DEFSUBR here;
   only a commented-out DEFSUBR (Freverse_direction_charset) remains.
   NOTE(review): Qfinal is passed to defsymbol twice in this function;
   presumably one of the two calls is conditionally compiled --
   confirm.  */
3430 syms_of_mule_charset (void)
3433 INIT_LRECORD_IMPLEMENTATION (uint8_byte_table);
3434 INIT_LRECORD_IMPLEMENTATION (uint16_byte_table);
3435 INIT_LRECORD_IMPLEMENTATION (byte_table);
3436 INIT_LRECORD_IMPLEMENTATION (char_id_table);
3438 INIT_LRECORD_IMPLEMENTATION (charset);
3440 DEFSUBR (Fcharsetp);
3441 DEFSUBR (Ffind_charset);
3442 DEFSUBR (Fget_charset);
3443 DEFSUBR (Fcharset_list);
3444 DEFSUBR (Fcharset_name);
3445 DEFSUBR (Fmake_charset);
3446 DEFSUBR (Fmake_reverse_direction_charset);
3447 /* DEFSUBR (Freverse_direction_charset); */
3448 DEFSUBR (Fdefine_charset_alias);
3449 DEFSUBR (Fcharset_from_attributes);
3450 DEFSUBR (Fcharset_short_name);
3451 DEFSUBR (Fcharset_long_name);
3452 DEFSUBR (Fcharset_description);
3453 DEFSUBR (Fcharset_dimension);
3454 DEFSUBR (Fcharset_property);
3455 DEFSUBR (Fcharset_id);
3456 DEFSUBR (Fset_charset_ccl_program);
3457 DEFSUBR (Fset_charset_registry);
3459 DEFSUBR (Fchar_attribute_list);
3460 DEFSUBR (Ffind_char_attribute_table);
3461 DEFSUBR (Fchar_attribute_alist);
3462 DEFSUBR (Fget_char_attribute);
3463 DEFSUBR (Fput_char_attribute);
3464 DEFSUBR (Fremove_char_attribute);
3465 DEFSUBR (Fdefine_char);
3466 DEFSUBR (Fchar_variants);
3467 DEFSUBR (Fget_composite_char);
3468 DEFSUBR (Fcharset_mapping_table);
3469 DEFSUBR (Fset_charset_mapping_table);
3473 DEFSUBR (Fdecode_char);
3474 DEFSUBR (Fdecode_builtin_char);
3476 DEFSUBR (Fmake_char);
3477 DEFSUBR (Fchar_charset);
3478 DEFSUBR (Fchar_octet);
3479 DEFSUBR (Fsplit_char);
3481 #ifdef ENABLE_COMPOSITE_CHARS
3482 DEFSUBR (Fmake_composite_char);
3483 DEFSUBR (Fcomposite_char_string);
3486 defsymbol (&Qcharsetp, "charsetp");
3487 defsymbol (&Qregistry, "registry");
3488 defsymbol (&Qfinal, "final");
3489 defsymbol (&Qgraphic, "graphic");
3490 defsymbol (&Qdirection, "direction");
3491 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
3492 defsymbol (&Qshort_name, "short-name");
3493 defsymbol (&Qlong_name, "long-name");
3495 defsymbol (&Ql2r, "l2r");
3496 defsymbol (&Qr2l, "r2l");
3498 /* Charsets, compatible with FSF 20.3
3499 Naming convention is Script-Charset[-Edition] */
3500 defsymbol (&Qascii, "ascii");
3501 defsymbol (&Qcontrol_1, "control-1");
3502 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
3503 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
3504 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
3505 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
3506 defsymbol (&Qthai_tis620, "thai-tis620");
3507 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
3508 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
3509 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
3510 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
3511 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
3512 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
3513 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
3514 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
3515 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
3516 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
3517 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
3518 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
3519 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
3520 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
3521 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
3523 defsymbol (&Q_ucs, "->ucs");
3524 defsymbol (&Q_decomposition, "->decomposition");
3525 defsymbol (&Qcompat, "compat");
3526 defsymbol (&Qisolated, "isolated");
3527 defsymbol (&Qinitial, "initial");
3528 defsymbol (&Qmedial, "medial");
3529 defsymbol (&Qfinal, "final");
3530 defsymbol (&Qvertical, "vertical");
3531 defsymbol (&QnoBreak, "noBreak");
3532 defsymbol (&Qfraction, "fraction");
3533 defsymbol (&Qsuper, "super");
3534 defsymbol (&Qsub, "sub");
3535 defsymbol (&Qcircle, "circle");
3536 defsymbol (&Qsquare, "square");
3537 defsymbol (&Qwide, "wide");
3538 defsymbol (&Qnarrow, "narrow");
3539 defsymbol (&Qsmall, "small");
3540 defsymbol (&Qfont, "font");
3541 defsymbol (&Qucs, "ucs");
3542 defsymbol (&Qucs_bmp, "ucs-bmp");
3543 defsymbol (&Qucs_cns, "ucs-cns");
3544 defsymbol (&Qlatin_viscii, "latin-viscii");
3545 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
3546 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
3547 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
3548 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
3549 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
3550 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
3551 defsymbol (&Qchinese_big5, "chinese-big5");
3552 defsymbol (&Qmojikyo, "mojikyo");
3553 defsymbol (&Qmojikyo_2022_1, "mojikyo-2022-1");
3554 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
3555 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
3556 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
3557 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
3558 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
3559 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
3560 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
3561 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
3562 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
3563 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
3564 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
3565 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
3566 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
3567 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
3568 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
3569 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
3570 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
3571 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
3572 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
3573 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
3574 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
3575 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
3577 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
3578 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
3580 defsymbol (&Qcomposite, "composite");
/* Set up this module's variables: allocate the charset_lookup
   structure (chlook) and register it with dumpstruct, set every entry
   of its charset_by_leading_byte and charset_by_attributes tables to
   Qnil, seed the next-allocated leading-byte counters, staticpro and
   create Vcharacter_composition_table and Vcharacter_variant_table, and
   define the Lisp variables leading-code-private-11, utf-2000-version
   and default-coded-charset-priority-list.
   NOTE(review): charset_by_attributes is cleared both as a two-level
   and as a three-level array, there are two sets of leading-byte
   counters, and leading_code_private_11 is assigned twice; presumably
   these are alternatives under preprocessor conditionals -- confirm.  */
3584 vars_of_mule_charset (void)
3591 chlook = xnew (struct charset_lookup);
3592 dumpstruct (&chlook, &charset_lookup_description);
3594 /* Table of charsets indexed by leading byte. */
3595 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
3596 chlook->charset_by_leading_byte[i] = Qnil;
3599 /* Table of charsets indexed by type/final-byte. */
3600 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3601 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3602 chlook->charset_by_attributes[i][j] = Qnil;
3604 /* Table of charsets indexed by type/final-byte/direction. */
3605 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3606 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3607 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
3608 chlook->charset_by_attributes[i][j][k] = Qnil;
3612 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
3614 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
3615 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
3619 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3620 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
3621 Leading-code of private TYPE9N charset of column-width 1.
3623 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3627 Vutf_2000_version = build_string("0.17 (Hōryūji)");
3628 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
3629 Version number of UTF-2000.
3632 staticpro (&Vcharacter_composition_table);
3633 Vcharacter_composition_table = make_char_id_table (Qnil);
3635 staticpro (&Vcharacter_variant_table);
3636 Vcharacter_variant_table = make_char_id_table (Qnil);
3638 Vdefault_coded_charset_priority_list = Qnil;
3639 DEFVAR_LISP ("default-coded-charset-priority-list",
3640 &Vdefault_coded_charset_priority_list /*
3641 Default order of preferred coded-character-sets.
3647 complex_vars_of_mule_charset (void)
3649 staticpro (&Vcharset_hash_table);
3650 Vcharset_hash_table =
3651 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3653 /* Predefined character sets. We store them into variables for
3657 staticpro (&Vchar_attribute_hash_table);
3658 Vchar_attribute_hash_table
3659 = make_lisp_hash_table (16, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3661 staticpro (&Vcharset_ucs);
3663 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
3664 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3665 build_string ("UCS"),
3666 build_string ("UCS"),
3667 build_string ("ISO/IEC 10646"),
3669 Qnil, 0, 0xFFFFFFF, 0, 0);
3670 staticpro (&Vcharset_ucs_bmp);
3672 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
3673 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3674 build_string ("BMP"),
3675 build_string ("BMP"),
3676 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
3677 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
3678 Qnil, 0, 0xFFFF, 0, 0);
3679 staticpro (&Vcharset_ucs_cns);
3681 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 4,
3682 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3683 build_string ("UCS for CNS"),
3684 build_string ("UCS for CNS 11643"),
3685 build_string ("ISO/IEC 10646 for CNS 11643"),
3687 Qnil, 0, 0xFFFFFFF, 0, 0);
3689 # define MIN_CHAR_THAI 0
3690 # define MAX_CHAR_THAI 0
3691 # define MIN_CHAR_HEBREW 0
3692 # define MAX_CHAR_HEBREW 0
3693 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3694 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
3696 staticpro (&Vcharset_ascii);
3698 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3699 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3700 build_string ("ASCII"),
3701 build_string ("ASCII)"),
3702 build_string ("ASCII (ISO646 IRV)"),
3703 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3704 Qnil, 0, 0x7F, 0, 0);
3705 staticpro (&Vcharset_control_1);
3706 Vcharset_control_1 =
3707 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3708 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3709 build_string ("C1"),
3710 build_string ("Control characters"),
3711 build_string ("Control characters 128-191"),
3713 Qnil, 0x80, 0x9F, 0, 0);
3714 staticpro (&Vcharset_latin_iso8859_1);
3715 Vcharset_latin_iso8859_1 =
3716 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3717 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3718 build_string ("Latin-1"),
3719 build_string ("ISO8859-1 (Latin-1)"),
3720 build_string ("ISO8859-1 (Latin-1)"),
3721 build_string ("iso8859-1"),
3722 Qnil, 0xA0, 0xFF, 0, 32);
3723 staticpro (&Vcharset_latin_iso8859_2);
3724 Vcharset_latin_iso8859_2 =
3725 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3726 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3727 build_string ("Latin-2"),
3728 build_string ("ISO8859-2 (Latin-2)"),
3729 build_string ("ISO8859-2 (Latin-2)"),
3730 build_string ("iso8859-2"),
3732 staticpro (&Vcharset_latin_iso8859_3);
3733 Vcharset_latin_iso8859_3 =
3734 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3735 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3736 build_string ("Latin-3"),
3737 build_string ("ISO8859-3 (Latin-3)"),
3738 build_string ("ISO8859-3 (Latin-3)"),
3739 build_string ("iso8859-3"),
3741 staticpro (&Vcharset_latin_iso8859_4);
3742 Vcharset_latin_iso8859_4 =
3743 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3744 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3745 build_string ("Latin-4"),
3746 build_string ("ISO8859-4 (Latin-4)"),
3747 build_string ("ISO8859-4 (Latin-4)"),
3748 build_string ("iso8859-4"),
3750 staticpro (&Vcharset_thai_tis620);
3751 Vcharset_thai_tis620 =
3752 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3753 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3754 build_string ("TIS620"),
3755 build_string ("TIS620 (Thai)"),
3756 build_string ("TIS620.2529 (Thai)"),
3757 build_string ("tis620"),
3758 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
3759 staticpro (&Vcharset_greek_iso8859_7);
3760 Vcharset_greek_iso8859_7 =
3761 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3762 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3763 build_string ("ISO8859-7"),
3764 build_string ("ISO8859-7 (Greek)"),
3765 build_string ("ISO8859-7 (Greek)"),
3766 build_string ("iso8859-7"),
3768 0 /* MIN_CHAR_GREEK */,
3769 0 /* MAX_CHAR_GREEK */, 0, 32);
3770 staticpro (&Vcharset_arabic_iso8859_6);
3771 Vcharset_arabic_iso8859_6 =
3772 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3773 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3774 build_string ("ISO8859-6"),
3775 build_string ("ISO8859-6 (Arabic)"),
3776 build_string ("ISO8859-6 (Arabic)"),
3777 build_string ("iso8859-6"),
3779 staticpro (&Vcharset_hebrew_iso8859_8);
3780 Vcharset_hebrew_iso8859_8 =
3781 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3782 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3783 build_string ("ISO8859-8"),
3784 build_string ("ISO8859-8 (Hebrew)"),
3785 build_string ("ISO8859-8 (Hebrew)"),
3786 build_string ("iso8859-8"),
3787 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
3788 staticpro (&Vcharset_katakana_jisx0201);
3789 Vcharset_katakana_jisx0201 =
3790 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3791 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3792 build_string ("JISX0201 Kana"),
3793 build_string ("JISX0201.1976 (Japanese Kana)"),
3794 build_string ("JISX0201.1976 Japanese Kana"),
3795 build_string ("jisx0201\\.1976"),
3797 staticpro (&Vcharset_latin_jisx0201);
3798 Vcharset_latin_jisx0201 =
3799 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3800 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3801 build_string ("JISX0201 Roman"),
3802 build_string ("JISX0201.1976 (Japanese Roman)"),
3803 build_string ("JISX0201.1976 Japanese Roman"),
3804 build_string ("jisx0201\\.1976"),
3806 staticpro (&Vcharset_cyrillic_iso8859_5);
3807 Vcharset_cyrillic_iso8859_5 =
3808 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3809 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3810 build_string ("ISO8859-5"),
3811 build_string ("ISO8859-5 (Cyrillic)"),
3812 build_string ("ISO8859-5 (Cyrillic)"),
3813 build_string ("iso8859-5"),
3815 0 /* MIN_CHAR_CYRILLIC */,
3816 0 /* MAX_CHAR_CYRILLIC */, 0, 32);
3817 staticpro (&Vcharset_latin_iso8859_9);
3818 Vcharset_latin_iso8859_9 =
3819 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3820 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3821 build_string ("Latin-5"),
3822 build_string ("ISO8859-9 (Latin-5)"),
3823 build_string ("ISO8859-9 (Latin-5)"),
3824 build_string ("iso8859-9"),
3826 staticpro (&Vcharset_japanese_jisx0208_1978);
3827 Vcharset_japanese_jisx0208_1978 =
3828 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3829 Qjapanese_jisx0208_1978, 94, 2,
3830 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3831 build_string ("JIS X0208:1978"),
3832 build_string ("JIS X0208:1978 (Japanese)"),
3834 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3835 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3837 staticpro (&Vcharset_chinese_gb2312);
3838 Vcharset_chinese_gb2312 =
3839 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3840 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3841 build_string ("GB2312"),
3842 build_string ("GB2312)"),
3843 build_string ("GB2312 Chinese simplified"),
3844 build_string ("gb2312"),
3846 staticpro (&Vcharset_japanese_jisx0208);
3847 Vcharset_japanese_jisx0208 =
3848 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3849 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3850 build_string ("JISX0208"),
3851 build_string ("JIS X0208:1983 (Japanese)"),
3852 build_string ("JIS X0208:1983 Japanese Kanji"),
3853 build_string ("jisx0208\\.1983"),
3856 staticpro (&Vcharset_japanese_jisx0208_1990);
3857 Vcharset_japanese_jisx0208_1990 =
3858 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3859 Qjapanese_jisx0208_1990, 94, 2,
3860 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3861 build_string ("JISX0208-1990"),
3862 build_string ("JIS X0208:1990 (Japanese)"),
3863 build_string ("JIS X0208:1990 Japanese Kanji"),
3864 build_string ("jisx0208\\.1990"),
3866 MIN_CHAR_JIS_X0208_1990,
3867 MAX_CHAR_JIS_X0208_1990, 0, 33);
3869 staticpro (&Vcharset_korean_ksc5601);
3870 Vcharset_korean_ksc5601 =
3871 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3872 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3873 build_string ("KSC5601"),
3874 build_string ("KSC5601 (Korean"),
3875 build_string ("KSC5601 Korean Hangul and Hanja"),
3876 build_string ("ksc5601"),
3878 staticpro (&Vcharset_japanese_jisx0212);
3879 Vcharset_japanese_jisx0212 =
3880 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3881 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3882 build_string ("JISX0212"),
3883 build_string ("JISX0212 (Japanese)"),
3884 build_string ("JISX0212 Japanese Supplement"),
3885 build_string ("jisx0212"),
3888 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3889 staticpro (&Vcharset_chinese_cns11643_1);
3890 Vcharset_chinese_cns11643_1 =
3891 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3892 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3893 build_string ("CNS11643-1"),
3894 build_string ("CNS11643-1 (Chinese traditional)"),
3896 ("CNS 11643 Plane 1 Chinese traditional"),
3897 build_string (CHINESE_CNS_PLANE_RE("1")),
3899 staticpro (&Vcharset_chinese_cns11643_2);
3900 Vcharset_chinese_cns11643_2 =
3901 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3902 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3903 build_string ("CNS11643-2"),
3904 build_string ("CNS11643-2 (Chinese traditional)"),
3906 ("CNS 11643 Plane 2 Chinese traditional"),
3907 build_string (CHINESE_CNS_PLANE_RE("2")),
3910 staticpro (&Vcharset_latin_tcvn5712);
3911 Vcharset_latin_tcvn5712 =
3912 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3913 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3914 build_string ("TCVN 5712"),
3915 build_string ("TCVN 5712 (VSCII-2)"),
3916 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3917 build_string ("tcvn5712-1"),
3919 staticpro (&Vcharset_latin_viscii_lower);
3920 Vcharset_latin_viscii_lower =
3921 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3922 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3923 build_string ("VISCII lower"),
3924 build_string ("VISCII lower (Vietnamese)"),
3925 build_string ("VISCII lower (Vietnamese)"),
3926 build_string ("MULEVISCII-LOWER"),
3928 staticpro (&Vcharset_latin_viscii_upper);
3929 Vcharset_latin_viscii_upper =
3930 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3931 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3932 build_string ("VISCII upper"),
3933 build_string ("VISCII upper (Vietnamese)"),
3934 build_string ("VISCII upper (Vietnamese)"),
3935 build_string ("MULEVISCII-UPPER"),
3937 staticpro (&Vcharset_latin_viscii);
3938 Vcharset_latin_viscii =
3939 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3940 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3941 build_string ("VISCII"),
3942 build_string ("VISCII 1.1 (Vietnamese)"),
3943 build_string ("VISCII 1.1 (Vietnamese)"),
3944 build_string ("VISCII1\\.1"),
3946 staticpro (&Vcharset_chinese_big5);
3947 Vcharset_chinese_big5 =
3948 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
3949 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3950 build_string ("Big5"),
3951 build_string ("Big5"),
3952 build_string ("Big5 Chinese traditional"),
3953 build_string ("big5"),
3955 staticpro (&Vcharset_ideograph_daikanwa);
3956 Vcharset_ideograph_daikanwa =
3957 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
3958 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3959 build_string ("Daikanwa"),
3960 build_string ("Morohashi's Daikanwa"),
3961 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
3962 build_string ("Daikanwa"),
3963 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
3964 staticpro (&Vcharset_mojikyo);
3966 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
3967 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3968 build_string ("Mojikyo"),
3969 build_string ("Mojikyo"),
3970 build_string ("Konjaku-Mojikyo"),
3972 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
3973 staticpro (&Vcharset_mojikyo_2022_1);
3974 Vcharset_mojikyo_2022_1 =
3975 make_charset (LEADING_BYTE_MOJIKYO_2022_1, Qmojikyo_2022_1, 94, 3,
3976 2, 2, ':', CHARSET_LEFT_TO_RIGHT,
3977 build_string ("Mojikyo-2022-1"),
3978 build_string ("Mojikyo ISO-2022 Part 1"),
3979 build_string ("Konjaku-Mojikyo for ISO/IEC 2022 Part 1"),
3983 #define DEF_MOJIKYO_PJ(n) \
3984 staticpro (&Vcharset_mojikyo_pj_##n); \
3985 Vcharset_mojikyo_pj_##n = \
3986 make_charset (LEADING_BYTE_MOJIKYO_PJ_##n, Qmojikyo_pj_##n, 94, 2, \
3987 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
3988 build_string ("Mojikyo-PJ-"#n), \
3989 build_string ("Mojikyo (pseudo JIS encoding) part "#n), \
3991 ("Konjaku-Mojikyo (pseudo JIS encoding) part "#n), \
3993 ("\\(MojikyoPJ-"#n "\\|jisx0208\\.Mojikyo-"#n "\\)$"), \
4005 DEF_MOJIKYO_PJ (10);
4006 DEF_MOJIKYO_PJ (11);
4007 DEF_MOJIKYO_PJ (12);
4008 DEF_MOJIKYO_PJ (13);
4009 DEF_MOJIKYO_PJ (14);
4010 DEF_MOJIKYO_PJ (15);
4011 DEF_MOJIKYO_PJ (16);
4012 DEF_MOJIKYO_PJ (17);
4013 DEF_MOJIKYO_PJ (18);
4014 DEF_MOJIKYO_PJ (19);
4015 DEF_MOJIKYO_PJ (20);
4016 DEF_MOJIKYO_PJ (21);
4018 staticpro (&Vcharset_ethiopic_ucs);
4019 Vcharset_ethiopic_ucs =
4020 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
4021 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4022 build_string ("Ethiopic (UCS)"),
4023 build_string ("Ethiopic (UCS)"),
4024 build_string ("Ethiopic of UCS"),
4025 build_string ("Ethiopic-Unicode"),
4026 Qnil, 0x1200, 0x137F, 0x1200, 0);
4028 staticpro (&Vcharset_chinese_big5_1);
4029 Vcharset_chinese_big5_1 =
4030 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
4031 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
4032 build_string ("Big5"),
4033 build_string ("Big5 (Level-1)"),
4035 ("Big5 Level-1 Chinese traditional"),
4036 build_string ("big5"),
4038 staticpro (&Vcharset_chinese_big5_2);
4039 Vcharset_chinese_big5_2 =
4040 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
4041 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
4042 build_string ("Big5"),
4043 build_string ("Big5 (Level-2)"),
4045 ("Big5 Level-2 Chinese traditional"),
4046 build_string ("big5"),
4049 #ifdef ENABLE_COMPOSITE_CHARS
4050 /* #### For simplicity, we put composite chars into a 96x96 charset.
4051 This is going to lead to problems because you can run out of
4052 room, esp. as we don't yet recycle numbers. */
4053 staticpro (&Vcharset_composite);
4054 Vcharset_composite =
4055 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
4056 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4057 build_string ("Composite"),
4058 build_string ("Composite characters"),
4059 build_string ("Composite characters"),
4062 /* #### not dumped properly */
4063 composite_char_row_next = 32;
4064 composite_char_col_next = 32;
4066 Vcomposite_char_string2char_hash_table =
4067 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
4068 Vcomposite_char_char2string_hash_table =
4069 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
4070 staticpro (&Vcomposite_char_string2char_hash_table);
4071 staticpro (&Vcomposite_char_char2string_hash_table);
4072 #endif /* ENABLE_COMPOSITE_CHARS */