1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_cns;
70 Lisp_Object Vcharset_ucs_big5;
71 Lisp_Object Vcharset_latin_viscii;
72 Lisp_Object Vcharset_latin_tcvn5712;
73 Lisp_Object Vcharset_latin_viscii_lower;
74 Lisp_Object Vcharset_latin_viscii_upper;
75 Lisp_Object Vcharset_chinese_big5;
76 Lisp_Object Vcharset_chinese_big5_cdp;
77 Lisp_Object Vcharset_ideograph_gt;
78 Lisp_Object Vcharset_ideograph_gt_pj_1;
79 Lisp_Object Vcharset_ideograph_gt_pj_2;
80 Lisp_Object Vcharset_ideograph_gt_pj_3;
81 Lisp_Object Vcharset_ideograph_gt_pj_4;
82 Lisp_Object Vcharset_ideograph_gt_pj_5;
83 Lisp_Object Vcharset_ideograph_gt_pj_6;
84 Lisp_Object Vcharset_ideograph_gt_pj_7;
85 Lisp_Object Vcharset_ideograph_gt_pj_8;
86 Lisp_Object Vcharset_ideograph_gt_pj_9;
87 Lisp_Object Vcharset_ideograph_gt_pj_10;
88 Lisp_Object Vcharset_ideograph_gt_pj_11;
89 Lisp_Object Vcharset_ideograph_daikanwa;
90 Lisp_Object Vcharset_mojikyo;
91 Lisp_Object Vcharset_mojikyo_2022_1;
92 Lisp_Object Vcharset_mojikyo_pj_1;
93 Lisp_Object Vcharset_mojikyo_pj_2;
94 Lisp_Object Vcharset_mojikyo_pj_3;
95 Lisp_Object Vcharset_mojikyo_pj_4;
96 Lisp_Object Vcharset_mojikyo_pj_5;
97 Lisp_Object Vcharset_mojikyo_pj_6;
98 Lisp_Object Vcharset_mojikyo_pj_7;
99 Lisp_Object Vcharset_mojikyo_pj_8;
100 Lisp_Object Vcharset_mojikyo_pj_9;
101 Lisp_Object Vcharset_mojikyo_pj_10;
102 Lisp_Object Vcharset_mojikyo_pj_11;
103 Lisp_Object Vcharset_mojikyo_pj_12;
104 Lisp_Object Vcharset_mojikyo_pj_13;
105 Lisp_Object Vcharset_mojikyo_pj_14;
106 Lisp_Object Vcharset_mojikyo_pj_15;
107 Lisp_Object Vcharset_mojikyo_pj_16;
108 Lisp_Object Vcharset_mojikyo_pj_17;
109 Lisp_Object Vcharset_mojikyo_pj_18;
110 Lisp_Object Vcharset_mojikyo_pj_19;
111 Lisp_Object Vcharset_mojikyo_pj_20;
112 Lisp_Object Vcharset_mojikyo_pj_21;
113 Lisp_Object Vcharset_ethiopic_ucs;
115 Lisp_Object Vcharset_chinese_big5_1;
116 Lisp_Object Vcharset_chinese_big5_2;
118 #ifdef ENABLE_COMPOSITE_CHARS
119 Lisp_Object Vcharset_composite;
121 /* Hash tables for composite chars. One maps string representing
122 composed chars to their equivalent chars; one goes the
124 Lisp_Object Vcomposite_char_char2string_hash_table;
125 Lisp_Object Vcomposite_char_string2char_hash_table;
127 static int composite_char_row_next;
128 static int composite_char_col_next;
130 #endif /* ENABLE_COMPOSITE_CHARS */
132 struct charset_lookup *chlook;
134 static const struct lrecord_description charset_lookup_description_1[] = {
135 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
144 static const struct struct_description charset_lookup_description = {
145 sizeof (struct charset_lookup),
146 charset_lookup_description_1
150 /* Table of number of bytes in the string representation of a character
151 indexed by the first byte of that representation.
153 rep_bytes_by_first_byte(c) is more efficient than the equivalent
154 canonical computation:
156 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
158 const Bytecount rep_bytes_by_first_byte[0xA0] =
159 { /* 0x00 - 0x7f are for straight ASCII */
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
165 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
166 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
167 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
168 /* 0x80 - 0x8f are for Dimension-1 official charsets */
170 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
172 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
174 /* 0x90 - 0x9d are for Dimension-2 official charsets */
175 /* 0x9e is for Dimension-1 private charsets */
176 /* 0x9f is for Dimension-2 private charsets */
177 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
183 #define BT_UINT8_MIN 0
184 #define BT_UINT8_MAX (UCHAR_MAX - 3)
185 #define BT_UINT8_t (UCHAR_MAX - 2)
186 #define BT_UINT8_nil (UCHAR_MAX - 1)
187 #define BT_UINT8_unbound UCHAR_MAX
189 INLINE_HEADER int INT_UINT8_P (Lisp_Object obj);
190 INLINE_HEADER int UINT8_VALUE_P (Lisp_Object obj);
191 INLINE_HEADER unsigned char UINT8_ENCODE (Lisp_Object obj);
192 INLINE_HEADER Lisp_Object UINT8_DECODE (unsigned char n);
193 INLINE_HEADER unsigned short UINT8_TO_UINT16 (unsigned char n);
196 INT_UINT8_P (Lisp_Object obj)
200 int num = XINT (obj);
202 return (BT_UINT8_MIN <= num) && (num <= BT_UINT8_MAX);
209 UINT8_VALUE_P (Lisp_Object obj)
211 return EQ (obj, Qunbound)
212 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT8_P (obj);
215 INLINE_HEADER unsigned char
216 UINT8_ENCODE (Lisp_Object obj)
218 if (EQ (obj, Qunbound))
219 return BT_UINT8_unbound;
220 else if (EQ (obj, Qnil))
222 else if (EQ (obj, Qt))
228 INLINE_HEADER Lisp_Object
229 UINT8_DECODE (unsigned char n)
231 if (n == BT_UINT8_unbound)
233 else if (n == BT_UINT8_nil)
235 else if (n == BT_UINT8_t)
242 mark_uint8_byte_table (Lisp_Object obj)
248 print_uint8_byte_table (Lisp_Object obj,
249 Lisp_Object printcharfun, int escapeflag)
251 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
253 struct gcpro gcpro1, gcpro2;
254 GCPRO2 (obj, printcharfun);
256 write_c_string ("\n#<uint8-byte-table", printcharfun);
257 for (i = 0; i < 256; i++)
259 unsigned char n = bte->property[i];
261 write_c_string ("\n ", printcharfun);
262 write_c_string (" ", printcharfun);
263 if (n == BT_UINT8_unbound)
264 write_c_string ("void", printcharfun);
265 else if (n == BT_UINT8_nil)
266 write_c_string ("nil", printcharfun);
267 else if (n == BT_UINT8_t)
268 write_c_string ("t", printcharfun);
273 sprintf (buf, "%hd", n);
274 write_c_string (buf, printcharfun);
278 write_c_string (">", printcharfun);
282 uint8_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
284 Lisp_Uint8_Byte_Table *te1 = XUINT8_BYTE_TABLE (obj1);
285 Lisp_Uint8_Byte_Table *te2 = XUINT8_BYTE_TABLE (obj2);
288 for (i = 0; i < 256; i++)
289 if (te1->property[i] != te2->property[i])
295 uint8_byte_table_hash (Lisp_Object obj, int depth)
297 Lisp_Uint8_Byte_Table *te = XUINT8_BYTE_TABLE (obj);
301 for (i = 0; i < 256; i++)
302 hash = HASH2 (hash, te->property[i]);
306 DEFINE_LRECORD_IMPLEMENTATION ("uint8-byte-table", uint8_byte_table,
307 mark_uint8_byte_table,
308 print_uint8_byte_table,
309 0, uint8_byte_table_equal,
310 uint8_byte_table_hash,
311 0 /* uint8_byte_table_description */,
312 Lisp_Uint8_Byte_Table);
315 make_uint8_byte_table (unsigned char initval)
319 Lisp_Uint8_Byte_Table *cte;
321 cte = alloc_lcrecord_type (Lisp_Uint8_Byte_Table,
322 &lrecord_uint8_byte_table);
324 for (i = 0; i < 256; i++)
325 cte->property[i] = initval;
327 XSETUINT8_BYTE_TABLE (obj, cte);
332 uint8_byte_table_same_value_p (Lisp_Object obj)
334 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
335 unsigned char v0 = bte->property[0];
338 for (i = 1; i < 256; i++)
340 if (bte->property[i] != v0)
347 #define BT_UINT16_MIN 0
348 #define BT_UINT16_MAX (USHRT_MAX - 3)
349 #define BT_UINT16_t (USHRT_MAX - 2)
350 #define BT_UINT16_nil (USHRT_MAX - 1)
351 #define BT_UINT16_unbound USHRT_MAX
353 INLINE_HEADER int INT_UINT16_P (Lisp_Object obj);
354 INLINE_HEADER int UINT16_VALUE_P (Lisp_Object obj);
355 INLINE_HEADER unsigned short UINT16_ENCODE (Lisp_Object obj);
356 INLINE_HEADER Lisp_Object UINT16_DECODE (unsigned short us);
359 INT_UINT16_P (Lisp_Object obj)
363 int num = XINT (obj);
365 return (BT_UINT16_MIN <= num) && (num <= BT_UINT16_MAX);
372 UINT16_VALUE_P (Lisp_Object obj)
374 return EQ (obj, Qunbound)
375 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT16_P (obj);
378 INLINE_HEADER unsigned short
379 UINT16_ENCODE (Lisp_Object obj)
381 if (EQ (obj, Qunbound))
382 return BT_UINT16_unbound;
383 else if (EQ (obj, Qnil))
384 return BT_UINT16_nil;
385 else if (EQ (obj, Qt))
391 INLINE_HEADER Lisp_Object
392 UINT16_DECODE (unsigned short n)
394 if (n == BT_UINT16_unbound)
396 else if (n == BT_UINT16_nil)
398 else if (n == BT_UINT16_t)
404 INLINE_HEADER unsigned short
405 UINT8_TO_UINT16 (unsigned char n)
407 if (n == BT_UINT8_unbound)
408 return BT_UINT16_unbound;
409 else if (n == BT_UINT8_nil)
410 return BT_UINT16_nil;
411 else if (n == BT_UINT8_t)
418 mark_uint16_byte_table (Lisp_Object obj)
424 print_uint16_byte_table (Lisp_Object obj,
425 Lisp_Object printcharfun, int escapeflag)
427 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
429 struct gcpro gcpro1, gcpro2;
430 GCPRO2 (obj, printcharfun);
432 write_c_string ("\n#<uint16-byte-table", printcharfun);
433 for (i = 0; i < 256; i++)
435 unsigned short n = bte->property[i];
437 write_c_string ("\n ", printcharfun);
438 write_c_string (" ", printcharfun);
439 if (n == BT_UINT16_unbound)
440 write_c_string ("void", printcharfun);
441 else if (n == BT_UINT16_nil)
442 write_c_string ("nil", printcharfun);
443 else if (n == BT_UINT16_t)
444 write_c_string ("t", printcharfun);
449 sprintf (buf, "%hd", n);
450 write_c_string (buf, printcharfun);
454 write_c_string (">", printcharfun);
458 uint16_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
460 Lisp_Uint16_Byte_Table *te1 = XUINT16_BYTE_TABLE (obj1);
461 Lisp_Uint16_Byte_Table *te2 = XUINT16_BYTE_TABLE (obj2);
464 for (i = 0; i < 256; i++)
465 if (te1->property[i] != te2->property[i])
471 uint16_byte_table_hash (Lisp_Object obj, int depth)
473 Lisp_Uint16_Byte_Table *te = XUINT16_BYTE_TABLE (obj);
477 for (i = 0; i < 256; i++)
478 hash = HASH2 (hash, te->property[i]);
482 DEFINE_LRECORD_IMPLEMENTATION ("uint16-byte-table", uint16_byte_table,
483 mark_uint16_byte_table,
484 print_uint16_byte_table,
485 0, uint16_byte_table_equal,
486 uint16_byte_table_hash,
487 0 /* uint16_byte_table_description */,
488 Lisp_Uint16_Byte_Table);
491 make_uint16_byte_table (unsigned short initval)
495 Lisp_Uint16_Byte_Table *cte;
497 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
498 &lrecord_uint16_byte_table);
500 for (i = 0; i < 256; i++)
501 cte->property[i] = initval;
503 XSETUINT16_BYTE_TABLE (obj, cte);
508 expand_uint8_byte_table_to_uint16 (Lisp_Object table)
512 Lisp_Uint8_Byte_Table* bte = XUINT8_BYTE_TABLE(table);
513 Lisp_Uint16_Byte_Table* cte;
515 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
516 &lrecord_uint16_byte_table);
517 for (i = 0; i < 256; i++)
519 cte->property[i] = UINT8_TO_UINT16 (bte->property[i]);
521 XSETUINT16_BYTE_TABLE (obj, cte);
526 uint16_byte_table_same_value_p (Lisp_Object obj)
528 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
529 unsigned short v0 = bte->property[0];
532 for (i = 1; i < 256; i++)
534 if (bte->property[i] != v0)
542 mark_byte_table (Lisp_Object obj)
544 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
547 for (i = 0; i < 256; i++)
549 mark_object (cte->property[i]);
555 print_byte_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
557 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
559 struct gcpro gcpro1, gcpro2;
560 GCPRO2 (obj, printcharfun);
562 write_c_string ("\n#<byte-table", printcharfun);
563 for (i = 0; i < 256; i++)
565 Lisp_Object elt = bte->property[i];
567 write_c_string ("\n ", printcharfun);
568 write_c_string (" ", printcharfun);
569 if (EQ (elt, Qunbound))
570 write_c_string ("void", printcharfun);
572 print_internal (elt, printcharfun, escapeflag);
575 write_c_string (">", printcharfun);
579 byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
581 Lisp_Byte_Table *cte1 = XBYTE_TABLE (obj1);
582 Lisp_Byte_Table *cte2 = XBYTE_TABLE (obj2);
585 for (i = 0; i < 256; i++)
586 if (BYTE_TABLE_P (cte1->property[i]))
588 if (BYTE_TABLE_P (cte2->property[i]))
590 if (!byte_table_equal (cte1->property[i],
591 cte2->property[i], depth + 1))
598 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
604 byte_table_hash (Lisp_Object obj, int depth)
606 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
608 return internal_array_hash (cte->property, 256, depth);
611 static const struct lrecord_description byte_table_description[] = {
612 { XD_LISP_OBJECT_ARRAY, offsetof(Lisp_Byte_Table, property), 256 },
616 DEFINE_LRECORD_IMPLEMENTATION ("byte-table", byte_table,
621 byte_table_description,
625 make_byte_table (Lisp_Object initval)
629 Lisp_Byte_Table *cte;
631 cte = alloc_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
633 for (i = 0; i < 256; i++)
634 cte->property[i] = initval;
636 XSETBYTE_TABLE (obj, cte);
641 byte_table_same_value_p (Lisp_Object obj)
643 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
644 Lisp_Object v0 = bte->property[0];
647 for (i = 1; i < 256; i++)
649 if (!internal_equal (bte->property[i], v0, 0))
656 Lisp_Object get_byte_table (Lisp_Object table, unsigned char idx);
657 Lisp_Object put_byte_table (Lisp_Object table, unsigned char idx,
661 get_byte_table (Lisp_Object table, unsigned char idx)
663 if (UINT8_BYTE_TABLE_P (table))
664 return UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[idx]);
665 else if (UINT16_BYTE_TABLE_P (table))
666 return UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[idx]);
667 else if (BYTE_TABLE_P (table))
668 return XBYTE_TABLE(table)->property[idx];
674 put_byte_table (Lisp_Object table, unsigned char idx, Lisp_Object value)
676 if (UINT8_BYTE_TABLE_P (table))
678 if (UINT8_VALUE_P (value))
680 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
681 if (!UINT8_BYTE_TABLE_P (value) &&
682 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
683 && uint8_byte_table_same_value_p (table))
688 else if (UINT16_VALUE_P (value))
690 Lisp_Object new = expand_uint8_byte_table_to_uint16 (table);
692 XUINT16_BYTE_TABLE(new)->property[idx] = UINT16_ENCODE (value);
697 Lisp_Object new = make_byte_table (Qnil);
700 for (i = 0; i < 256; i++)
702 XBYTE_TABLE(new)->property[i]
703 = UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[i]);
705 XBYTE_TABLE(new)->property[idx] = value;
709 else if (UINT16_BYTE_TABLE_P (table))
711 if (UINT16_VALUE_P (value))
713 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
714 if (!UINT8_BYTE_TABLE_P (value) &&
715 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
716 && uint16_byte_table_same_value_p (table))
723 Lisp_Object new = make_byte_table (Qnil);
726 for (i = 0; i < 256; i++)
728 XBYTE_TABLE(new)->property[i]
729 = UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[i]);
731 XBYTE_TABLE(new)->property[idx] = value;
735 else if (BYTE_TABLE_P (table))
737 XBYTE_TABLE(table)->property[idx] = value;
738 if (!UINT8_BYTE_TABLE_P (value) &&
739 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
740 && byte_table_same_value_p (table))
745 else if (!internal_equal (table, value, 0))
747 if (UINT8_VALUE_P (table) && UINT8_VALUE_P (value))
749 table = make_uint8_byte_table (UINT8_ENCODE (table));
750 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
752 else if (UINT16_VALUE_P (table) && UINT16_VALUE_P (value))
754 table = make_uint16_byte_table (UINT16_ENCODE (table));
755 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
759 table = make_byte_table (table);
760 XBYTE_TABLE(table)->property[idx] = value;
767 mark_char_id_table (Lisp_Object obj)
769 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
775 print_char_id_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
777 Lisp_Object table = XCHAR_ID_TABLE (obj)->table;
779 struct gcpro gcpro1, gcpro2;
780 GCPRO2 (obj, printcharfun);
782 write_c_string ("#<char-id-table ", printcharfun);
783 for (i = 0; i < 256; i++)
785 Lisp_Object elt = get_byte_table (table, i);
786 if (i != 0) write_c_string ("\n ", printcharfun);
787 if (EQ (elt, Qunbound))
788 write_c_string ("void", printcharfun);
790 print_internal (elt, printcharfun, escapeflag);
793 write_c_string (">", printcharfun);
797 char_id_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
799 Lisp_Object table1 = XCHAR_ID_TABLE (obj1)->table;
800 Lisp_Object table2 = XCHAR_ID_TABLE (obj2)->table;
803 for (i = 0; i < 256; i++)
805 if (!internal_equal (get_byte_table (table1, i),
806 get_byte_table (table2, i), 0))
813 char_id_table_hash (Lisp_Object obj, int depth)
815 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
817 return char_id_table_hash (cte->table, depth + 1);
820 static const struct lrecord_description char_id_table_description[] = {
821 { XD_LISP_OBJECT, offsetof(Lisp_Char_ID_Table, table) },
825 DEFINE_LRECORD_IMPLEMENTATION ("char-id-table", char_id_table,
828 0, char_id_table_equal,
830 char_id_table_description,
834 make_char_id_table (Lisp_Object initval)
837 Lisp_Char_ID_Table *cte;
839 cte = alloc_lcrecord_type (Lisp_Char_ID_Table, &lrecord_char_id_table);
841 cte->table = make_byte_table (initval);
843 XSETCHAR_ID_TABLE (obj, cte);
849 get_char_id_table (Emchar ch, Lisp_Object table)
851 unsigned int code = ch;
858 (XCHAR_ID_TABLE (table)->table,
859 (unsigned char)(code >> 24)),
860 (unsigned char) (code >> 16)),
861 (unsigned char) (code >> 8)),
862 (unsigned char) code);
865 void put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table);
867 put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table)
869 unsigned int code = ch;
870 Lisp_Object table1, table2, table3, table4;
872 table1 = XCHAR_ID_TABLE (table)->table;
873 table2 = get_byte_table (table1, (unsigned char)(code >> 24));
874 table3 = get_byte_table (table2, (unsigned char)(code >> 16));
875 table4 = get_byte_table (table3, (unsigned char)(code >> 8));
877 table4 = put_byte_table (table4, (unsigned char)code, value);
878 table3 = put_byte_table (table3, (unsigned char)(code >> 8), table4);
879 table2 = put_byte_table (table2, (unsigned char)(code >> 16), table3);
880 XCHAR_ID_TABLE (table)->table
881 = put_byte_table (table1, (unsigned char)(code >> 24), table2);
885 Lisp_Object Vchar_attribute_hash_table;
886 Lisp_Object Vcharacter_composition_table;
887 Lisp_Object Vcharacter_variant_table;
889 Lisp_Object Qideograph_daikanwa;
890 Lisp_Object Q_decomposition;
895 Lisp_Object Qisolated;
896 Lisp_Object Qinitial;
899 Lisp_Object Qvertical;
900 Lisp_Object QnoBreak;
901 Lisp_Object Qfraction;
911 Emchar to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg);
913 Lisp_Object put_char_ccs_code_point (Lisp_Object character,
914 Lisp_Object ccs, Lisp_Object value);
915 Lisp_Object remove_char_ccs (Lisp_Object character, Lisp_Object ccs);
918 to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
924 else if (EQ (v, Qcompat))
926 else if (EQ (v, Qisolated))
928 else if (EQ (v, Qinitial))
930 else if (EQ (v, Qmedial))
932 else if (EQ (v, Qfinal))
934 else if (EQ (v, Qvertical))
936 else if (EQ (v, QnoBreak))
938 else if (EQ (v, Qfraction))
940 else if (EQ (v, Qsuper))
942 else if (EQ (v, Qsub))
944 else if (EQ (v, Qcircle))
946 else if (EQ (v, Qsquare))
948 else if (EQ (v, Qwide))
950 else if (EQ (v, Qnarrow))
952 else if (EQ (v, Qsmall))
954 else if (EQ (v, Qfont))
957 signal_simple_error (err_msg, err_arg);
960 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
961 Return character corresponding with list.
965 Lisp_Object table = Vcharacter_composition_table;
966 Lisp_Object rest = list;
970 Lisp_Object v = Fcar (rest);
972 Emchar c = to_char_id (v, "Invalid value for composition", list);
974 ret = get_char_id_table (c, table);
979 if (!CHAR_ID_TABLE_P (ret))
984 else if (!CONSP (rest))
986 else if (CHAR_ID_TABLE_P (ret))
989 signal_simple_error ("Invalid table is found with", list);
991 signal_simple_error ("Invalid value for composition", list);
994 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
995 Return variants of CHARACTER.
999 CHECK_CHAR (character);
1000 return Fcopy_list (get_char_id_table (XCHAR (character),
1001 Vcharacter_variant_table));
1005 /* We store the char-attributes in hash tables with the names as the
1006 key and the actual char-id-table object as the value. Occasionally
1007 we need to use them in a list format. These routines provide us
1009 struct char_attribute_list_closure
1011 Lisp_Object *char_attribute_list;
1015 add_char_attribute_to_list_mapper (Lisp_Object key, Lisp_Object value,
1016 void *char_attribute_list_closure)
1018 /* This function can GC */
1019 struct char_attribute_list_closure *calcl
1020 = (struct char_attribute_list_closure*) char_attribute_list_closure;
1021 Lisp_Object *char_attribute_list = calcl->char_attribute_list;
1023 *char_attribute_list = Fcons (key, *char_attribute_list);
1027 DEFUN ("char-attribute-list", Fchar_attribute_list, 0, 0, 0, /*
1028 Return the list of all existing character attributes except coded-charsets.
1032 Lisp_Object char_attribute_list = Qnil;
1033 struct gcpro gcpro1;
1034 struct char_attribute_list_closure char_attribute_list_closure;
1036 GCPRO1 (char_attribute_list);
1037 char_attribute_list_closure.char_attribute_list = &char_attribute_list;
1038 elisp_maphash (add_char_attribute_to_list_mapper,
1039 Vchar_attribute_hash_table,
1040 &char_attribute_list_closure);
1042 return char_attribute_list;
1045 DEFUN ("find-char-attribute-table", Ffind_char_attribute_table, 1, 1, 0, /*
1046 Return char-id-table corresponding to ATTRIBUTE.
1050 return Fgethash (attribute, Vchar_attribute_hash_table, Qnil);
1054 /* We store the char-id-tables in hash tables with the attributes as
1055 the key and the actual char-id-table object as the value. Each
1056 char-id-table stores values of an attribute corresponding with
1057 characters. Occasionally we need to get attributes of a character
1058 in a association-list format. These routines provide us with
1060 struct char_attribute_alist_closure
1063 Lisp_Object *char_attribute_alist;
1067 add_char_attribute_alist_mapper (Lisp_Object key, Lisp_Object value,
1068 void *char_attribute_alist_closure)
1070 /* This function can GC */
1071 struct char_attribute_alist_closure *caacl =
1072 (struct char_attribute_alist_closure*) char_attribute_alist_closure;
1073 Lisp_Object ret = get_char_id_table (caacl->char_id, value);
1074 if (!UNBOUNDP (ret))
1076 Lisp_Object *char_attribute_alist = caacl->char_attribute_alist;
1077 *char_attribute_alist
1078 = Fcons (Fcons (key, ret), *char_attribute_alist);
1083 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
1084 Return the alist of attributes of CHARACTER.
1088 Lisp_Object alist = Qnil;
1091 CHECK_CHAR (character);
1093 struct gcpro gcpro1;
1094 struct char_attribute_alist_closure char_attribute_alist_closure;
1097 char_attribute_alist_closure.char_id = XCHAR (character);
1098 char_attribute_alist_closure.char_attribute_alist = &alist;
1099 elisp_maphash (add_char_attribute_alist_mapper,
1100 Vchar_attribute_hash_table,
1101 &char_attribute_alist_closure);
1105 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
1107 Lisp_Object ccs = chlook->charset_by_leading_byte[i];
1111 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1114 if ( CHAR_ID_TABLE_P (encoding_table)
1115 && INTP (cpos = get_char_id_table (XCHAR (character),
1118 alist = Fcons (Fcons (ccs, cpos), alist);
1125 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 3, 0, /*
1126 Return the value of CHARACTER's ATTRIBUTE.
1127 Return DEFAULT-VALUE if the value is not exist.
1129 (character, attribute, default_value))
1133 CHECK_CHAR (character);
1134 if (!NILP (ccs = Ffind_charset (attribute)))
1136 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1138 if (CHAR_ID_TABLE_P (encoding_table))
1139 return get_char_id_table (XCHAR (character), encoding_table);
1143 Lisp_Object table = Fgethash (attribute,
1144 Vchar_attribute_hash_table,
1146 if (!UNBOUNDP (table))
1148 Lisp_Object ret = get_char_id_table (XCHAR (character), table);
1149 if (!UNBOUNDP (ret))
1153 return default_value;
1156 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
1157 Store CHARACTER's ATTRIBUTE with VALUE.
1159 (character, attribute, value))
1163 CHECK_CHAR (character);
1164 ccs = Ffind_charset (attribute);
1167 return put_char_ccs_code_point (character, ccs, value);
1169 else if (EQ (attribute, Q_decomposition))
1174 signal_simple_error ("Invalid value for ->decomposition",
1177 if (CONSP (Fcdr (value)))
1179 Lisp_Object rest = value;
1180 Lisp_Object table = Vcharacter_composition_table;
1184 GET_EXTERNAL_LIST_LENGTH (rest, len);
1185 seq = make_vector (len, Qnil);
1187 while (CONSP (rest))
1189 Lisp_Object v = Fcar (rest);
1192 = to_char_id (v, "Invalid value for ->decomposition", value);
1195 XVECTOR_DATA(seq)[i++] = v;
1197 XVECTOR_DATA(seq)[i++] = make_char (c);
1201 put_char_id_table (c, character, table);
1206 ntable = get_char_id_table (c, table);
1207 if (!CHAR_ID_TABLE_P (ntable))
1209 ntable = make_char_id_table (Qnil);
1210 put_char_id_table (c, ntable, table);
1218 Lisp_Object v = Fcar (value);
1222 Emchar c = XINT (v);
1224 = get_char_id_table (c, Vcharacter_variant_table);
1226 if (NILP (Fmemq (v, ret)))
1228 put_char_id_table (c, Fcons (character, ret),
1229 Vcharacter_variant_table);
1232 seq = make_vector (1, v);
1236 else if (EQ (attribute, Qto_ucs) || EQ (attribute, Q_ucs))
1242 signal_simple_error ("Invalid value for ->ucs", value);
1246 ret = get_char_id_table (c, Vcharacter_variant_table);
1247 if (NILP (Fmemq (character, ret)))
1249 put_char_id_table (c, Fcons (character, ret),
1250 Vcharacter_variant_table);
1253 if (EQ (attribute, Q_ucs))
1254 attribute = Qto_ucs;
1258 Lisp_Object table = Fgethash (attribute,
1259 Vchar_attribute_hash_table,
1264 table = make_char_id_table (Qunbound);
1265 Fputhash (attribute, table, Vchar_attribute_hash_table);
1267 put_char_id_table (XCHAR (character), value, table);
1272 DEFUN ("remove-char-attribute", Fremove_char_attribute, 2, 2, 0, /*
1273 Remove CHARACTER's ATTRIBUTE.
1275 (character, attribute))
1279 CHECK_CHAR (character);
1280 ccs = Ffind_charset (attribute);
1283 return remove_char_ccs (character, ccs);
1287 Lisp_Object table = Fgethash (attribute,
1288 Vchar_attribute_hash_table,
1290 if (!UNBOUNDP (table))
1292 put_char_id_table (XCHAR (character), Qunbound, table);
1299 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
1301 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
1303 /* ad-hoc method for `ascii' */
1304 if ((CHARSET_CHARS (cs) == 94) &&
1305 (CHARSET_BYTE_OFFSET (cs) != 33))
1306 return 128 - CHARSET_BYTE_OFFSET (cs);
1308 return CHARSET_CHARS (cs);
1311 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
1313 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
1315 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
1319 if (XVECTOR_LENGTH (v) > ccs_len)
1322 for (i = 0; i < XVECTOR_LENGTH (v); i++)
1324 Lisp_Object c = XVECTOR_DATA(v)[i];
1326 if (!NILP (c) && !CHARP (c))
1330 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
1342 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1345 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1355 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1356 nv = XVECTOR_DATA(v)[i];
1362 XVECTOR_DATA(v)[i] = Qnil;
1366 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1367 int code_point, Lisp_Object character);
1369 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1370 int code_point, Lisp_Object character)
1374 int ccs_len = XVECTOR_LENGTH (v);
1379 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1380 nv = XVECTOR_DATA(v)[i];
1384 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
1390 XVECTOR_DATA(v)[i] = character;
1394 put_char_ccs_code_point (Lisp_Object character,
1395 Lisp_Object ccs, Lisp_Object value)
1397 Lisp_Object encoding_table;
1399 if (!EQ (XCHARSET_NAME (ccs), Qucs)
1400 || (XCHAR (character) != XINT (value)))
1402 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
1403 int dim = XCHARSET_DIMENSION (ccs);
1404 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
1405 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
1409 { /* obsolete representation: value must be a list of bytes */
1410 Lisp_Object ret = Fcar (value);
1414 signal_simple_error ("Invalid value for coded-charset", value);
1415 code_point = XINT (ret);
1416 if (XCHARSET_GRAPHIC (ccs) == 1)
1418 rest = Fcdr (value);
1419 while (!NILP (rest))
1424 signal_simple_error ("Invalid value for coded-charset",
1428 signal_simple_error ("Invalid value for coded-charset",
1431 if (XCHARSET_GRAPHIC (ccs) == 1)
1433 code_point = (code_point << 8) | j;
1436 value = make_int (code_point);
1438 else if (INTP (value))
1440 code_point = XINT (value);
1441 if (XCHARSET_GRAPHIC (ccs) == 1)
1443 code_point &= 0x7F7F7F7F;
1444 value = make_int (code_point);
1448 signal_simple_error ("Invalid value for coded-charset", value);
1452 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
1455 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
1460 XCHARSET_DECODING_TABLE (ccs)
1461 = v = make_older_vector (ccs_len, Qnil);
1464 decoding_table_put_char (v, dim, byte_offset, code_point, character);
1466 if (NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
1468 XCHARSET_ENCODING_TABLE (ccs)
1469 = encoding_table = make_char_id_table (Qnil);
1471 put_char_id_table (XCHAR (character), value, encoding_table);
1476 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
1478 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
1479 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1481 if (VECTORP (decoding_table))
1483 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
1487 decoding_table_remove_char (decoding_table,
1488 XCHARSET_DIMENSION (ccs),
1489 XCHARSET_BYTE_OFFSET (ccs),
1493 if (CHAR_ID_TABLE_P (encoding_table))
1495 put_char_id_table (XCHAR (character), Qnil, encoding_table);
1500 EXFUN (Fmake_char, 3);
1501 EXFUN (Fdecode_char, 2);
1503 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
1504 Store character's ATTRIBUTES.
1508 Lisp_Object rest = attributes;
1509 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
1510 Lisp_Object character;
1514 while (CONSP (rest))
1516 Lisp_Object cell = Fcar (rest);
1520 signal_simple_error ("Invalid argument", attributes);
1521 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
1522 && ((XCHARSET_FINAL (ccs) != 0) ||
1523 (XCHARSET_UCS_MAX (ccs) > 0)) )
1527 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
1529 character = Fdecode_char (ccs, cell);
1530 if (!NILP (character))
1531 goto setup_attributes;
1535 if ( (!NILP (code = Fcdr (Fassq (Qto_ucs, attributes)))) ||
1536 (!NILP (code = Fcdr (Fassq (Q_ucs, attributes)))) )
1540 signal_simple_error ("Invalid argument", attributes);
1542 character = make_char (XINT (code) + 0x100000);
1543 goto setup_attributes;
1547 else if (!INTP (code))
1548 signal_simple_error ("Invalid argument", attributes);
1550 character = make_char (XINT (code));
1554 while (CONSP (rest))
1556 Lisp_Object cell = Fcar (rest);
1559 signal_simple_error ("Invalid argument", attributes);
1561 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
1567 DEFUN ("find-char", Ffind_char, 1, 1, 0, /*
1568 Retrieve the character of the given ATTRIBUTES.
1572 Lisp_Object rest = attributes;
1575 while (CONSP (rest))
1577 Lisp_Object cell = Fcar (rest);
1581 signal_simple_error ("Invalid argument", attributes);
1582 if (!NILP (ccs = Ffind_charset (Fcar (cell))))
1586 return Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
1588 return Fdecode_char (ccs, cell);
1592 if ( (!NILP (code = Fcdr (Fassq (Qto_ucs, attributes)))) ||
1593 (!NILP (code = Fcdr (Fassq (Q_ucs, attributes)))) )
1596 signal_simple_error ("Invalid argument", attributes);
1598 return make_char (XINT (code) + 0x100000);
1603 Lisp_Object Vutf_2000_version;
1607 int leading_code_private_11;
1610 Lisp_Object Qcharsetp;
1612 /* Qdoc_string, Qdimension, Qchars defined in general.c */
1613 Lisp_Object Qregistry, Qfinal, Qgraphic;
1614 Lisp_Object Qdirection;
1615 Lisp_Object Qreverse_direction_charset;
1616 Lisp_Object Qleading_byte;
1617 Lisp_Object Qshort_name, Qlong_name;
1631 Qcyrillic_iso8859_5,
1633 Qjapanese_jisx0208_1978,
1637 Qjapanese_jisx0208_1990,
1640 Qchinese_cns11643_1,
1641 Qchinese_cns11643_2,
1648 Qlatin_viscii_lower,
1649 Qlatin_viscii_upper,
1650 Qvietnamese_viscii_lower,
1651 Qvietnamese_viscii_upper,
1664 Qideograph_gt_pj_10,
1665 Qideograph_gt_pj_11,
1695 Lisp_Object Ql2r, Qr2l;
1697 Lisp_Object Vcharset_hash_table;
1699 /* Composite characters are characters constructed by overstriking two
1700 or more regular characters.
1702 1) The old Mule implementation involves storing composite characters
1703 in a buffer as a tag followed by all of the actual characters
1704 used to make up the composite character. I think this is a bad
1705 idea; it greatly complicates code that wants to handle strings
1706 one character at a time because it has to deal with the possibility
1707 of great big ungainly characters. It's much more reasonable to
1708 simply store an index into a table of composite characters.
1710 2) The current implementation only allows for 16,384 separate
1711 composite characters over the lifetime of the XEmacs process.
1712 This could become a potential problem if the user
1713 edited lots of different files that use composite characters.
1714 Due to FSF bogosity, increasing the number of allowable
1715 composite characters under Mule would decrease the number
1716 of possible faces that can exist. Mule already has shrunk
1717 this to 2048, and further shrinkage would become uncomfortable.
1718 No such problems exist in XEmacs.
1720 Composite characters could be represented as 0x80 C1 C2 C3,
1721 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1722 for slightly under 2^20 (one million) composite characters
1723 over the XEmacs process lifetime, and you only need to
1724 increase the size of a Mule character from 19 to 21 bits.
1725 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1726 85 million (slightly over 2^26) composite characters. */
1729 /************************************************************************/
1730 /* Basic Emchar functions */
1731 /************************************************************************/
1733 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1734 string in STR. Returns the number of bytes stored.
1735 Do not call this directly. Use the macro set_charptr_emchar() instead.
1739 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1745 Lisp_Object charset;
1754 else if ( c <= 0x7ff )
1756 *p++ = (c >> 6) | 0xc0;
1757 *p++ = (c & 0x3f) | 0x80;
1759 else if ( c <= 0xffff )
1761 *p++ = (c >> 12) | 0xe0;
1762 *p++ = ((c >> 6) & 0x3f) | 0x80;
1763 *p++ = (c & 0x3f) | 0x80;
1765 else if ( c <= 0x1fffff )
1767 *p++ = (c >> 18) | 0xf0;
1768 *p++ = ((c >> 12) & 0x3f) | 0x80;
1769 *p++ = ((c >> 6) & 0x3f) | 0x80;
1770 *p++ = (c & 0x3f) | 0x80;
1772 else if ( c <= 0x3ffffff )
1774 *p++ = (c >> 24) | 0xf8;
1775 *p++ = ((c >> 18) & 0x3f) | 0x80;
1776 *p++ = ((c >> 12) & 0x3f) | 0x80;
1777 *p++ = ((c >> 6) & 0x3f) | 0x80;
1778 *p++ = (c & 0x3f) | 0x80;
1782 *p++ = (c >> 30) | 0xfc;
1783 *p++ = ((c >> 24) & 0x3f) | 0x80;
1784 *p++ = ((c >> 18) & 0x3f) | 0x80;
1785 *p++ = ((c >> 12) & 0x3f) | 0x80;
1786 *p++ = ((c >> 6) & 0x3f) | 0x80;
1787 *p++ = (c & 0x3f) | 0x80;
1790 BREAKUP_CHAR (c, charset, c1, c2);
1791 lb = CHAR_LEADING_BYTE (c);
1792 if (LEADING_BYTE_PRIVATE_P (lb))
1793 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1795 if (EQ (charset, Vcharset_control_1))
1804 /* Return the first character from a Mule-encoded string in STR,
1805 assuming it's non-ASCII. Do not call this directly.
1806 Use the macro charptr_emchar() instead. */
1809 non_ascii_charptr_emchar (const Bufbyte *str)
1822 else if ( b >= 0xf8 )
1827 else if ( b >= 0xf0 )
1832 else if ( b >= 0xe0 )
1837 else if ( b >= 0xc0 )
1847 for( ; len > 0; len-- )
1850 ch = ( ch << 6 ) | ( b & 0x3f );
1854 Bufbyte i0 = *str, i1, i2 = 0;
1855 Lisp_Object charset;
1857 if (i0 == LEADING_BYTE_CONTROL_1)
1858 return (Emchar) (*++str - 0x20);
1860 if (LEADING_BYTE_PREFIX_P (i0))
1865 charset = CHARSET_BY_LEADING_BYTE (i0);
1866 if (XCHARSET_DIMENSION (charset) == 2)
1869 return MAKE_CHAR (charset, i1, i2);
1873 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1874 Do not call this directly. Use the macro valid_char_p() instead. */
1878 non_ascii_valid_char_p (Emchar ch)
1882 /* Must have only lowest 19 bits set */
1886 f1 = CHAR_FIELD1 (ch);
1887 f2 = CHAR_FIELD2 (ch);
1888 f3 = CHAR_FIELD3 (ch);
1892 Lisp_Object charset;
1894 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1895 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1896 f2 > MAX_CHAR_FIELD2_PRIVATE)
1901 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1902 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1906 NOTE: This takes advantage of the fact that
1907 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1908 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1910 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1911 if (EQ (charset, Qnil))
1913 return (XCHARSET_CHARS (charset) == 96);
1917 Lisp_Object charset;
1919 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1920 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1921 f1 > MAX_CHAR_FIELD1_PRIVATE)
1923 if (f2 < 0x20 || f3 < 0x20)
1926 #ifdef ENABLE_COMPOSITE_CHARS
1927 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1929 if (UNBOUNDP (Fgethash (make_int (ch),
1930 Vcomposite_char_char2string_hash_table,
1935 #endif /* ENABLE_COMPOSITE_CHARS */
1937 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1938 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1941 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1943 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1946 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1948 if (EQ (charset, Qnil))
1950 return (XCHARSET_CHARS (charset) == 96);
1956 /************************************************************************/
1957 /* Basic string functions */
1958 /************************************************************************/
1960 /* Copy the character pointed to by SRC into DST. Do not call this
1961 directly. Use the macro charptr_copy_char() instead.
1962 Return the number of bytes copied. */
1965 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
1967 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
1969 for (i = bytes; i; i--, dst++, src++)
1975 /************************************************************************/
1976 /* streams of Emchars */
1977 /************************************************************************/
1979 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1980 The functions below are not meant to be called directly; use
1981 the macros in insdel.h. */
1984 Lstream_get_emchar_1 (Lstream *stream, int ch)
1986 Bufbyte str[MAX_EMCHAR_LEN];
1987 Bufbyte *strptr = str;
1990 str[0] = (Bufbyte) ch;
1992 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
1994 int c = Lstream_getc (stream);
1995 bufpos_checking_assert (c >= 0);
1996 *++strptr = (Bufbyte) c;
1998 return charptr_emchar (str);
2002 Lstream_fput_emchar (Lstream *stream, Emchar ch)
2004 Bufbyte str[MAX_EMCHAR_LEN];
2005 Bytecount len = set_charptr_emchar (str, ch);
2006 return Lstream_write (stream, str, len);
2010 Lstream_funget_emchar (Lstream *stream, Emchar ch)
2012 Bufbyte str[MAX_EMCHAR_LEN];
2013 Bytecount len = set_charptr_emchar (str, ch);
2014 Lstream_unread (stream, str, len);
2018 /************************************************************************/
2019 /* charset object */
2020 /************************************************************************/
2023 mark_charset (Lisp_Object obj)
2025 Lisp_Charset *cs = XCHARSET (obj);
2027 mark_object (cs->short_name);
2028 mark_object (cs->long_name);
2029 mark_object (cs->doc_string);
2030 mark_object (cs->registry);
2031 mark_object (cs->ccl_program);
2033 mark_object (cs->encoding_table);
2034 /* mark_object (cs->decoding_table); */
2040 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
2042 Lisp_Charset *cs = XCHARSET (obj);
2046 error ("printing unreadable object #<charset %s 0x%x>",
2047 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
2050 write_c_string ("#<charset ", printcharfun);
2051 print_internal (CHARSET_NAME (cs), printcharfun, 0);
2052 write_c_string (" ", printcharfun);
2053 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
2054 write_c_string (" ", printcharfun);
2055 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
2056 write_c_string (" ", printcharfun);
2057 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
2058 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
2060 CHARSET_DIMENSION (cs),
2061 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
2062 CHARSET_COLUMNS (cs),
2063 CHARSET_GRAPHIC (cs),
2064 CHARSET_FINAL (cs));
2065 write_c_string (buf, printcharfun);
2066 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
2067 sprintf (buf, " 0x%x>", cs->header.uid);
2068 write_c_string (buf, printcharfun);
2071 static const struct lrecord_description charset_description[] = {
2072 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
2073 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
2074 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
2075 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
2076 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
2077 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
2078 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
2080 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
2081 { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
2086 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
2087 mark_charset, print_charset, 0, 0, 0,
2088 charset_description,
2091 /* Make a new charset. */
2092 /* #### SJT Should generic properties be allowed? */
2094 make_charset (Charset_ID id, Lisp_Object name,
2095 unsigned short chars, unsigned char dimension,
2096 unsigned char columns, unsigned char graphic,
2097 Bufbyte final, unsigned char direction, Lisp_Object short_name,
2098 Lisp_Object long_name, Lisp_Object doc,
2100 Lisp_Object decoding_table,
2101 Emchar ucs_min, Emchar ucs_max,
2102 Emchar code_offset, unsigned char byte_offset)
2105 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
2109 XSETCHARSET (obj, cs);
2111 CHARSET_ID (cs) = id;
2112 CHARSET_NAME (cs) = name;
2113 CHARSET_SHORT_NAME (cs) = short_name;
2114 CHARSET_LONG_NAME (cs) = long_name;
2115 CHARSET_CHARS (cs) = chars;
2116 CHARSET_DIMENSION (cs) = dimension;
2117 CHARSET_DIRECTION (cs) = direction;
2118 CHARSET_COLUMNS (cs) = columns;
2119 CHARSET_GRAPHIC (cs) = graphic;
2120 CHARSET_FINAL (cs) = final;
2121 CHARSET_DOC_STRING (cs) = doc;
2122 CHARSET_REGISTRY (cs) = reg;
2123 CHARSET_CCL_PROGRAM (cs) = Qnil;
2124 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
2126 CHARSET_DECODING_TABLE(cs) = Qnil;
2127 CHARSET_ENCODING_TABLE(cs) = Qnil;
2128 CHARSET_UCS_MIN(cs) = ucs_min;
2129 CHARSET_UCS_MAX(cs) = ucs_max;
2130 CHARSET_CODE_OFFSET(cs) = code_offset;
2131 CHARSET_BYTE_OFFSET(cs) = byte_offset;
2135 if (id == LEADING_BYTE_ASCII)
2136 CHARSET_REP_BYTES (cs) = 1;
2138 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
2140 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
2145 /* some charsets do not have final characters. This includes
2146 ASCII, Control-1, Composite, and the two faux private
2148 unsigned char iso2022_type
2149 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
2151 if (code_offset == 0)
2153 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
2154 chlook->charset_by_attributes[iso2022_type][final] = obj;
2158 (chlook->charset_by_attributes[iso2022_type][final][direction]));
2159 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
2163 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
2164 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
2166 /* Some charsets are "faux" and don't have names or really exist at
2167 all except in the leading-byte table. */
2169 Fputhash (name, obj, Vcharset_hash_table);
2174 get_unallocated_leading_byte (int dimension)
2179 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
2182 lb = chlook->next_allocated_leading_byte++;
2186 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
2189 lb = chlook->next_allocated_1_byte_leading_byte++;
2193 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
2196 lb = chlook->next_allocated_2_byte_leading_byte++;
2202 ("No more character sets free for this dimension",
2203 make_int (dimension));
2209 /* Number of Big5 characters which have the same code in 1st byte. */
2211 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
2214 decode_builtin_char (Lisp_Object charset, int code_point)
2218 if (EQ (charset, Vcharset_chinese_big5))
2220 int c1 = code_point >> 8;
2221 int c2 = code_point & 0xFF;
2224 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
2226 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
2227 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
2229 I = (c1 - 0xA1) * BIG5_SAME_ROW
2230 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
2234 charset = Vcharset_chinese_big5_1;
2238 charset = Vcharset_chinese_big5_2;
2239 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
2241 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
2244 if ((final = XCHARSET_FINAL (charset)) >= '0')
2246 if (XCHARSET_DIMENSION (charset) == 1)
2248 switch (XCHARSET_CHARS (charset))
2252 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
2255 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
2263 switch (XCHARSET_CHARS (charset))
2266 return MIN_CHAR_94x94
2267 + (final - '0') * 94 * 94
2268 + (((code_point >> 8) & 0x7F) - 33) * 94
2269 + ((code_point & 0x7F) - 33);
2271 return MIN_CHAR_96x96
2272 + (final - '0') * 96 * 96
2273 + (((code_point >> 8) & 0x7F) - 32) * 96
2274 + ((code_point & 0x7F) - 32);
2281 else if (XCHARSET_UCS_MAX (charset))
2284 = (XCHARSET_DIMENSION (charset) == 1
2286 code_point - XCHARSET_BYTE_OFFSET (charset)
2288 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
2289 * XCHARSET_CHARS (charset)
2290 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
2291 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
2292 if ((cid < XCHARSET_UCS_MIN (charset))
2293 || (XCHARSET_UCS_MAX (charset) < cid))
2302 range_charset_code_point (Lisp_Object charset, Emchar ch)
2306 if ((XCHARSET_UCS_MIN (charset) <= ch)
2307 && (ch <= XCHARSET_UCS_MAX (charset)))
2309 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
2311 if (XCHARSET_CHARS (charset) == 256)
2313 else if (XCHARSET_DIMENSION (charset) == 1)
2314 return d + XCHARSET_BYTE_OFFSET (charset);
2315 else if (XCHARSET_DIMENSION (charset) == 2)
2317 ((d / XCHARSET_CHARS (charset)
2318 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2319 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2320 else if (XCHARSET_DIMENSION (charset) == 3)
2322 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2323 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2324 | ((d / XCHARSET_CHARS (charset)
2325 % XCHARSET_CHARS (charset)
2326 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2327 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2328 else /* if (XCHARSET_DIMENSION (charset) == 4) */
2330 ((d / (XCHARSET_CHARS (charset)
2331 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2332 + XCHARSET_BYTE_OFFSET (charset)) << 24)
2333 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2334 % XCHARSET_CHARS (charset)
2335 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2336 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
2337 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2338 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2340 else if (XCHARSET_CODE_OFFSET (charset) == 0)
2342 if (XCHARSET_DIMENSION (charset) == 1)
2344 if (XCHARSET_CHARS (charset) == 94)
2346 if (((d = ch - (MIN_CHAR_94
2347 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
2351 else if (XCHARSET_CHARS (charset) == 96)
2353 if (((d = ch - (MIN_CHAR_96
2354 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
2361 else if (XCHARSET_DIMENSION (charset) == 2)
2363 if (XCHARSET_CHARS (charset) == 94)
2365 if (((d = ch - (MIN_CHAR_94x94
2366 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
2369 return (((d / 94) + 33) << 8) | (d % 94 + 33);
2371 else if (XCHARSET_CHARS (charset) == 96)
2373 if (((d = ch - (MIN_CHAR_96x96
2374 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
2377 return (((d / 96) + 32) << 8) | (d % 96 + 32);
2383 if (EQ (charset, Vcharset_mojikyo_2022_1)
2384 && (MIN_CHAR_MOJIKYO < ch) && (ch < MIN_CHAR_MOJIKYO + 94 * 60 * 94))
2386 int m = ch - MIN_CHAR_MOJIKYO - 1;
2387 int byte1 = m / (94 * 60) + 33;
2388 int byte2 = (m % (94 * 60)) / 94;
2389 int byte3 = m % 94 + 33;
2395 return (byte1 << 16) | (byte2 << 8) | byte3;
2401 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
2403 if (c <= MAX_CHAR_BASIC_LATIN)
2405 *charset = Vcharset_ascii;
2410 *charset = Vcharset_control_1;
2415 *charset = Vcharset_latin_iso8859_1;
2419 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
2421 *charset = Vcharset_hebrew_iso8859_8;
2422 return c - MIN_CHAR_HEBREW + 0x20;
2425 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
2427 *charset = Vcharset_thai_tis620;
2428 return c - MIN_CHAR_THAI + 0x20;
2431 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
2432 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
2434 return list2 (Vcharset_katakana_jisx0201,
2435 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
2438 else if (c <= MAX_CHAR_BMP)
2440 *charset = Vcharset_ucs_bmp;
2443 else if (c < MIN_CHAR_DAIKANWA)
2445 *charset = Vcharset_ucs;
2448 else if (c <= MAX_CHAR_DAIKANWA)
2450 *charset = Vcharset_ideograph_daikanwa;
2451 return c - MIN_CHAR_DAIKANWA;
2453 else if (c <= MAX_CHAR_MOJIKYO_0)
2455 *charset = Vcharset_mojikyo;
2456 return c - MIN_CHAR_MOJIKYO_0;
2458 else if (c < MIN_CHAR_94)
2460 *charset = Vcharset_ucs;
2463 else if (c <= MAX_CHAR_94)
2465 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
2466 ((c - MIN_CHAR_94) / 94) + '0',
2467 CHARSET_LEFT_TO_RIGHT);
2468 if (!NILP (*charset))
2469 return ((c - MIN_CHAR_94) % 94) + 33;
2472 *charset = Vcharset_ucs;
2476 else if (c <= MAX_CHAR_96)
2478 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
2479 ((c - MIN_CHAR_96) / 96) + '0',
2480 CHARSET_LEFT_TO_RIGHT);
2481 if (!NILP (*charset))
2482 return ((c - MIN_CHAR_96) % 96) + 32;
2485 *charset = Vcharset_ucs;
2489 else if (c <= MAX_CHAR_94x94)
2492 = CHARSET_BY_ATTRIBUTES (94, 2,
2493 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
2494 CHARSET_LEFT_TO_RIGHT);
2495 if (!NILP (*charset))
2496 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
2497 | (((c - MIN_CHAR_94x94) % 94) + 33);
2500 *charset = Vcharset_ucs;
2504 else if (c <= MAX_CHAR_96x96)
2507 = CHARSET_BY_ATTRIBUTES (96, 2,
2508 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
2509 CHARSET_LEFT_TO_RIGHT);
2510 if (!NILP (*charset))
2511 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
2512 | (((c - MIN_CHAR_96x96) % 96) + 32);
2515 *charset = Vcharset_ucs;
2519 else if (c < MIN_CHAR_MOJIKYO)
2521 *charset = Vcharset_ucs;
2524 else if (c <= MAX_CHAR_MOJIKYO)
2526 *charset = Vcharset_mojikyo;
2527 return c - MIN_CHAR_MOJIKYO;
2531 *charset = Vcharset_ucs;
2536 Lisp_Object Vdefault_coded_charset_priority_list;
2540 /************************************************************************/
2541 /* Basic charset Lisp functions */
2542 /************************************************************************/
2544 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
2545 Return non-nil if OBJECT is a charset.
2549 return CHARSETP (object) ? Qt : Qnil;
2552 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
2553 Retrieve the charset of the given name.
2554 If CHARSET-OR-NAME is a charset object, it is simply returned.
2555 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
2556 nil is returned. Otherwise the associated charset object is returned.
2560 if (CHARSETP (charset_or_name))
2561 return charset_or_name;
2563 CHECK_SYMBOL (charset_or_name);
2564 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
2567 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
2568 Retrieve the charset of the given name.
2569 Same as `find-charset' except an error is signalled if there is no such
2570 charset instead of returning nil.
2574 Lisp_Object charset = Ffind_charset (name);
2577 signal_simple_error ("No such charset", name);
2581 /* We store the charsets in hash tables with the names as the key and the
2582 actual charset object as the value. Occasionally we need to use them
2583 in a list format. These routines provide us with that. */
2584 struct charset_list_closure
2586 Lisp_Object *charset_list;
2590 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
2591 void *charset_list_closure)
2593 /* This function can GC */
2594 struct charset_list_closure *chcl =
2595 (struct charset_list_closure*) charset_list_closure;
2596 Lisp_Object *charset_list = chcl->charset_list;
2598 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
2602 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
2603 Return a list of the names of all defined charsets.
2607 Lisp_Object charset_list = Qnil;
2608 struct gcpro gcpro1;
2609 struct charset_list_closure charset_list_closure;
2611 GCPRO1 (charset_list);
2612 charset_list_closure.charset_list = &charset_list;
2613 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
2614 &charset_list_closure);
2617 return charset_list;
2620 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
2621 Return the name of charset CHARSET.
2625 return XCHARSET_NAME (Fget_charset (charset));
2628 /* #### SJT Should generic properties be allowed? */
2629 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
2630 Define a new character set.
2631 This function is for use with Mule support.
2632 NAME is a symbol, the name by which the character set is normally referred.
2633 DOC-STRING is a string describing the character set.
2634 PROPS is a property list, describing the specific nature of the
2635 character set. Recognized properties are:
2637 'short-name Short version of the charset name (ex: Latin-1)
2638 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
2639 'registry A regular expression matching the font registry field for
2641 'dimension Number of octets used to index a character in this charset.
2642 Either 1 or 2. Defaults to 1.
2643 'columns Number of columns used to display a character in this charset.
2644 Only used in TTY mode. (Under X, the actual width of a
2645 character can be derived from the font used to display the
2646 characters.) If unspecified, defaults to the dimension
2647 (this is almost always the correct value).
2648 'chars Number of characters in each dimension (94 or 96).
2649 Defaults to 94. Note that if the dimension is 2, the
2650 character set thus described is 94x94 or 96x96.
2651 'final Final byte of ISO 2022 escape sequence. Must be
2652 supplied. Each combination of (DIMENSION, CHARS) defines a
2653 separate namespace for final bytes. Note that ISO
2654 2022 restricts the final byte to the range
2655 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
2656 dimension == 2. Note also that final bytes in the range
2657 0x30 - 0x3F are reserved for user-defined (not official)
2659 'graphic 0 (use left half of font on output) or 1 (use right half
2660 of font on output). Defaults to 0. For example, for
2661 a font whose registry is ISO8859-1, the left half
2662 (octets 0x20 - 0x7F) is the `ascii' character set, while
2663 the right half (octets 0xA0 - 0xFF) is the `latin-1'
2664 character set. With 'graphic set to 0, the octets
2665 will have their high bit cleared; with it set to 1,
2666 the octets will have their high bit set.
2667 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
2669 'ccl-program A compiled CCL program used to convert a character in
2670 this charset into an index into the font. This is in
2671 addition to the 'graphic property. The CCL program
2672 is passed the octets of the character, with the high
2673 bit cleared and set depending upon whether the value
2674 of the 'graphic property is 0 or 1.
2676 (name, doc_string, props))
2678 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
2679 int direction = CHARSET_LEFT_TO_RIGHT;
2680 Lisp_Object registry = Qnil;
2681 Lisp_Object charset;
2682 Lisp_Object ccl_program = Qnil;
2683 Lisp_Object short_name = Qnil, long_name = Qnil;
2684 int byte_offset = -1;
2686 CHECK_SYMBOL (name);
2687 if (!NILP (doc_string))
2688 CHECK_STRING (doc_string);
2690 charset = Ffind_charset (name);
2691 if (!NILP (charset))
2692 signal_simple_error ("Cannot redefine existing charset", name);
2695 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
2697 if (EQ (keyword, Qshort_name))
2699 CHECK_STRING (value);
2703 if (EQ (keyword, Qlong_name))
2705 CHECK_STRING (value);
2709 else if (EQ (keyword, Qdimension))
2712 dimension = XINT (value);
2713 if (dimension < 1 || dimension > 2)
2714 signal_simple_error ("Invalid value for 'dimension", value);
2717 else if (EQ (keyword, Qchars))
2720 chars = XINT (value);
2721 if (chars != 94 && chars != 96)
2722 signal_simple_error ("Invalid value for 'chars", value);
2725 else if (EQ (keyword, Qcolumns))
2728 columns = XINT (value);
2729 if (columns != 1 && columns != 2)
2730 signal_simple_error ("Invalid value for 'columns", value);
2733 else if (EQ (keyword, Qgraphic))
2736 graphic = XINT (value);
2738 if (graphic < 0 || graphic > 2)
2740 if (graphic < 0 || graphic > 1)
2742 signal_simple_error ("Invalid value for 'graphic", value);
2745 else if (EQ (keyword, Qregistry))
2747 CHECK_STRING (value);
2751 else if (EQ (keyword, Qdirection))
2753 if (EQ (value, Ql2r))
2754 direction = CHARSET_LEFT_TO_RIGHT;
2755 else if (EQ (value, Qr2l))
2756 direction = CHARSET_RIGHT_TO_LEFT;
2758 signal_simple_error ("Invalid value for 'direction", value);
2761 else if (EQ (keyword, Qfinal))
2763 CHECK_CHAR_COERCE_INT (value);
2764 final = XCHAR (value);
2765 if (final < '0' || final > '~')
2766 signal_simple_error ("Invalid value for 'final", value);
2769 else if (EQ (keyword, Qccl_program))
2771 struct ccl_program test_ccl;
2773 if (setup_ccl_program (&test_ccl, value) < 0)
2774 signal_simple_error ("Invalid value for 'ccl-program", value);
2775 ccl_program = value;
2779 signal_simple_error ("Unrecognized property", keyword);
2784 error ("'final must be specified");
2785 if (dimension == 2 && final > 0x5F)
2787 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2790 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2791 CHARSET_LEFT_TO_RIGHT)) ||
2792 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2793 CHARSET_RIGHT_TO_LEFT)))
2795 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2797 id = get_unallocated_leading_byte (dimension);
2799 if (NILP (doc_string))
2800 doc_string = build_string ("");
2802 if (NILP (registry))
2803 registry = build_string ("");
2805 if (NILP (short_name))
2806 XSETSTRING (short_name, XSYMBOL (name)->name);
2808 if (NILP (long_name))
2809 long_name = doc_string;
2812 columns = dimension;
2814 if (byte_offset < 0)
2818 else if (chars == 96)
2824 charset = make_charset (id, name, chars, dimension, columns, graphic,
2825 final, direction, short_name, long_name,
2826 doc_string, registry,
2827 Qnil, 0, 0, 0, byte_offset);
2828 if (!NILP (ccl_program))
2829 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2833 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2835 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2836 NEW-NAME is the name of the new charset. Return the new charset.
2838 (charset, new_name))
2840 Lisp_Object new_charset = Qnil;
2841 int id, chars, dimension, columns, graphic, final;
2843 Lisp_Object registry, doc_string, short_name, long_name;
2846 charset = Fget_charset (charset);
2847 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2848 signal_simple_error ("Charset already has reverse-direction charset",
2851 CHECK_SYMBOL (new_name);
2852 if (!NILP (Ffind_charset (new_name)))
2853 signal_simple_error ("Cannot redefine existing charset", new_name);
2855 cs = XCHARSET (charset);
2857 chars = CHARSET_CHARS (cs);
2858 dimension = CHARSET_DIMENSION (cs);
2859 columns = CHARSET_COLUMNS (cs);
2860 id = get_unallocated_leading_byte (dimension);
2862 graphic = CHARSET_GRAPHIC (cs);
2863 final = CHARSET_FINAL (cs);
2864 direction = CHARSET_RIGHT_TO_LEFT;
2865 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2866 direction = CHARSET_LEFT_TO_RIGHT;
2867 doc_string = CHARSET_DOC_STRING (cs);
2868 short_name = CHARSET_SHORT_NAME (cs);
2869 long_name = CHARSET_LONG_NAME (cs);
2870 registry = CHARSET_REGISTRY (cs);
2872 new_charset = make_charset (id, new_name, chars, dimension, columns,
2873 graphic, final, direction, short_name, long_name,
2874 doc_string, registry,
2876 CHARSET_DECODING_TABLE(cs),
2877 CHARSET_UCS_MIN(cs),
2878 CHARSET_UCS_MAX(cs),
2879 CHARSET_CODE_OFFSET(cs),
2880 CHARSET_BYTE_OFFSET(cs)
2886 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2887 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
2892 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2893 Define symbol ALIAS as an alias for CHARSET.
2897 CHECK_SYMBOL (alias);
2898 charset = Fget_charset (charset);
2899 return Fputhash (alias, charset, Vcharset_hash_table);
2902 /* #### Reverse direction charsets not yet implemented. */
2904 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2906 Return the reverse-direction charset parallel to CHARSET, if any.
2907 This is the charset with the same properties (in particular, the same
2908 dimension, number of characters per dimension, and final byte) as
2909 CHARSET but whose characters are displayed in the opposite direction.
2913 charset = Fget_charset (charset);
2914 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
2918 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2919 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2920 If DIRECTION is omitted, both directions will be checked (left-to-right
2921 will be returned if character sets exist for both directions).
2923 (dimension, chars, final, direction))
2925 int dm, ch, fi, di = -1;
2926 Lisp_Object obj = Qnil;
2928 CHECK_INT (dimension);
2929 dm = XINT (dimension);
2930 if (dm < 1 || dm > 2)
2931 signal_simple_error ("Invalid value for DIMENSION", dimension);
2935 if (ch != 94 && ch != 96)
2936 signal_simple_error ("Invalid value for CHARS", chars);
2938 CHECK_CHAR_COERCE_INT (final);
2940 if (fi < '0' || fi > '~')
2941 signal_simple_error ("Invalid value for FINAL", final);
2943 if (EQ (direction, Ql2r))
2944 di = CHARSET_LEFT_TO_RIGHT;
2945 else if (EQ (direction, Qr2l))
2946 di = CHARSET_RIGHT_TO_LEFT;
2947 else if (!NILP (direction))
2948 signal_simple_error ("Invalid value for DIRECTION", direction);
2950 if (dm == 2 && fi > 0x5F)
2952 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2956 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
2958 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
2961 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
2964 return XCHARSET_NAME (obj);
2968 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2969 Return short name of CHARSET.
2973 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2976 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2977 Return long name of CHARSET.
2981 return XCHARSET_LONG_NAME (Fget_charset (charset));
2984 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2985 Return description of CHARSET.
2989 return XCHARSET_DOC_STRING (Fget_charset (charset));
2992 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2993 Return dimension of CHARSET.
2997 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
3000 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
3001 Return property PROP of CHARSET, a charset object or symbol naming a charset.
3002 Recognized properties are those listed in `make-charset', as well as
3003 'name and 'doc-string.
3009 charset = Fget_charset (charset);
3010 cs = XCHARSET (charset);
3012 CHECK_SYMBOL (prop);
3013 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
3014 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
3015 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
3016 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
3017 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
3018 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
3019 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
3020 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
3021 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
3022 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
3023 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
3024 if (EQ (prop, Qdirection))
3025 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
3026 if (EQ (prop, Qreverse_direction_charset))
3028 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
3029 /* #### Is this translation OK? If so, error checking sufficient? */
3030 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
3032 signal_simple_error ("Unrecognized charset property name", prop);
3033 return Qnil; /* not reached */
3036 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
3037 Return charset identification number of CHARSET.
3041 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
3044 /* #### We need to figure out which properties we really want to
3047 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
3048 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
3050 (charset, ccl_program))
3052 struct ccl_program test_ccl;
3054 charset = Fget_charset (charset);
3055 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
3056 signal_simple_error ("Invalid ccl-program", ccl_program);
3057 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
3062 invalidate_charset_font_caches (Lisp_Object charset)
3064 /* Invalidate font cache entries for charset on all devices. */
3065 Lisp_Object devcons, concons, hash_table;
3066 DEVICE_LOOP_NO_BREAK (devcons, concons)
3068 struct device *d = XDEVICE (XCAR (devcons));
3069 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
3070 if (!UNBOUNDP (hash_table))
3071 Fclrhash (hash_table);
3075 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
3076 Set the 'registry property of CHARSET to REGISTRY.
3078 (charset, registry))
3080 charset = Fget_charset (charset);
3081 CHECK_STRING (registry);
3082 XCHARSET_REGISTRY (charset) = registry;
3083 invalidate_charset_font_caches (charset);
3084 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
3089 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
3090 Return mapping-table of CHARSET.
3094 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
3097 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
3098 Set mapping-table of CHARSET to TABLE.
3102 struct Lisp_Charset *cs;
3106 charset = Fget_charset (charset);
3107 cs = XCHARSET (charset);
3111 if (VECTORP (CHARSET_DECODING_TABLE(cs)))
3112 make_vector_newer (CHARSET_DECODING_TABLE(cs));
3113 CHARSET_DECODING_TABLE(cs) = Qnil;
3116 else if (VECTORP (table))
3118 int ccs_len = CHARSET_BYTE_SIZE (cs);
3119 int ret = decoding_table_check_elements (table,
3120 CHARSET_DIMENSION (cs),
3125 signal_simple_error ("Too big table", table);
3127 signal_simple_error ("Invalid element is found", table);
3129 signal_simple_error ("Something wrong", table);
3131 CHARSET_DECODING_TABLE(cs) = Qnil;
3134 signal_error (Qwrong_type_argument,
3135 list2 (build_translated_string ("vector-or-nil-p"),
3138 byte_offset = CHARSET_BYTE_OFFSET (cs);
3139 switch (CHARSET_DIMENSION (cs))
3142 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3144 Lisp_Object c = XVECTOR_DATA(table)[i];
3147 put_char_ccs_code_point (c, charset,
3148 make_int (i + byte_offset));
3152 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3154 Lisp_Object v = XVECTOR_DATA(table)[i];
3160 for (j = 0; j < XVECTOR_LENGTH (v); j++)
3162 Lisp_Object c = XVECTOR_DATA(v)[j];
3165 put_char_ccs_code_point
3167 make_int ( ( (i + byte_offset) << 8 )
3173 put_char_ccs_code_point (v, charset,
3174 make_int (i + byte_offset));
3183 /************************************************************************/
3184 /* Lisp primitives for working with characters */
3185 /************************************************************************/
3188 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
3189 Make a character from CHARSET and code-point CODE.
3195 charset = Fget_charset (charset);
3198 if (XCHARSET_GRAPHIC (charset) == 1)
3200 c = DECODE_CHAR (charset, c);
3201 return c >= 0 ? make_char (c) : Qnil;
3204 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
3205 Make a builtin character from CHARSET and code-point CODE.
3211 charset = Fget_charset (charset);
3213 if (EQ (charset, Vcharset_latin_viscii))
3215 Lisp_Object chr = Fdecode_char (charset, code);
3221 (ret = Fget_char_attribute (chr,
3222 Vcharset_latin_viscii_lower,
3225 charset = Vcharset_latin_viscii_lower;
3229 (ret = Fget_char_attribute (chr,
3230 Vcharset_latin_viscii_upper,
3233 charset = Vcharset_latin_viscii_upper;
3240 if (XCHARSET_GRAPHIC (charset) == 1)
3243 c = decode_builtin_char (charset, c);
3244 return c >= 0 ? make_char (c) : Fdecode_char (charset, code);
3248 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
3249 Make a character from CHARSET and octets ARG1 and ARG2.
3250 ARG2 is required only for characters from two-dimensional charsets.
3251 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
3252 character s with caron.
3254 (charset, arg1, arg2))
3258 int lowlim, highlim;
3260 charset = Fget_charset (charset);
3261 cs = XCHARSET (charset);
3263 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
3264 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
3266 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
3268 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
3269 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
3272 /* It is useful (and safe, according to Olivier Galibert) to strip
3273 the 8th bit off ARG1 and ARG2 because it allows programmers to
3274 write (make-char 'latin-iso8859-2 CODE) where code is the actual
3275 Latin 2 code of the character. */
3283 if (a1 < lowlim || a1 > highlim)
3284 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
3286 if (CHARSET_DIMENSION (cs) == 1)
3290 ("Charset is of dimension one; second octet must be nil", arg2);
3291 return make_char (MAKE_CHAR (charset, a1, 0));
3300 a2 = XINT (arg2) & 0x7f;
3302 if (a2 < lowlim || a2 > highlim)
3303 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
3305 return make_char (MAKE_CHAR (charset, a1, a2));
3308 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
3309 Return the character set of CHARACTER.
3313 CHECK_CHAR_COERCE_INT (character);
3315 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
3318 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
3319 Return the octet numbered N (should be 0 or 1) of CHARACTER.
3320 N defaults to 0 if omitted.
3324 Lisp_Object charset;
3327 CHECK_CHAR_COERCE_INT (character);
3329 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
3331 if (NILP (n) || EQ (n, Qzero))
3332 return make_int (octet0);
3333 else if (EQ (n, make_int (1)))
3334 return make_int (octet1);
3336 signal_simple_error ("Octet number must be 0 or 1", n);
3339 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
3340 Return list of charset and one or two position-codes of CHARACTER.
3344 /* This function can GC */
3345 struct gcpro gcpro1, gcpro2;
3346 Lisp_Object charset = Qnil;
3347 Lisp_Object rc = Qnil;
3355 GCPRO2 (charset, rc);
3356 CHECK_CHAR_COERCE_INT (character);
3359 code_point = ENCODE_CHAR (XCHAR (character), charset);
3360 dimension = XCHARSET_DIMENSION (charset);
3361 while (dimension > 0)
3363 rc = Fcons (make_int (code_point & 255), rc);
3367 rc = Fcons (XCHARSET_NAME (charset), rc);
3369 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
3371 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
3373 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
3377 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
3386 #ifdef ENABLE_COMPOSITE_CHARS
3387 /************************************************************************/
3388 /* composite character functions */
3389 /************************************************************************/
3392 lookup_composite_char (Bufbyte *str, int len)
3394 Lisp_Object lispstr = make_string (str, len);
3395 Lisp_Object ch = Fgethash (lispstr,
3396 Vcomposite_char_string2char_hash_table,
3402 if (composite_char_row_next >= 128)
3403 signal_simple_error ("No more composite chars available", lispstr);
3404 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
3405 composite_char_col_next);
3406 Fputhash (make_char (emch), lispstr,
3407 Vcomposite_char_char2string_hash_table);
3408 Fputhash (lispstr, make_char (emch),
3409 Vcomposite_char_string2char_hash_table);
3410 composite_char_col_next++;
3411 if (composite_char_col_next >= 128)
3413 composite_char_col_next = 32;
3414 composite_char_row_next++;
3423 composite_char_string (Emchar ch)
3425 Lisp_Object str = Fgethash (make_char (ch),
3426 Vcomposite_char_char2string_hash_table,
3428 assert (!UNBOUNDP (str));
3432 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
3433 Convert a string into a single composite character.
3434 The character is the result of overstriking all the characters in
3439 CHECK_STRING (string);
3440 return make_char (lookup_composite_char (XSTRING_DATA (string),
3441 XSTRING_LENGTH (string)));
3444 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
3445 Return a string of the characters comprising a composite character.
3453 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
3454 signal_simple_error ("Must be composite char", ch);
3455 return composite_char_string (emch);
3457 #endif /* ENABLE_COMPOSITE_CHARS */
3460 /************************************************************************/
3461 /* initialization */
3462 /************************************************************************/
3465 syms_of_mule_charset (void)
3468 INIT_LRECORD_IMPLEMENTATION (uint8_byte_table);
3469 INIT_LRECORD_IMPLEMENTATION (uint16_byte_table);
3470 INIT_LRECORD_IMPLEMENTATION (byte_table);
3471 INIT_LRECORD_IMPLEMENTATION (char_id_table);
3473 INIT_LRECORD_IMPLEMENTATION (charset);
3475 DEFSUBR (Fcharsetp);
3476 DEFSUBR (Ffind_charset);
3477 DEFSUBR (Fget_charset);
3478 DEFSUBR (Fcharset_list);
3479 DEFSUBR (Fcharset_name);
3480 DEFSUBR (Fmake_charset);
3481 DEFSUBR (Fmake_reverse_direction_charset);
3482 /* DEFSUBR (Freverse_direction_charset); */
3483 DEFSUBR (Fdefine_charset_alias);
3484 DEFSUBR (Fcharset_from_attributes);
3485 DEFSUBR (Fcharset_short_name);
3486 DEFSUBR (Fcharset_long_name);
3487 DEFSUBR (Fcharset_description);
3488 DEFSUBR (Fcharset_dimension);
3489 DEFSUBR (Fcharset_property);
3490 DEFSUBR (Fcharset_id);
3491 DEFSUBR (Fset_charset_ccl_program);
3492 DEFSUBR (Fset_charset_registry);
3494 DEFSUBR (Fchar_attribute_list);
3495 DEFSUBR (Ffind_char_attribute_table);
3496 DEFSUBR (Fchar_attribute_alist);
3497 DEFSUBR (Fget_char_attribute);
3498 DEFSUBR (Fput_char_attribute);
3499 DEFSUBR (Fremove_char_attribute);
3500 DEFSUBR (Fdefine_char);
3501 DEFSUBR (Ffind_char);
3502 DEFSUBR (Fchar_variants);
3503 DEFSUBR (Fget_composite_char);
3504 DEFSUBR (Fcharset_mapping_table);
3505 DEFSUBR (Fset_charset_mapping_table);
3509 DEFSUBR (Fdecode_char);
3510 DEFSUBR (Fdecode_builtin_char);
3512 DEFSUBR (Fmake_char);
3513 DEFSUBR (Fchar_charset);
3514 DEFSUBR (Fchar_octet);
3515 DEFSUBR (Fsplit_char);
3517 #ifdef ENABLE_COMPOSITE_CHARS
3518 DEFSUBR (Fmake_composite_char);
3519 DEFSUBR (Fcomposite_char_string);
3522 defsymbol (&Qcharsetp, "charsetp");
3523 defsymbol (&Qregistry, "registry");
3524 defsymbol (&Qfinal, "final");
3525 defsymbol (&Qgraphic, "graphic");
3526 defsymbol (&Qdirection, "direction");
3527 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
3528 defsymbol (&Qshort_name, "short-name");
3529 defsymbol (&Qlong_name, "long-name");
3531 defsymbol (&Ql2r, "l2r");
3532 defsymbol (&Qr2l, "r2l");
3534 /* Charsets, compatible with FSF 20.3
3535 Naming convention is Script-Charset[-Edition] */
3536 defsymbol (&Qascii, "ascii");
3537 defsymbol (&Qcontrol_1, "control-1");
3538 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
3539 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
3540 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
3541 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
3542 defsymbol (&Qthai_tis620, "thai-tis620");
3543 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
3544 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
3545 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
3546 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
3547 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
3548 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
3549 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
3550 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
3551 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
3552 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
3553 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
3554 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
3555 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
3556 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
3557 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
3558 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
3560 defsymbol (&Qto_ucs, "=>ucs");
3561 defsymbol (&Q_ucs, "->ucs");
3562 defsymbol (&Q_decomposition, "->decomposition");
3563 defsymbol (&Qcompat, "compat");
3564 defsymbol (&Qisolated, "isolated");
3565 defsymbol (&Qinitial, "initial");
3566 defsymbol (&Qmedial, "medial");
3567 defsymbol (&Qfinal, "final");
3568 defsymbol (&Qvertical, "vertical");
3569 defsymbol (&QnoBreak, "noBreak");
3570 defsymbol (&Qfraction, "fraction");
3571 defsymbol (&Qsuper, "super");
3572 defsymbol (&Qsub, "sub");
3573 defsymbol (&Qcircle, "circle");
3574 defsymbol (&Qsquare, "square");
3575 defsymbol (&Qwide, "wide");
3576 defsymbol (&Qnarrow, "narrow");
3577 defsymbol (&Qsmall, "small");
3578 defsymbol (&Qfont, "font");
3579 defsymbol (&Qucs, "ucs");
3580 defsymbol (&Qucs_bmp, "ucs-bmp");
3581 defsymbol (&Qucs_cns, "ucs-cns");
3582 defsymbol (&Qucs_big5, "ucs-big5");
3583 defsymbol (&Qlatin_viscii, "latin-viscii");
3584 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
3585 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
3586 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
3587 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
3588 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
3589 defsymbol (&Qideograph_gt, "ideograph-gt");
3590 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
3591 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
3592 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
3593 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
3594 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
3595 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
3596 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
3597 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
3598 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
3599 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
3600 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
3601 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
3602 defsymbol (&Qchinese_big5, "chinese-big5");
3603 defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp");
3604 defsymbol (&Qmojikyo, "mojikyo");
3605 defsymbol (&Qmojikyo_2022_1, "mojikyo-2022-1");
3606 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
3607 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
3608 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
3609 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
3610 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
3611 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
3612 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
3613 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
3614 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
3615 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
3616 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
3617 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
3618 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
3619 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
3620 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
3621 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
3622 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
3623 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
3624 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
3625 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
3626 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
3627 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
3629 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
3630 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
3632 defsymbol (&Qcomposite, "composite");
3636 vars_of_mule_charset (void)
3643 chlook = xnew (struct charset_lookup);
3644 dumpstruct (&chlook, &charset_lookup_description);
3646 /* Table of charsets indexed by leading byte. */
3647 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
3648 chlook->charset_by_leading_byte[i] = Qnil;
3651 /* Table of charsets indexed by type/final-byte. */
3652 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3653 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3654 chlook->charset_by_attributes[i][j] = Qnil;
3656 /* Table of charsets indexed by type/final-byte/direction. */
3657 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3658 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3659 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
3660 chlook->charset_by_attributes[i][j][k] = Qnil;
3664 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
3666 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
3667 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
3671 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3672 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
3673 Leading-code of private TYPE9N charset of column-width 1.
3675 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3679 Vutf_2000_version = build_string("0.17 (Hōryūji)");
3680 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
3681 Version number of UTF-2000.
3684 staticpro (&Vcharacter_composition_table);
3685 Vcharacter_composition_table = make_char_id_table (Qnil);
3687 staticpro (&Vcharacter_variant_table);
3688 Vcharacter_variant_table = make_char_id_table (Qnil);
3690 Vdefault_coded_charset_priority_list = Qnil;
3691 DEFVAR_LISP ("default-coded-charset-priority-list",
3692 &Vdefault_coded_charset_priority_list /*
3693 Default order of preferred coded-character-sets.
3699 complex_vars_of_mule_charset (void)
3701 staticpro (&Vcharset_hash_table);
3702 Vcharset_hash_table =
3703 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3705 /* Predefined character sets. We store them into variables for
3709 staticpro (&Vchar_attribute_hash_table);
3710 Vchar_attribute_hash_table
3711 = make_lisp_hash_table (16, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3713 staticpro (&Vcharset_ucs);
3715 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
3716 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3717 build_string ("UCS"),
3718 build_string ("UCS"),
3719 build_string ("ISO/IEC 10646"),
3721 Qnil, 0, 0xFFFFFFF, 0, 0);
3722 staticpro (&Vcharset_ucs_bmp);
3724 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
3725 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3726 build_string ("BMP"),
3727 build_string ("BMP"),
3728 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
3729 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
3730 Qnil, 0, 0xFFFF, 0, 0);
3731 staticpro (&Vcharset_ucs_cns);
3733 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
3734 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3735 build_string ("UCS for CNS"),
3736 build_string ("UCS for CNS 11643"),
3737 build_string ("ISO/IEC 10646 for CNS 11643"),
3740 staticpro (&Vcharset_ucs_big5);
3742 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
3743 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3744 build_string ("UCS for Big5"),
3745 build_string ("UCS for Big5"),
3746 build_string ("ISO/IEC 10646 for Big5"),
3750 # define MIN_CHAR_THAI 0
3751 # define MAX_CHAR_THAI 0
3752 /* # define MIN_CHAR_HEBREW 0 */
3753 /* # define MAX_CHAR_HEBREW 0 */
3754 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3755 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
3757 staticpro (&Vcharset_ascii);
3759 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3760 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3761 build_string ("ASCII"),
3762 build_string ("ASCII)"),
3763 build_string ("ASCII (ISO646 IRV)"),
3764 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3765 Qnil, 0, 0x7F, 0, 0);
3766 staticpro (&Vcharset_control_1);
3767 Vcharset_control_1 =
3768 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3769 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3770 build_string ("C1"),
3771 build_string ("Control characters"),
3772 build_string ("Control characters 128-191"),
3774 Qnil, 0x80, 0x9F, 0, 0);
3775 staticpro (&Vcharset_latin_iso8859_1);
3776 Vcharset_latin_iso8859_1 =
3777 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3778 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3779 build_string ("Latin-1"),
3780 build_string ("ISO8859-1 (Latin-1)"),
3781 build_string ("ISO8859-1 (Latin-1)"),
3782 build_string ("iso8859-1"),
3783 Qnil, 0xA0, 0xFF, 0, 32);
3784 staticpro (&Vcharset_latin_iso8859_2);
3785 Vcharset_latin_iso8859_2 =
3786 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3787 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3788 build_string ("Latin-2"),
3789 build_string ("ISO8859-2 (Latin-2)"),
3790 build_string ("ISO8859-2 (Latin-2)"),
3791 build_string ("iso8859-2"),
3793 staticpro (&Vcharset_latin_iso8859_3);
3794 Vcharset_latin_iso8859_3 =
3795 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3796 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3797 build_string ("Latin-3"),
3798 build_string ("ISO8859-3 (Latin-3)"),
3799 build_string ("ISO8859-3 (Latin-3)"),
3800 build_string ("iso8859-3"),
3802 staticpro (&Vcharset_latin_iso8859_4);
3803 Vcharset_latin_iso8859_4 =
3804 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3805 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3806 build_string ("Latin-4"),
3807 build_string ("ISO8859-4 (Latin-4)"),
3808 build_string ("ISO8859-4 (Latin-4)"),
3809 build_string ("iso8859-4"),
3811 staticpro (&Vcharset_thai_tis620);
3812 Vcharset_thai_tis620 =
3813 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3814 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3815 build_string ("TIS620"),
3816 build_string ("TIS620 (Thai)"),
3817 build_string ("TIS620.2529 (Thai)"),
3818 build_string ("tis620"),
3819 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
3820 staticpro (&Vcharset_greek_iso8859_7);
3821 Vcharset_greek_iso8859_7 =
3822 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3823 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3824 build_string ("ISO8859-7"),
3825 build_string ("ISO8859-7 (Greek)"),
3826 build_string ("ISO8859-7 (Greek)"),
3827 build_string ("iso8859-7"),
3829 staticpro (&Vcharset_arabic_iso8859_6);
3830 Vcharset_arabic_iso8859_6 =
3831 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3832 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3833 build_string ("ISO8859-6"),
3834 build_string ("ISO8859-6 (Arabic)"),
3835 build_string ("ISO8859-6 (Arabic)"),
3836 build_string ("iso8859-6"),
3838 staticpro (&Vcharset_hebrew_iso8859_8);
3839 Vcharset_hebrew_iso8859_8 =
3840 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3841 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3842 build_string ("ISO8859-8"),
3843 build_string ("ISO8859-8 (Hebrew)"),
3844 build_string ("ISO8859-8 (Hebrew)"),
3845 build_string ("iso8859-8"),
3847 0 /* MIN_CHAR_HEBREW */,
3848 0 /* MAX_CHAR_HEBREW */, 0, 32);
3849 staticpro (&Vcharset_katakana_jisx0201);
3850 Vcharset_katakana_jisx0201 =
3851 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3852 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3853 build_string ("JISX0201 Kana"),
3854 build_string ("JISX0201.1976 (Japanese Kana)"),
3855 build_string ("JISX0201.1976 Japanese Kana"),
3856 build_string ("jisx0201\\.1976"),
3858 staticpro (&Vcharset_latin_jisx0201);
3859 Vcharset_latin_jisx0201 =
3860 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3861 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3862 build_string ("JISX0201 Roman"),
3863 build_string ("JISX0201.1976 (Japanese Roman)"),
3864 build_string ("JISX0201.1976 Japanese Roman"),
3865 build_string ("jisx0201\\.1976"),
3867 staticpro (&Vcharset_cyrillic_iso8859_5);
3868 Vcharset_cyrillic_iso8859_5 =
3869 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3870 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3871 build_string ("ISO8859-5"),
3872 build_string ("ISO8859-5 (Cyrillic)"),
3873 build_string ("ISO8859-5 (Cyrillic)"),
3874 build_string ("iso8859-5"),
3876 staticpro (&Vcharset_latin_iso8859_9);
3877 Vcharset_latin_iso8859_9 =
3878 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3879 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3880 build_string ("Latin-5"),
3881 build_string ("ISO8859-9 (Latin-5)"),
3882 build_string ("ISO8859-9 (Latin-5)"),
3883 build_string ("iso8859-9"),
3885 staticpro (&Vcharset_japanese_jisx0208_1978);
3886 Vcharset_japanese_jisx0208_1978 =
3887 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3888 Qjapanese_jisx0208_1978, 94, 2,
3889 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3890 build_string ("JIS X0208:1978"),
3891 build_string ("JIS X0208:1978 (Japanese)"),
3893 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3894 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3896 staticpro (&Vcharset_chinese_gb2312);
3897 Vcharset_chinese_gb2312 =
3898 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3899 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3900 build_string ("GB2312"),
3901 build_string ("GB2312)"),
3902 build_string ("GB2312 Chinese simplified"),
3903 build_string ("gb2312"),
3905 staticpro (&Vcharset_chinese_gb12345);
3906 Vcharset_chinese_gb12345 =
3907 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
3908 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3909 build_string ("G1"),
3910 build_string ("GB 12345)"),
3911 build_string ("GB 12345-1990"),
3912 build_string ("GB12345\\(\\.1990\\)?-0"),
3914 staticpro (&Vcharset_japanese_jisx0208);
3915 Vcharset_japanese_jisx0208 =
3916 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3917 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3918 build_string ("JISX0208"),
3919 build_string ("JIS X0208:1983 (Japanese)"),
3920 build_string ("JIS X0208:1983 Japanese Kanji"),
3921 build_string ("jisx0208\\.1983"),
3924 staticpro (&Vcharset_japanese_jisx0208_1990);
3925 Vcharset_japanese_jisx0208_1990 =
3926 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3927 Qjapanese_jisx0208_1990, 94, 2,
3928 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3929 build_string ("JISX0208-1990"),
3930 build_string ("JIS X0208:1990 (Japanese)"),
3931 build_string ("JIS X0208:1990 Japanese Kanji"),
3932 build_string ("jisx0208\\.1990"),
3934 MIN_CHAR_JIS_X0208_1990,
3935 MAX_CHAR_JIS_X0208_1990, 0, 33);
3937 staticpro (&Vcharset_korean_ksc5601);
3938 Vcharset_korean_ksc5601 =
3939 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3940 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3941 build_string ("KSC5601"),
3942 build_string ("KSC5601 (Korean"),
3943 build_string ("KSC5601 Korean Hangul and Hanja"),
3944 build_string ("ksc5601"),
3946 staticpro (&Vcharset_japanese_jisx0212);
3947 Vcharset_japanese_jisx0212 =
3948 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3949 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3950 build_string ("JISX0212"),
3951 build_string ("JISX0212 (Japanese)"),
3952 build_string ("JISX0212 Japanese Supplement"),
3953 build_string ("jisx0212"),
3956 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3957 staticpro (&Vcharset_chinese_cns11643_1);
3958 Vcharset_chinese_cns11643_1 =
3959 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3960 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3961 build_string ("CNS11643-1"),
3962 build_string ("CNS11643-1 (Chinese traditional)"),
3964 ("CNS 11643 Plane 1 Chinese traditional"),
3965 build_string (CHINESE_CNS_PLANE_RE("1")),
3967 staticpro (&Vcharset_chinese_cns11643_2);
3968 Vcharset_chinese_cns11643_2 =
3969 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3970 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3971 build_string ("CNS11643-2"),
3972 build_string ("CNS11643-2 (Chinese traditional)"),
3974 ("CNS 11643 Plane 2 Chinese traditional"),
3975 build_string (CHINESE_CNS_PLANE_RE("2")),
3978 staticpro (&Vcharset_latin_tcvn5712);
3979 Vcharset_latin_tcvn5712 =
3980 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3981 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3982 build_string ("TCVN 5712"),
3983 build_string ("TCVN 5712 (VSCII-2)"),
3984 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3985 build_string ("tcvn5712\\(\\.1993\\)?-1"),
3987 staticpro (&Vcharset_latin_viscii_lower);
3988 Vcharset_latin_viscii_lower =
3989 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3990 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3991 build_string ("VISCII lower"),
3992 build_string ("VISCII lower (Vietnamese)"),
3993 build_string ("VISCII lower (Vietnamese)"),
3994 build_string ("MULEVISCII-LOWER"),
3996 staticpro (&Vcharset_latin_viscii_upper);
3997 Vcharset_latin_viscii_upper =
3998 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3999 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
4000 build_string ("VISCII upper"),
4001 build_string ("VISCII upper (Vietnamese)"),
4002 build_string ("VISCII upper (Vietnamese)"),
4003 build_string ("MULEVISCII-UPPER"),
4005 staticpro (&Vcharset_latin_viscii);
4006 Vcharset_latin_viscii =
4007 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
4008 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
4009 build_string ("VISCII"),
4010 build_string ("VISCII 1.1 (Vietnamese)"),
4011 build_string ("VISCII 1.1 (Vietnamese)"),
4012 build_string ("VISCII1\\.1"),
4014 staticpro (&Vcharset_chinese_big5);
4015 Vcharset_chinese_big5 =
4016 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
4017 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4018 build_string ("Big5"),
4019 build_string ("Big5"),
4020 build_string ("Big5 Chinese traditional"),
4021 build_string ("big5"),
4023 staticpro (&Vcharset_chinese_big5_cdp);
4024 Vcharset_chinese_big5_cdp =
4025 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
4026 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4027 build_string ("Big5-CDP"),
4028 build_string ("Big5 + CDP extension"),
4029 build_string ("Big5 with CDP extension"),
4030 build_string ("big5\\.cdp-0"),
4032 staticpro (&Vcharset_ideograph_gt);
4033 Vcharset_ideograph_gt =
4034 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
4035 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4036 build_string ("GT"),
4037 build_string ("GT"),
4038 build_string ("GT"),
4040 Qnil, MIN_CHAR_GT, MAX_CHAR_GT, 0, 0);
4041 #define DEF_GT_PJ(n) \
4042 staticpro (&Vcharset_ideograph_gt_pj_##n); \
4043 Vcharset_ideograph_gt_pj_##n = \
4044 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
4045 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
4046 build_string ("GT-PJ-"#n), \
4047 build_string ("GT (pseudo JIS encoding) part "#n), \
4048 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
4050 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
4064 staticpro (&Vcharset_ideograph_daikanwa);
4065 Vcharset_ideograph_daikanwa =
4066 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
4067 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4068 build_string ("Daikanwa"),
4069 build_string ("Morohashi's Daikanwa"),
4070 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
4071 build_string ("Daikanwa"),
4072 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
4073 staticpro (&Vcharset_mojikyo);
4075 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
4076 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4077 build_string ("Mojikyo"),
4078 build_string ("Mojikyo"),
4079 build_string ("Konjaku-Mojikyo"),
4081 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
4082 staticpro (&Vcharset_mojikyo_2022_1);
4083 Vcharset_mojikyo_2022_1 =
4084 make_charset (LEADING_BYTE_MOJIKYO_2022_1, Qmojikyo_2022_1, 94, 3,
4085 2, 2, ':', CHARSET_LEFT_TO_RIGHT,
4086 build_string ("Mojikyo-2022-1"),
4087 build_string ("Mojikyo ISO-2022 Part 1"),
4088 build_string ("Konjaku-Mojikyo for ISO/IEC 2022 Part 1"),
4092 #define DEF_MOJIKYO_PJ(n) \
4093 staticpro (&Vcharset_mojikyo_pj_##n); \
4094 Vcharset_mojikyo_pj_##n = \
4095 make_charset (LEADING_BYTE_MOJIKYO_PJ_##n, Qmojikyo_pj_##n, 94, 2, \
4096 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
4097 build_string ("Mojikyo-PJ-"#n), \
4098 build_string ("Mojikyo (pseudo JIS encoding) part "#n), \
4100 ("Konjaku-Mojikyo (pseudo JIS encoding) part "#n), \
4102 ("\\(MojikyoPJ-"#n "\\|jisx0208\\.Mojikyo-"#n "\\)$"), \
4114 DEF_MOJIKYO_PJ (10);
4115 DEF_MOJIKYO_PJ (11);
4116 DEF_MOJIKYO_PJ (12);
4117 DEF_MOJIKYO_PJ (13);
4118 DEF_MOJIKYO_PJ (14);
4119 DEF_MOJIKYO_PJ (15);
4120 DEF_MOJIKYO_PJ (16);
4121 DEF_MOJIKYO_PJ (17);
4122 DEF_MOJIKYO_PJ (18);
4123 DEF_MOJIKYO_PJ (19);
4124 DEF_MOJIKYO_PJ (20);
4125 DEF_MOJIKYO_PJ (21);
4127 staticpro (&Vcharset_ethiopic_ucs);
4128 Vcharset_ethiopic_ucs =
4129 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
4130 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4131 build_string ("Ethiopic (UCS)"),
4132 build_string ("Ethiopic (UCS)"),
4133 build_string ("Ethiopic of UCS"),
4134 build_string ("Ethiopic-Unicode"),
4135 Qnil, 0x1200, 0x137F, 0x1200, 0);
4137 staticpro (&Vcharset_chinese_big5_1);
4138 Vcharset_chinese_big5_1 =
4139 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
4140 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
4141 build_string ("Big5"),
4142 build_string ("Big5 (Level-1)"),
4144 ("Big5 Level-1 Chinese traditional"),
4145 build_string ("big5"),
4147 staticpro (&Vcharset_chinese_big5_2);
4148 Vcharset_chinese_big5_2 =
4149 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
4150 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
4151 build_string ("Big5"),
4152 build_string ("Big5 (Level-2)"),
4154 ("Big5 Level-2 Chinese traditional"),
4155 build_string ("big5"),
4158 #ifdef ENABLE_COMPOSITE_CHARS
4159 /* #### For simplicity, we put composite chars into a 96x96 charset.
4160 This is going to lead to problems because you can run out of
4161 room, esp. as we don't yet recycle numbers. */
4162 staticpro (&Vcharset_composite);
4163 Vcharset_composite =
4164 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
4165 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4166 build_string ("Composite"),
4167 build_string ("Composite characters"),
4168 build_string ("Composite characters"),
4171 /* #### not dumped properly */
4172 composite_char_row_next = 32;
4173 composite_char_col_next = 32;
4175 Vcomposite_char_string2char_hash_table =
4176 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
4177 Vcomposite_char_char2string_hash_table =
4178 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
4179 staticpro (&Vcomposite_char_string2char_hash_table);
4180 staticpro (&Vcomposite_char_char2string_hash_table);
4181 #endif /* ENABLE_COMPOSITE_CHARS */