1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_cns;
70 Lisp_Object Vcharset_ucs_big5;
71 Lisp_Object Vcharset_latin_viscii;
72 Lisp_Object Vcharset_latin_tcvn5712;
73 Lisp_Object Vcharset_latin_viscii_lower;
74 Lisp_Object Vcharset_latin_viscii_upper;
75 Lisp_Object Vcharset_chinese_big5;
76 Lisp_Object Vcharset_chinese_big5_cdp;
77 Lisp_Object Vcharset_ideograph_gt;
78 Lisp_Object Vcharset_ideograph_gt_pj_1;
79 Lisp_Object Vcharset_ideograph_gt_pj_2;
80 Lisp_Object Vcharset_ideograph_gt_pj_3;
81 Lisp_Object Vcharset_ideograph_gt_pj_4;
82 Lisp_Object Vcharset_ideograph_gt_pj_5;
83 Lisp_Object Vcharset_ideograph_gt_pj_6;
84 Lisp_Object Vcharset_ideograph_gt_pj_7;
85 Lisp_Object Vcharset_ideograph_gt_pj_8;
86 Lisp_Object Vcharset_ideograph_gt_pj_9;
87 Lisp_Object Vcharset_ideograph_gt_pj_10;
88 Lisp_Object Vcharset_ideograph_gt_pj_11;
89 Lisp_Object Vcharset_ideograph_daikanwa;
90 Lisp_Object Vcharset_mojikyo;
91 Lisp_Object Vcharset_mojikyo_2022_1;
92 Lisp_Object Vcharset_mojikyo_pj_1;
93 Lisp_Object Vcharset_mojikyo_pj_2;
94 Lisp_Object Vcharset_mojikyo_pj_3;
95 Lisp_Object Vcharset_mojikyo_pj_4;
96 Lisp_Object Vcharset_mojikyo_pj_5;
97 Lisp_Object Vcharset_mojikyo_pj_6;
98 Lisp_Object Vcharset_mojikyo_pj_7;
99 Lisp_Object Vcharset_mojikyo_pj_8;
100 Lisp_Object Vcharset_mojikyo_pj_9;
101 Lisp_Object Vcharset_mojikyo_pj_10;
102 Lisp_Object Vcharset_mojikyo_pj_11;
103 Lisp_Object Vcharset_mojikyo_pj_12;
104 Lisp_Object Vcharset_mojikyo_pj_13;
105 Lisp_Object Vcharset_mojikyo_pj_14;
106 Lisp_Object Vcharset_mojikyo_pj_15;
107 Lisp_Object Vcharset_mojikyo_pj_16;
108 Lisp_Object Vcharset_mojikyo_pj_17;
109 Lisp_Object Vcharset_mojikyo_pj_18;
110 Lisp_Object Vcharset_mojikyo_pj_19;
111 Lisp_Object Vcharset_mojikyo_pj_20;
112 Lisp_Object Vcharset_mojikyo_pj_21;
113 Lisp_Object Vcharset_ethiopic_ucs;
115 Lisp_Object Vcharset_chinese_big5_1;
116 Lisp_Object Vcharset_chinese_big5_2;
118 #ifdef ENABLE_COMPOSITE_CHARS
119 Lisp_Object Vcharset_composite;
121 /* Hash tables for composite chars. One maps string representing
122 composed chars to their equivalent chars; one goes the
124 Lisp_Object Vcomposite_char_char2string_hash_table;
125 Lisp_Object Vcomposite_char_string2char_hash_table;
127 static int composite_char_row_next;
128 static int composite_char_col_next;
130 #endif /* ENABLE_COMPOSITE_CHARS */
132 struct charset_lookup *chlook;
134 static const struct lrecord_description charset_lookup_description_1[] = {
135 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
144 static const struct struct_description charset_lookup_description = {
145 sizeof (struct charset_lookup),
146 charset_lookup_description_1
150 /* Table of number of bytes in the string representation of a character
151 indexed by the first byte of that representation.
153 rep_bytes_by_first_byte(c) is more efficient than the equivalent
154 canonical computation:
156 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
158 const Bytecount rep_bytes_by_first_byte[0xA0] =
159 { /* 0x00 - 0x7f are for straight ASCII */
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
165 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
166 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
167 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
168 /* 0x80 - 0x8f are for Dimension-1 official charsets */
170 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
172 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
174 /* 0x90 - 0x9d are for Dimension-2 official charsets */
175 /* 0x9e is for Dimension-1 private charsets */
176 /* 0x9f is for Dimension-2 private charsets */
177 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
183 #define BT_UINT8_MIN 0
184 #define BT_UINT8_MAX (UCHAR_MAX - 3)
185 #define BT_UINT8_t (UCHAR_MAX - 2)
186 #define BT_UINT8_nil (UCHAR_MAX - 1)
187 #define BT_UINT8_unbound UCHAR_MAX
189 INLINE_HEADER int INT_UINT8_P (Lisp_Object obj);
190 INLINE_HEADER int UINT8_VALUE_P (Lisp_Object obj);
191 INLINE_HEADER unsigned char UINT8_ENCODE (Lisp_Object obj);
192 INLINE_HEADER Lisp_Object UINT8_DECODE (unsigned char n);
193 INLINE_HEADER unsigned short UINT8_TO_UINT16 (unsigned char n);
196 INT_UINT8_P (Lisp_Object obj)
200 int num = XINT (obj);
202 return (BT_UINT8_MIN <= num) && (num <= BT_UINT8_MAX);
209 UINT8_VALUE_P (Lisp_Object obj)
211 return EQ (obj, Qunbound)
212 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT8_P (obj);
215 INLINE_HEADER unsigned char
216 UINT8_ENCODE (Lisp_Object obj)
218 if (EQ (obj, Qunbound))
219 return BT_UINT8_unbound;
220 else if (EQ (obj, Qnil))
222 else if (EQ (obj, Qt))
228 INLINE_HEADER Lisp_Object
229 UINT8_DECODE (unsigned char n)
231 if (n == BT_UINT8_unbound)
233 else if (n == BT_UINT8_nil)
235 else if (n == BT_UINT8_t)
242 mark_uint8_byte_table (Lisp_Object obj)
248 print_uint8_byte_table (Lisp_Object obj,
249 Lisp_Object printcharfun, int escapeflag)
251 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
253 struct gcpro gcpro1, gcpro2;
254 GCPRO2 (obj, printcharfun);
256 write_c_string ("\n#<uint8-byte-table", printcharfun);
257 for (i = 0; i < 256; i++)
259 unsigned char n = bte->property[i];
261 write_c_string ("\n ", printcharfun);
262 write_c_string (" ", printcharfun);
263 if (n == BT_UINT8_unbound)
264 write_c_string ("void", printcharfun);
265 else if (n == BT_UINT8_nil)
266 write_c_string ("nil", printcharfun);
267 else if (n == BT_UINT8_t)
268 write_c_string ("t", printcharfun);
273 sprintf (buf, "%hd", n);
274 write_c_string (buf, printcharfun);
278 write_c_string (">", printcharfun);
282 uint8_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
284 Lisp_Uint8_Byte_Table *te1 = XUINT8_BYTE_TABLE (obj1);
285 Lisp_Uint8_Byte_Table *te2 = XUINT8_BYTE_TABLE (obj2);
288 for (i = 0; i < 256; i++)
289 if (te1->property[i] != te2->property[i])
295 uint8_byte_table_hash (Lisp_Object obj, int depth)
297 Lisp_Uint8_Byte_Table *te = XUINT8_BYTE_TABLE (obj);
301 for (i = 0; i < 256; i++)
302 hash = HASH2 (hash, te->property[i]);
306 DEFINE_LRECORD_IMPLEMENTATION ("uint8-byte-table", uint8_byte_table,
307 mark_uint8_byte_table,
308 print_uint8_byte_table,
309 0, uint8_byte_table_equal,
310 uint8_byte_table_hash,
311 0 /* uint8_byte_table_description */,
312 Lisp_Uint8_Byte_Table);
315 make_uint8_byte_table (unsigned char initval)
319 Lisp_Uint8_Byte_Table *cte;
321 cte = alloc_lcrecord_type (Lisp_Uint8_Byte_Table,
322 &lrecord_uint8_byte_table);
324 for (i = 0; i < 256; i++)
325 cte->property[i] = initval;
327 XSETUINT8_BYTE_TABLE (obj, cte);
332 uint8_byte_table_same_value_p (Lisp_Object obj)
334 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
335 unsigned char v0 = bte->property[0];
338 for (i = 1; i < 256; i++)
340 if (bte->property[i] != v0)
347 #define BT_UINT16_MIN 0
348 #define BT_UINT16_MAX (USHRT_MAX - 3)
349 #define BT_UINT16_t (USHRT_MAX - 2)
350 #define BT_UINT16_nil (USHRT_MAX - 1)
351 #define BT_UINT16_unbound USHRT_MAX
353 INLINE_HEADER int INT_UINT16_P (Lisp_Object obj);
354 INLINE_HEADER int UINT16_VALUE_P (Lisp_Object obj);
355 INLINE_HEADER unsigned short UINT16_ENCODE (Lisp_Object obj);
356 INLINE_HEADER Lisp_Object UINT16_DECODE (unsigned short us);
359 INT_UINT16_P (Lisp_Object obj)
363 int num = XINT (obj);
365 return (BT_UINT16_MIN <= num) && (num <= BT_UINT16_MAX);
372 UINT16_VALUE_P (Lisp_Object obj)
374 return EQ (obj, Qunbound)
375 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT16_P (obj);
378 INLINE_HEADER unsigned short
379 UINT16_ENCODE (Lisp_Object obj)
381 if (EQ (obj, Qunbound))
382 return BT_UINT16_unbound;
383 else if (EQ (obj, Qnil))
384 return BT_UINT16_nil;
385 else if (EQ (obj, Qt))
391 INLINE_HEADER Lisp_Object
392 UINT16_DECODE (unsigned short n)
394 if (n == BT_UINT16_unbound)
396 else if (n == BT_UINT16_nil)
398 else if (n == BT_UINT16_t)
404 INLINE_HEADER unsigned short
405 UINT8_TO_UINT16 (unsigned char n)
407 if (n == BT_UINT8_unbound)
408 return BT_UINT16_unbound;
409 else if (n == BT_UINT8_nil)
410 return BT_UINT16_nil;
411 else if (n == BT_UINT8_t)
418 mark_uint16_byte_table (Lisp_Object obj)
424 print_uint16_byte_table (Lisp_Object obj,
425 Lisp_Object printcharfun, int escapeflag)
427 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
429 struct gcpro gcpro1, gcpro2;
430 GCPRO2 (obj, printcharfun);
432 write_c_string ("\n#<uint16-byte-table", printcharfun);
433 for (i = 0; i < 256; i++)
435 unsigned short n = bte->property[i];
437 write_c_string ("\n ", printcharfun);
438 write_c_string (" ", printcharfun);
439 if (n == BT_UINT16_unbound)
440 write_c_string ("void", printcharfun);
441 else if (n == BT_UINT16_nil)
442 write_c_string ("nil", printcharfun);
443 else if (n == BT_UINT16_t)
444 write_c_string ("t", printcharfun);
449 sprintf (buf, "%hd", n);
450 write_c_string (buf, printcharfun);
454 write_c_string (">", printcharfun);
458 uint16_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
460 Lisp_Uint16_Byte_Table *te1 = XUINT16_BYTE_TABLE (obj1);
461 Lisp_Uint16_Byte_Table *te2 = XUINT16_BYTE_TABLE (obj2);
464 for (i = 0; i < 256; i++)
465 if (te1->property[i] != te2->property[i])
471 uint16_byte_table_hash (Lisp_Object obj, int depth)
473 Lisp_Uint16_Byte_Table *te = XUINT16_BYTE_TABLE (obj);
477 for (i = 0; i < 256; i++)
478 hash = HASH2 (hash, te->property[i]);
482 DEFINE_LRECORD_IMPLEMENTATION ("uint16-byte-table", uint16_byte_table,
483 mark_uint16_byte_table,
484 print_uint16_byte_table,
485 0, uint16_byte_table_equal,
486 uint16_byte_table_hash,
487 0 /* uint16_byte_table_description */,
488 Lisp_Uint16_Byte_Table);
491 make_uint16_byte_table (unsigned short initval)
495 Lisp_Uint16_Byte_Table *cte;
497 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
498 &lrecord_uint16_byte_table);
500 for (i = 0; i < 256; i++)
501 cte->property[i] = initval;
503 XSETUINT16_BYTE_TABLE (obj, cte);
508 expand_uint8_byte_table_to_uint16 (Lisp_Object table)
512 Lisp_Uint8_Byte_Table* bte = XUINT8_BYTE_TABLE(table);
513 Lisp_Uint16_Byte_Table* cte;
515 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
516 &lrecord_uint16_byte_table);
517 for (i = 0; i < 256; i++)
519 cte->property[i] = UINT8_TO_UINT16 (bte->property[i]);
521 XSETUINT16_BYTE_TABLE (obj, cte);
526 uint16_byte_table_same_value_p (Lisp_Object obj)
528 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
529 unsigned short v0 = bte->property[0];
532 for (i = 1; i < 256; i++)
534 if (bte->property[i] != v0)
542 mark_byte_table (Lisp_Object obj)
544 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
547 for (i = 0; i < 256; i++)
549 mark_object (cte->property[i]);
555 print_byte_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
557 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
559 struct gcpro gcpro1, gcpro2;
560 GCPRO2 (obj, printcharfun);
562 write_c_string ("\n#<byte-table", printcharfun);
563 for (i = 0; i < 256; i++)
565 Lisp_Object elt = bte->property[i];
567 write_c_string ("\n ", printcharfun);
568 write_c_string (" ", printcharfun);
569 if (EQ (elt, Qunbound))
570 write_c_string ("void", printcharfun);
572 print_internal (elt, printcharfun, escapeflag);
575 write_c_string (">", printcharfun);
579 byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
581 Lisp_Byte_Table *cte1 = XBYTE_TABLE (obj1);
582 Lisp_Byte_Table *cte2 = XBYTE_TABLE (obj2);
585 for (i = 0; i < 256; i++)
586 if (BYTE_TABLE_P (cte1->property[i]))
588 if (BYTE_TABLE_P (cte2->property[i]))
590 if (!byte_table_equal (cte1->property[i],
591 cte2->property[i], depth + 1))
598 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
604 byte_table_hash (Lisp_Object obj, int depth)
606 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
608 return internal_array_hash (cte->property, 256, depth);
611 static const struct lrecord_description byte_table_description[] = {
612 { XD_LISP_OBJECT_ARRAY, offsetof(Lisp_Byte_Table, property), 256 },
616 DEFINE_LRECORD_IMPLEMENTATION ("byte-table", byte_table,
621 byte_table_description,
625 make_byte_table (Lisp_Object initval)
629 Lisp_Byte_Table *cte;
631 cte = alloc_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
633 for (i = 0; i < 256; i++)
634 cte->property[i] = initval;
636 XSETBYTE_TABLE (obj, cte);
641 byte_table_same_value_p (Lisp_Object obj)
643 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
644 Lisp_Object v0 = bte->property[0];
647 for (i = 1; i < 256; i++)
649 if (!internal_equal (bte->property[i], v0, 0))
656 Lisp_Object get_byte_table (Lisp_Object table, unsigned char idx);
657 Lisp_Object put_byte_table (Lisp_Object table, unsigned char idx,
661 get_byte_table (Lisp_Object table, unsigned char idx)
663 if (UINT8_BYTE_TABLE_P (table))
664 return UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[idx]);
665 else if (UINT16_BYTE_TABLE_P (table))
666 return UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[idx]);
667 else if (BYTE_TABLE_P (table))
668 return XBYTE_TABLE(table)->property[idx];
674 put_byte_table (Lisp_Object table, unsigned char idx, Lisp_Object value)
676 if (UINT8_BYTE_TABLE_P (table))
678 if (UINT8_VALUE_P (value))
680 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
681 if (!UINT8_BYTE_TABLE_P (value) &&
682 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
683 && uint8_byte_table_same_value_p (table))
688 else if (UINT16_VALUE_P (value))
690 Lisp_Object new = expand_uint8_byte_table_to_uint16 (table);
692 XUINT16_BYTE_TABLE(new)->property[idx] = UINT16_ENCODE (value);
697 Lisp_Object new = make_byte_table (Qnil);
700 for (i = 0; i < 256; i++)
702 XBYTE_TABLE(new)->property[i]
703 = UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[i]);
705 XBYTE_TABLE(new)->property[idx] = value;
709 else if (UINT16_BYTE_TABLE_P (table))
711 if (UINT16_VALUE_P (value))
713 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
714 if (!UINT8_BYTE_TABLE_P (value) &&
715 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
716 && uint16_byte_table_same_value_p (table))
723 Lisp_Object new = make_byte_table (Qnil);
726 for (i = 0; i < 256; i++)
728 XBYTE_TABLE(new)->property[i]
729 = UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[i]);
731 XBYTE_TABLE(new)->property[idx] = value;
735 else if (BYTE_TABLE_P (table))
737 XBYTE_TABLE(table)->property[idx] = value;
738 if (!UINT8_BYTE_TABLE_P (value) &&
739 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
740 && byte_table_same_value_p (table))
745 else if (!internal_equal (table, value, 0))
747 if (UINT8_VALUE_P (table) && UINT8_VALUE_P (value))
749 table = make_uint8_byte_table (UINT8_ENCODE (table));
750 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
752 else if (UINT16_VALUE_P (table) && UINT16_VALUE_P (value))
754 table = make_uint16_byte_table (UINT16_ENCODE (table));
755 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
759 table = make_byte_table (table);
760 XBYTE_TABLE(table)->property[idx] = value;
767 mark_char_id_table (Lisp_Object obj)
769 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
775 print_char_id_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
777 Lisp_Object table = XCHAR_ID_TABLE (obj)->table;
779 struct gcpro gcpro1, gcpro2;
780 GCPRO2 (obj, printcharfun);
782 write_c_string ("#<char-id-table ", printcharfun);
783 for (i = 0; i < 256; i++)
785 Lisp_Object elt = get_byte_table (table, i);
786 if (i != 0) write_c_string ("\n ", printcharfun);
787 if (EQ (elt, Qunbound))
788 write_c_string ("void", printcharfun);
790 print_internal (elt, printcharfun, escapeflag);
793 write_c_string (">", printcharfun);
797 char_id_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
799 Lisp_Object table1 = XCHAR_ID_TABLE (obj1)->table;
800 Lisp_Object table2 = XCHAR_ID_TABLE (obj2)->table;
803 for (i = 0; i < 256; i++)
805 if (!internal_equal (get_byte_table (table1, i),
806 get_byte_table (table2, i), 0))
813 char_id_table_hash (Lisp_Object obj, int depth)
815 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
817 return char_id_table_hash (cte->table, depth + 1);
820 static const struct lrecord_description char_id_table_description[] = {
821 { XD_LISP_OBJECT, offsetof(Lisp_Char_ID_Table, table) },
825 DEFINE_LRECORD_IMPLEMENTATION ("char-id-table", char_id_table,
828 0, char_id_table_equal,
830 char_id_table_description,
834 make_char_id_table (Lisp_Object initval)
837 Lisp_Char_ID_Table *cte;
839 cte = alloc_lcrecord_type (Lisp_Char_ID_Table, &lrecord_char_id_table);
841 cte->table = make_byte_table (initval);
843 XSETCHAR_ID_TABLE (obj, cte);
849 get_char_id_table (Emchar ch, Lisp_Object table)
851 unsigned int code = ch;
858 (XCHAR_ID_TABLE (table)->table,
859 (unsigned char)(code >> 24)),
860 (unsigned char) (code >> 16)),
861 (unsigned char) (code >> 8)),
862 (unsigned char) code);
865 void put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table);
867 put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table)
869 unsigned int code = ch;
870 Lisp_Object table1, table2, table3, table4;
872 table1 = XCHAR_ID_TABLE (table)->table;
873 table2 = get_byte_table (table1, (unsigned char)(code >> 24));
874 table3 = get_byte_table (table2, (unsigned char)(code >> 16));
875 table4 = get_byte_table (table3, (unsigned char)(code >> 8));
877 table4 = put_byte_table (table4, (unsigned char)code, value);
878 table3 = put_byte_table (table3, (unsigned char)(code >> 8), table4);
879 table2 = put_byte_table (table2, (unsigned char)(code >> 16), table3);
880 XCHAR_ID_TABLE (table)->table
881 = put_byte_table (table1, (unsigned char)(code >> 24), table2);
885 Lisp_Object Vchar_attribute_hash_table;
886 Lisp_Object Vcharacter_composition_table;
887 Lisp_Object Vcharacter_variant_table;
889 Lisp_Object Qideograph_daikanwa;
890 Lisp_Object Q_decomposition;
895 Lisp_Object Qisolated;
896 Lisp_Object Qinitial;
899 Lisp_Object Qvertical;
900 Lisp_Object QnoBreak;
901 Lisp_Object Qfraction;
911 Emchar to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg);
913 Lisp_Object put_char_ccs_code_point (Lisp_Object character,
914 Lisp_Object ccs, Lisp_Object value);
915 Lisp_Object remove_char_ccs (Lisp_Object character, Lisp_Object ccs);
918 to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
924 else if (EQ (v, Qcompat))
926 else if (EQ (v, Qisolated))
928 else if (EQ (v, Qinitial))
930 else if (EQ (v, Qmedial))
932 else if (EQ (v, Qfinal))
934 else if (EQ (v, Qvertical))
936 else if (EQ (v, QnoBreak))
938 else if (EQ (v, Qfraction))
940 else if (EQ (v, Qsuper))
942 else if (EQ (v, Qsub))
944 else if (EQ (v, Qcircle))
946 else if (EQ (v, Qsquare))
948 else if (EQ (v, Qwide))
950 else if (EQ (v, Qnarrow))
952 else if (EQ (v, Qsmall))
954 else if (EQ (v, Qfont))
957 signal_simple_error (err_msg, err_arg);
960 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
961 Return character corresponding with list.
965 Lisp_Object table = Vcharacter_composition_table;
966 Lisp_Object rest = list;
970 Lisp_Object v = Fcar (rest);
972 Emchar c = to_char_id (v, "Invalid value for composition", list);
974 ret = get_char_id_table (c, table);
979 if (!CHAR_ID_TABLE_P (ret))
984 else if (!CONSP (rest))
986 else if (CHAR_ID_TABLE_P (ret))
989 signal_simple_error ("Invalid table is found with", list);
991 signal_simple_error ("Invalid value for composition", list);
994 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
995 Return variants of CHARACTER.
999 CHECK_CHAR (character);
1000 return Fcopy_list (get_char_id_table (XCHAR (character),
1001 Vcharacter_variant_table));
1005 /* We store the char-attributes in hash tables with the names as the
1006 key and the actual char-id-table object as the value. Occasionally
1007 we need to use them in a list format. These routines provide us
1009 struct char_attribute_list_closure
1011 Lisp_Object *char_attribute_list;
1015 add_char_attribute_to_list_mapper (Lisp_Object key, Lisp_Object value,
1016 void *char_attribute_list_closure)
1018 /* This function can GC */
1019 struct char_attribute_list_closure *calcl
1020 = (struct char_attribute_list_closure*) char_attribute_list_closure;
1021 Lisp_Object *char_attribute_list = calcl->char_attribute_list;
1023 *char_attribute_list = Fcons (key, *char_attribute_list);
1027 DEFUN ("char-attribute-list", Fchar_attribute_list, 0, 0, 0, /*
1028 Return the list of all existing character attributes except coded-charsets.
1032 Lisp_Object char_attribute_list = Qnil;
1033 struct gcpro gcpro1;
1034 struct char_attribute_list_closure char_attribute_list_closure;
1036 GCPRO1 (char_attribute_list);
1037 char_attribute_list_closure.char_attribute_list = &char_attribute_list;
1038 elisp_maphash (add_char_attribute_to_list_mapper,
1039 Vchar_attribute_hash_table,
1040 &char_attribute_list_closure);
1042 return char_attribute_list;
1045 DEFUN ("find-char-attribute-table", Ffind_char_attribute_table, 1, 1, 0, /*
1046 Return char-id-table corresponding to ATTRIBUTE.
1050 return Fgethash (attribute, Vchar_attribute_hash_table, Qnil);
1054 /* We store the char-id-tables in hash tables with the attributes as
1055 the key and the actual char-id-table object as the value. Each
1056 char-id-table stores values of an attribute corresponding with
1057 characters. Occasionally we need to get attributes of a character
1058 in a association-list format. These routines provide us with
1060 struct char_attribute_alist_closure
1063 Lisp_Object *char_attribute_alist;
1067 add_char_attribute_alist_mapper (Lisp_Object key, Lisp_Object value,
1068 void *char_attribute_alist_closure)
1070 /* This function can GC */
1071 struct char_attribute_alist_closure *caacl =
1072 (struct char_attribute_alist_closure*) char_attribute_alist_closure;
1073 Lisp_Object ret = get_char_id_table (caacl->char_id, value);
1074 if (!UNBOUNDP (ret))
1076 Lisp_Object *char_attribute_alist = caacl->char_attribute_alist;
1077 *char_attribute_alist
1078 = Fcons (Fcons (key, ret), *char_attribute_alist);
1083 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
1084 Return the alist of attributes of CHARACTER.
1088 Lisp_Object alist = Qnil;
1091 CHECK_CHAR (character);
1093 struct gcpro gcpro1;
1094 struct char_attribute_alist_closure char_attribute_alist_closure;
1097 char_attribute_alist_closure.char_id = XCHAR (character);
1098 char_attribute_alist_closure.char_attribute_alist = &alist;
1099 elisp_maphash (add_char_attribute_alist_mapper,
1100 Vchar_attribute_hash_table,
1101 &char_attribute_alist_closure);
1105 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
1107 Lisp_Object ccs = chlook->charset_by_leading_byte[i];
1111 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1114 if ( CHAR_ID_TABLE_P (encoding_table)
1115 && INTP (cpos = get_char_id_table (XCHAR (character),
1118 alist = Fcons (Fcons (ccs, cpos), alist);
1125 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 3, 0, /*
1126 Return the value of CHARACTER's ATTRIBUTE.
1127 Return DEFAULT-VALUE if the value is not exist.
1129 (character, attribute, default_value))
1133 CHECK_CHAR (character);
1134 if (!NILP (ccs = Ffind_charset (attribute)))
1136 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1138 if (CHAR_ID_TABLE_P (encoding_table))
1139 return get_char_id_table (XCHAR (character), encoding_table);
1143 Lisp_Object table = Fgethash (attribute,
1144 Vchar_attribute_hash_table,
1146 if (!UNBOUNDP (table))
1148 Lisp_Object ret = get_char_id_table (XCHAR (character), table);
1149 if (!UNBOUNDP (ret))
1153 return default_value;
1156 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
1157 Store CHARACTER's ATTRIBUTE with VALUE.
1159 (character, attribute, value))
1163 CHECK_CHAR (character);
1164 ccs = Ffind_charset (attribute);
1167 return put_char_ccs_code_point (character, ccs, value);
1169 else if (EQ (attribute, Q_decomposition))
1174 signal_simple_error ("Invalid value for ->decomposition",
1177 if (CONSP (Fcdr (value)))
1179 Lisp_Object rest = value;
1180 Lisp_Object table = Vcharacter_composition_table;
1184 GET_EXTERNAL_LIST_LENGTH (rest, len);
1185 seq = make_vector (len, Qnil);
1187 while (CONSP (rest))
1189 Lisp_Object v = Fcar (rest);
1192 = to_char_id (v, "Invalid value for ->decomposition", value);
1195 XVECTOR_DATA(seq)[i++] = v;
1197 XVECTOR_DATA(seq)[i++] = make_char (c);
1201 put_char_id_table (c, character, table);
1206 ntable = get_char_id_table (c, table);
1207 if (!CHAR_ID_TABLE_P (ntable))
1209 ntable = make_char_id_table (Qnil);
1210 put_char_id_table (c, ntable, table);
1218 Lisp_Object v = Fcar (value);
1222 Emchar c = XINT (v);
1224 = get_char_id_table (c, Vcharacter_variant_table);
1226 if (NILP (Fmemq (v, ret)))
1228 put_char_id_table (c, Fcons (character, ret),
1229 Vcharacter_variant_table);
1232 seq = make_vector (1, v);
1236 else if (EQ (attribute, Qto_ucs) || EQ (attribute, Q_ucs))
1242 signal_simple_error ("Invalid value for ->ucs", value);
1246 ret = get_char_id_table (c, Vcharacter_variant_table);
1247 if (NILP (Fmemq (character, ret)))
1249 put_char_id_table (c, Fcons (character, ret),
1250 Vcharacter_variant_table);
1253 if (EQ (attribute, Q_ucs))
1254 attribute = Qto_ucs;
1258 Lisp_Object table = Fgethash (attribute,
1259 Vchar_attribute_hash_table,
1264 table = make_char_id_table (Qunbound);
1265 Fputhash (attribute, table, Vchar_attribute_hash_table);
1267 put_char_id_table (XCHAR (character), value, table);
1272 DEFUN ("remove-char-attribute", Fremove_char_attribute, 2, 2, 0, /*
1273 Remove CHARACTER's ATTRIBUTE.
1275 (character, attribute))
1279 CHECK_CHAR (character);
1280 ccs = Ffind_charset (attribute);
1283 return remove_char_ccs (character, ccs);
1287 Lisp_Object table = Fgethash (attribute,
1288 Vchar_attribute_hash_table,
1290 if (!UNBOUNDP (table))
1292 put_char_id_table (XCHAR (character), Qunbound, table);
1299 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
1301 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
1303 /* ad-hoc method for `ascii' */
1304 if ((CHARSET_CHARS (cs) == 94) &&
1305 (CHARSET_BYTE_OFFSET (cs) != 33))
1306 return 128 - CHARSET_BYTE_OFFSET (cs);
1308 return CHARSET_CHARS (cs);
1311 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
1313 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
1315 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
1319 if (XVECTOR_LENGTH (v) > ccs_len)
1322 for (i = 0; i < XVECTOR_LENGTH (v); i++)
1324 Lisp_Object c = XVECTOR_DATA(v)[i];
1326 if (!NILP (c) && !CHARP (c))
1330 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
1342 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1345 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1355 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1356 nv = XVECTOR_DATA(v)[i];
1362 XVECTOR_DATA(v)[i] = Qnil;
1366 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1367 int code_point, Lisp_Object character);
1369 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1370 int code_point, Lisp_Object character)
1374 int ccs_len = XVECTOR_LENGTH (v);
1379 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1380 nv = XVECTOR_DATA(v)[i];
1384 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
1390 XVECTOR_DATA(v)[i] = character;
1394 put_char_ccs_code_point (Lisp_Object character,
1395 Lisp_Object ccs, Lisp_Object value)
1397 Lisp_Object encoding_table;
1399 if (!EQ (XCHARSET_NAME (ccs), Qucs)
1400 || (XCHAR (character) != XINT (value)))
1402 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
1403 int dim = XCHARSET_DIMENSION (ccs);
1404 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
1405 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
1409 { /* obsolete representation: value must be a list of bytes */
1410 Lisp_Object ret = Fcar (value);
1414 signal_simple_error ("Invalid value for coded-charset", value);
1415 code_point = XINT (ret);
1416 if (XCHARSET_GRAPHIC (ccs) == 1)
1418 rest = Fcdr (value);
1419 while (!NILP (rest))
1424 signal_simple_error ("Invalid value for coded-charset",
1428 signal_simple_error ("Invalid value for coded-charset",
1431 if (XCHARSET_GRAPHIC (ccs) == 1)
1433 code_point = (code_point << 8) | j;
1436 value = make_int (code_point);
1438 else if (INTP (value))
1440 code_point = XINT (value);
1441 if (XCHARSET_GRAPHIC (ccs) == 1)
1443 code_point &= 0x7F7F7F7F;
1444 value = make_int (code_point);
1448 signal_simple_error ("Invalid value for coded-charset", value);
1452 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
1455 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
1460 XCHARSET_DECODING_TABLE (ccs)
1461 = v = make_older_vector (ccs_len, Qnil);
1464 decoding_table_put_char (v, dim, byte_offset, code_point, character);
1466 if (NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
1468 XCHARSET_ENCODING_TABLE (ccs)
1469 = encoding_table = make_char_id_table (Qnil);
1471 put_char_id_table (XCHAR (character), value, encoding_table);
1476 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
1478 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
1479 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1481 if (VECTORP (decoding_table))
1483 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
1487 decoding_table_remove_char (decoding_table,
1488 XCHARSET_DIMENSION (ccs),
1489 XCHARSET_BYTE_OFFSET (ccs),
1493 if (CHAR_ID_TABLE_P (encoding_table))
1495 put_char_id_table (XCHAR (character), Qnil, encoding_table);
1500 EXFUN (Fmake_char, 3);
1501 EXFUN (Fdecode_char, 2);
1503 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
1504 Store character's ATTRIBUTES.
1508 Lisp_Object rest = attributes;
1509 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
1510 Lisp_Object character;
1514 while (CONSP (rest))
1516 Lisp_Object cell = Fcar (rest);
1520 signal_simple_error ("Invalid argument", attributes);
1521 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
1522 && ((XCHARSET_FINAL (ccs) != 0) ||
1523 (XCHARSET_UCS_MAX (ccs) > 0)) )
1527 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
1529 character = Fdecode_char (ccs, cell);
1530 if (!NILP (character))
1531 goto setup_attributes;
1535 if ( (!NILP (code = Fcdr (Fassq (Qto_ucs, attributes)))) ||
1536 (!NILP (code = Fcdr (Fassq (Q_ucs, attributes)))) )
1540 signal_simple_error ("Invalid argument", attributes);
1542 character = make_char (XINT (code) + 0x100000);
1543 goto setup_attributes;
1547 else if (!INTP (code))
1548 signal_simple_error ("Invalid argument", attributes);
1550 character = make_char (XINT (code));
1554 while (CONSP (rest))
1556 Lisp_Object cell = Fcar (rest);
1559 signal_simple_error ("Invalid argument", attributes);
1561 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
1567 Lisp_Object Vutf_2000_version;
1571 int leading_code_private_11;
1574 Lisp_Object Qcharsetp;
1576 /* Qdoc_string, Qdimension, Qchars defined in general.c */
1577 Lisp_Object Qregistry, Qfinal, Qgraphic;
1578 Lisp_Object Qdirection;
1579 Lisp_Object Qreverse_direction_charset;
1580 Lisp_Object Qleading_byte;
1581 Lisp_Object Qshort_name, Qlong_name;
1595 Qcyrillic_iso8859_5,
1597 Qjapanese_jisx0208_1978,
1601 Qjapanese_jisx0208_1990,
1604 Qchinese_cns11643_1,
1605 Qchinese_cns11643_2,
1612 Qlatin_viscii_lower,
1613 Qlatin_viscii_upper,
1614 Qvietnamese_viscii_lower,
1615 Qvietnamese_viscii_upper,
1628 Qideograph_gt_pj_10,
1629 Qideograph_gt_pj_11,
1659 Lisp_Object Ql2r, Qr2l;
1661 Lisp_Object Vcharset_hash_table;
1663 /* Composite characters are characters constructed by overstriking two
1664 or more regular characters.
1666 1) The old Mule implementation involves storing composite characters
1667 in a buffer as a tag followed by all of the actual characters
1668 used to make up the composite character. I think this is a bad
1669 idea; it greatly complicates code that wants to handle strings
1670 one character at a time because it has to deal with the possibility
1671 of great big ungainly characters. It's much more reasonable to
1672 simply store an index into a table of composite characters.
1674 2) The current implementation only allows for 16,384 separate
1675 composite characters over the lifetime of the XEmacs process.
1676 This could become a potential problem if the user
1677 edited lots of different files that use composite characters.
1678 Due to FSF bogosity, increasing the number of allowable
1679 composite characters under Mule would decrease the number
1680 of possible faces that can exist. Mule already has shrunk
1681 this to 2048, and further shrinkage would become uncomfortable.
1682 No such problems exist in XEmacs.
1684 Composite characters could be represented as 0x80 C1 C2 C3,
1685 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1686 for slightly under 2^20 (one million) composite characters
1687 over the XEmacs process lifetime, and you only need to
1688 increase the size of a Mule character from 19 to 21 bits.
1689 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1690 85 million (slightly over 2^26) composite characters. */
1693 /************************************************************************/
1694 /* Basic Emchar functions */
1695 /************************************************************************/
1697 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1698 string in STR. Returns the number of bytes stored.
1699 Do not call this directly. Use the macro set_charptr_emchar() instead.
1703 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1709 Lisp_Object charset;
1718 else if ( c <= 0x7ff )
1720 *p++ = (c >> 6) | 0xc0;
1721 *p++ = (c & 0x3f) | 0x80;
1723 else if ( c <= 0xffff )
1725 *p++ = (c >> 12) | 0xe0;
1726 *p++ = ((c >> 6) & 0x3f) | 0x80;
1727 *p++ = (c & 0x3f) | 0x80;
1729 else if ( c <= 0x1fffff )
1731 *p++ = (c >> 18) | 0xf0;
1732 *p++ = ((c >> 12) & 0x3f) | 0x80;
1733 *p++ = ((c >> 6) & 0x3f) | 0x80;
1734 *p++ = (c & 0x3f) | 0x80;
1736 else if ( c <= 0x3ffffff )
1738 *p++ = (c >> 24) | 0xf8;
1739 *p++ = ((c >> 18) & 0x3f) | 0x80;
1740 *p++ = ((c >> 12) & 0x3f) | 0x80;
1741 *p++ = ((c >> 6) & 0x3f) | 0x80;
1742 *p++ = (c & 0x3f) | 0x80;
1746 *p++ = (c >> 30) | 0xfc;
1747 *p++ = ((c >> 24) & 0x3f) | 0x80;
1748 *p++ = ((c >> 18) & 0x3f) | 0x80;
1749 *p++ = ((c >> 12) & 0x3f) | 0x80;
1750 *p++ = ((c >> 6) & 0x3f) | 0x80;
1751 *p++ = (c & 0x3f) | 0x80;
1754 BREAKUP_CHAR (c, charset, c1, c2);
1755 lb = CHAR_LEADING_BYTE (c);
1756 if (LEADING_BYTE_PRIVATE_P (lb))
1757 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1759 if (EQ (charset, Vcharset_control_1))
1768 /* Return the first character from a Mule-encoded string in STR,
1769 assuming it's non-ASCII. Do not call this directly.
1770 Use the macro charptr_emchar() instead. */
1773 non_ascii_charptr_emchar (const Bufbyte *str)
1786 else if ( b >= 0xf8 )
1791 else if ( b >= 0xf0 )
1796 else if ( b >= 0xe0 )
1801 else if ( b >= 0xc0 )
1811 for( ; len > 0; len-- )
1814 ch = ( ch << 6 ) | ( b & 0x3f );
1818 Bufbyte i0 = *str, i1, i2 = 0;
1819 Lisp_Object charset;
1821 if (i0 == LEADING_BYTE_CONTROL_1)
1822 return (Emchar) (*++str - 0x20);
1824 if (LEADING_BYTE_PREFIX_P (i0))
1829 charset = CHARSET_BY_LEADING_BYTE (i0);
1830 if (XCHARSET_DIMENSION (charset) == 2)
1833 return MAKE_CHAR (charset, i1, i2);
1837 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1838 Do not call this directly. Use the macro valid_char_p() instead. */
1842 non_ascii_valid_char_p (Emchar ch)
1846 /* Must have only lowest 19 bits set */
1850 f1 = CHAR_FIELD1 (ch);
1851 f2 = CHAR_FIELD2 (ch);
1852 f3 = CHAR_FIELD3 (ch);
1856 Lisp_Object charset;
1858 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1859 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1860 f2 > MAX_CHAR_FIELD2_PRIVATE)
1865 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1866 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1870 NOTE: This takes advantage of the fact that
1871 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1872 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1874 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1875 if (EQ (charset, Qnil))
1877 return (XCHARSET_CHARS (charset) == 96);
1881 Lisp_Object charset;
1883 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1884 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1885 f1 > MAX_CHAR_FIELD1_PRIVATE)
1887 if (f2 < 0x20 || f3 < 0x20)
1890 #ifdef ENABLE_COMPOSITE_CHARS
1891 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1893 if (UNBOUNDP (Fgethash (make_int (ch),
1894 Vcomposite_char_char2string_hash_table,
1899 #endif /* ENABLE_COMPOSITE_CHARS */
1901 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1902 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1905 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1907 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1910 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1912 if (EQ (charset, Qnil))
1914 return (XCHARSET_CHARS (charset) == 96);
1920 /************************************************************************/
1921 /* Basic string functions */
1922 /************************************************************************/
1924 /* Copy the character pointed to by SRC into DST. Do not call this
1925 directly. Use the macro charptr_copy_char() instead.
1926 Return the number of bytes copied. */
1929 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
1931 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
1933 for (i = bytes; i; i--, dst++, src++)
1939 /************************************************************************/
1940 /* streams of Emchars */
1941 /************************************************************************/
1943 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1944 The functions below are not meant to be called directly; use
1945 the macros in insdel.h. */
1948 Lstream_get_emchar_1 (Lstream *stream, int ch)
1950 Bufbyte str[MAX_EMCHAR_LEN];
1951 Bufbyte *strptr = str;
1954 str[0] = (Bufbyte) ch;
1956 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
1958 int c = Lstream_getc (stream);
1959 bufpos_checking_assert (c >= 0);
1960 *++strptr = (Bufbyte) c;
1962 return charptr_emchar (str);
1966 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1968 Bufbyte str[MAX_EMCHAR_LEN];
1969 Bytecount len = set_charptr_emchar (str, ch);
1970 return Lstream_write (stream, str, len);
1974 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1976 Bufbyte str[MAX_EMCHAR_LEN];
1977 Bytecount len = set_charptr_emchar (str, ch);
1978 Lstream_unread (stream, str, len);
1982 /************************************************************************/
1983 /* charset object */
1984 /************************************************************************/
1987 mark_charset (Lisp_Object obj)
1989 Lisp_Charset *cs = XCHARSET (obj);
1991 mark_object (cs->short_name);
1992 mark_object (cs->long_name);
1993 mark_object (cs->doc_string);
1994 mark_object (cs->registry);
1995 mark_object (cs->ccl_program);
1997 mark_object (cs->encoding_table);
1998 /* mark_object (cs->decoding_table); */
2004 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
2006 Lisp_Charset *cs = XCHARSET (obj);
2010 error ("printing unreadable object #<charset %s 0x%x>",
2011 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
2014 write_c_string ("#<charset ", printcharfun);
2015 print_internal (CHARSET_NAME (cs), printcharfun, 0);
2016 write_c_string (" ", printcharfun);
2017 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
2018 write_c_string (" ", printcharfun);
2019 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
2020 write_c_string (" ", printcharfun);
2021 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
2022 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
2024 CHARSET_DIMENSION (cs),
2025 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
2026 CHARSET_COLUMNS (cs),
2027 CHARSET_GRAPHIC (cs),
2028 CHARSET_FINAL (cs));
2029 write_c_string (buf, printcharfun);
2030 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
2031 sprintf (buf, " 0x%x>", cs->header.uid);
2032 write_c_string (buf, printcharfun);
2035 static const struct lrecord_description charset_description[] = {
2036 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
2037 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
2038 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
2039 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
2040 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
2041 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
2042 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
2044 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
2045 { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
2050 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
2051 mark_charset, print_charset, 0, 0, 0,
2052 charset_description,
2055 /* Make a new charset. */
2056 /* #### SJT Should generic properties be allowed? */
2058 make_charset (Charset_ID id, Lisp_Object name,
2059 unsigned short chars, unsigned char dimension,
2060 unsigned char columns, unsigned char graphic,
2061 Bufbyte final, unsigned char direction, Lisp_Object short_name,
2062 Lisp_Object long_name, Lisp_Object doc,
2064 Lisp_Object decoding_table,
2065 Emchar ucs_min, Emchar ucs_max,
2066 Emchar code_offset, unsigned char byte_offset)
2069 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
2073 XSETCHARSET (obj, cs);
2075 CHARSET_ID (cs) = id;
2076 CHARSET_NAME (cs) = name;
2077 CHARSET_SHORT_NAME (cs) = short_name;
2078 CHARSET_LONG_NAME (cs) = long_name;
2079 CHARSET_CHARS (cs) = chars;
2080 CHARSET_DIMENSION (cs) = dimension;
2081 CHARSET_DIRECTION (cs) = direction;
2082 CHARSET_COLUMNS (cs) = columns;
2083 CHARSET_GRAPHIC (cs) = graphic;
2084 CHARSET_FINAL (cs) = final;
2085 CHARSET_DOC_STRING (cs) = doc;
2086 CHARSET_REGISTRY (cs) = reg;
2087 CHARSET_CCL_PROGRAM (cs) = Qnil;
2088 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
2090 CHARSET_DECODING_TABLE(cs) = Qnil;
2091 CHARSET_ENCODING_TABLE(cs) = Qnil;
2092 CHARSET_UCS_MIN(cs) = ucs_min;
2093 CHARSET_UCS_MAX(cs) = ucs_max;
2094 CHARSET_CODE_OFFSET(cs) = code_offset;
2095 CHARSET_BYTE_OFFSET(cs) = byte_offset;
2099 if (id == LEADING_BYTE_ASCII)
2100 CHARSET_REP_BYTES (cs) = 1;
2102 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
2104 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
2109 /* some charsets do not have final characters. This includes
2110 ASCII, Control-1, Composite, and the two faux private
2112 unsigned char iso2022_type
2113 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
2115 if (code_offset == 0)
2117 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
2118 chlook->charset_by_attributes[iso2022_type][final] = obj;
2122 (chlook->charset_by_attributes[iso2022_type][final][direction]));
2123 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
2127 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
2128 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
2130 /* Some charsets are "faux" and don't have names or really exist at
2131 all except in the leading-byte table. */
2133 Fputhash (name, obj, Vcharset_hash_table);
2138 get_unallocated_leading_byte (int dimension)
2143 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
2146 lb = chlook->next_allocated_leading_byte++;
2150 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
2153 lb = chlook->next_allocated_1_byte_leading_byte++;
2157 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
2160 lb = chlook->next_allocated_2_byte_leading_byte++;
2166 ("No more character sets free for this dimension",
2167 make_int (dimension));
2173 /* Number of Big5 characters which have the same code in 1st byte. */
2175 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
2178 decode_builtin_char (Lisp_Object charset, int code_point)
2182 if (EQ (charset, Vcharset_chinese_big5))
2184 int c1 = code_point >> 8;
2185 int c2 = code_point & 0xFF;
2188 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
2190 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
2191 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
2193 I = (c1 - 0xA1) * BIG5_SAME_ROW
2194 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
2198 charset = Vcharset_chinese_big5_1;
2202 charset = Vcharset_chinese_big5_2;
2203 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
2205 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
2208 if ((final = XCHARSET_FINAL (charset)) >= '0')
2210 if (XCHARSET_DIMENSION (charset) == 1)
2212 switch (XCHARSET_CHARS (charset))
2216 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
2219 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
2227 switch (XCHARSET_CHARS (charset))
2230 return MIN_CHAR_94x94
2231 + (final - '0') * 94 * 94
2232 + (((code_point >> 8) & 0x7F) - 33) * 94
2233 + ((code_point & 0x7F) - 33);
2235 return MIN_CHAR_96x96
2236 + (final - '0') * 96 * 96
2237 + (((code_point >> 8) & 0x7F) - 32) * 96
2238 + ((code_point & 0x7F) - 32);
2245 else if (XCHARSET_UCS_MAX (charset))
2248 = (XCHARSET_DIMENSION (charset) == 1
2250 code_point - XCHARSET_BYTE_OFFSET (charset)
2252 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
2253 * XCHARSET_CHARS (charset)
2254 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
2255 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
2256 if ((cid < XCHARSET_UCS_MIN (charset))
2257 || (XCHARSET_UCS_MAX (charset) < cid))
2266 range_charset_code_point (Lisp_Object charset, Emchar ch)
2270 if ((XCHARSET_UCS_MIN (charset) <= ch)
2271 && (ch <= XCHARSET_UCS_MAX (charset)))
2273 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
2275 if (XCHARSET_CHARS (charset) == 256)
2277 else if (XCHARSET_DIMENSION (charset) == 1)
2278 return d + XCHARSET_BYTE_OFFSET (charset);
2279 else if (XCHARSET_DIMENSION (charset) == 2)
2281 ((d / XCHARSET_CHARS (charset)
2282 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2283 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2284 else if (XCHARSET_DIMENSION (charset) == 3)
2286 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2287 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2288 | ((d / XCHARSET_CHARS (charset)
2289 % XCHARSET_CHARS (charset)
2290 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2291 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2292 else /* if (XCHARSET_DIMENSION (charset) == 4) */
2294 ((d / (XCHARSET_CHARS (charset)
2295 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2296 + XCHARSET_BYTE_OFFSET (charset)) << 24)
2297 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2298 % XCHARSET_CHARS (charset)
2299 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2300 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
2301 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2302 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2304 else if (XCHARSET_CODE_OFFSET (charset) == 0)
2306 if (XCHARSET_DIMENSION (charset) == 1)
2308 if (XCHARSET_CHARS (charset) == 94)
2310 if (((d = ch - (MIN_CHAR_94
2311 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
2315 else if (XCHARSET_CHARS (charset) == 96)
2317 if (((d = ch - (MIN_CHAR_96
2318 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
2325 else if (XCHARSET_DIMENSION (charset) == 2)
2327 if (XCHARSET_CHARS (charset) == 94)
2329 if (((d = ch - (MIN_CHAR_94x94
2330 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
2333 return (((d / 94) + 33) << 8) | (d % 94 + 33);
2335 else if (XCHARSET_CHARS (charset) == 96)
2337 if (((d = ch - (MIN_CHAR_96x96
2338 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
2341 return (((d / 96) + 32) << 8) | (d % 96 + 32);
2347 if (EQ (charset, Vcharset_mojikyo_2022_1)
2348 && (MIN_CHAR_MOJIKYO < ch) && (ch < MIN_CHAR_MOJIKYO + 94 * 60 * 94))
2350 int m = ch - MIN_CHAR_MOJIKYO - 1;
2351 int byte1 = m / (94 * 60) + 33;
2352 int byte2 = (m % (94 * 60)) / 94;
2353 int byte3 = m % 94 + 33;
2359 return (byte1 << 16) | (byte2 << 8) | byte3;
2365 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
2367 if (c <= MAX_CHAR_BASIC_LATIN)
2369 *charset = Vcharset_ascii;
2374 *charset = Vcharset_control_1;
2379 *charset = Vcharset_latin_iso8859_1;
2383 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
2385 *charset = Vcharset_hebrew_iso8859_8;
2386 return c - MIN_CHAR_HEBREW + 0x20;
2389 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
2391 *charset = Vcharset_thai_tis620;
2392 return c - MIN_CHAR_THAI + 0x20;
2395 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
2396 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
2398 return list2 (Vcharset_katakana_jisx0201,
2399 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
2402 else if (c <= MAX_CHAR_BMP)
2404 *charset = Vcharset_ucs_bmp;
2407 else if (c < MIN_CHAR_DAIKANWA)
2409 *charset = Vcharset_ucs;
2412 else if (c <= MAX_CHAR_DAIKANWA)
2414 *charset = Vcharset_ideograph_daikanwa;
2415 return c - MIN_CHAR_DAIKANWA;
2417 else if (c <= MAX_CHAR_MOJIKYO_0)
2419 *charset = Vcharset_mojikyo;
2420 return c - MIN_CHAR_MOJIKYO_0;
2422 else if (c < MIN_CHAR_94)
2424 *charset = Vcharset_ucs;
2427 else if (c <= MAX_CHAR_94)
2429 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
2430 ((c - MIN_CHAR_94) / 94) + '0',
2431 CHARSET_LEFT_TO_RIGHT);
2432 if (!NILP (*charset))
2433 return ((c - MIN_CHAR_94) % 94) + 33;
2436 *charset = Vcharset_ucs;
2440 else if (c <= MAX_CHAR_96)
2442 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
2443 ((c - MIN_CHAR_96) / 96) + '0',
2444 CHARSET_LEFT_TO_RIGHT);
2445 if (!NILP (*charset))
2446 return ((c - MIN_CHAR_96) % 96) + 32;
2449 *charset = Vcharset_ucs;
2453 else if (c <= MAX_CHAR_94x94)
2456 = CHARSET_BY_ATTRIBUTES (94, 2,
2457 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
2458 CHARSET_LEFT_TO_RIGHT);
2459 if (!NILP (*charset))
2460 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
2461 | (((c - MIN_CHAR_94x94) % 94) + 33);
2464 *charset = Vcharset_ucs;
2468 else if (c <= MAX_CHAR_96x96)
2471 = CHARSET_BY_ATTRIBUTES (96, 2,
2472 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
2473 CHARSET_LEFT_TO_RIGHT);
2474 if (!NILP (*charset))
2475 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
2476 | (((c - MIN_CHAR_96x96) % 96) + 32);
2479 *charset = Vcharset_ucs;
2483 else if (c < MIN_CHAR_MOJIKYO)
2485 *charset = Vcharset_ucs;
2488 else if (c <= MAX_CHAR_MOJIKYO)
2490 *charset = Vcharset_mojikyo;
2491 return c - MIN_CHAR_MOJIKYO;
2495 *charset = Vcharset_ucs;
2500 Lisp_Object Vdefault_coded_charset_priority_list;
2504 /************************************************************************/
2505 /* Basic charset Lisp functions */
2506 /************************************************************************/
2508 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
2509 Return non-nil if OBJECT is a charset.
2513 return CHARSETP (object) ? Qt : Qnil;
2516 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
2517 Retrieve the charset of the given name.
2518 If CHARSET-OR-NAME is a charset object, it is simply returned.
2519 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
2520 nil is returned. Otherwise the associated charset object is returned.
2524 if (CHARSETP (charset_or_name))
2525 return charset_or_name;
2527 CHECK_SYMBOL (charset_or_name);
2528 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
2531 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
2532 Retrieve the charset of the given name.
2533 Same as `find-charset' except an error is signalled if there is no such
2534 charset instead of returning nil.
2538 Lisp_Object charset = Ffind_charset (name);
2541 signal_simple_error ("No such charset", name);
2545 /* We store the charsets in hash tables with the names as the key and the
2546 actual charset object as the value. Occasionally we need to use them
2547 in a list format. These routines provide us with that. */
2548 struct charset_list_closure
2550 Lisp_Object *charset_list;
2554 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
2555 void *charset_list_closure)
2557 /* This function can GC */
2558 struct charset_list_closure *chcl =
2559 (struct charset_list_closure*) charset_list_closure;
2560 Lisp_Object *charset_list = chcl->charset_list;
2562 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
2566 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
2567 Return a list of the names of all defined charsets.
2571 Lisp_Object charset_list = Qnil;
2572 struct gcpro gcpro1;
2573 struct charset_list_closure charset_list_closure;
2575 GCPRO1 (charset_list);
2576 charset_list_closure.charset_list = &charset_list;
2577 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
2578 &charset_list_closure);
2581 return charset_list;
2584 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
2585 Return the name of charset CHARSET.
2589 return XCHARSET_NAME (Fget_charset (charset));
2592 /* #### SJT Should generic properties be allowed? */
2593 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
2594 Define a new character set.
2595 This function is for use with Mule support.
2596 NAME is a symbol, the name by which the character set is normally referred.
2597 DOC-STRING is a string describing the character set.
2598 PROPS is a property list, describing the specific nature of the
2599 character set. Recognized properties are:
2601 'short-name Short version of the charset name (ex: Latin-1)
2602 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
2603 'registry A regular expression matching the font registry field for
2605 'dimension Number of octets used to index a character in this charset.
2606 Either 1 or 2. Defaults to 1.
2607 'columns Number of columns used to display a character in this charset.
2608 Only used in TTY mode. (Under X, the actual width of a
2609 character can be derived from the font used to display the
2610 characters.) If unspecified, defaults to the dimension
2611 (this is almost always the correct value).
2612 'chars Number of characters in each dimension (94 or 96).
2613 Defaults to 94. Note that if the dimension is 2, the
2614 character set thus described is 94x94 or 96x96.
2615 'final Final byte of ISO 2022 escape sequence. Must be
2616 supplied. Each combination of (DIMENSION, CHARS) defines a
2617 separate namespace for final bytes. Note that ISO
2618 2022 restricts the final byte to the range
2619 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
2620 dimension == 2. Note also that final bytes in the range
2621 0x30 - 0x3F are reserved for user-defined (not official)
2623 'graphic 0 (use left half of font on output) or 1 (use right half
2624 of font on output). Defaults to 0. For example, for
2625 a font whose registry is ISO8859-1, the left half
2626 (octets 0x20 - 0x7F) is the `ascii' character set, while
2627 the right half (octets 0xA0 - 0xFF) is the `latin-1'
2628 character set. With 'graphic set to 0, the octets
2629 will have their high bit cleared; with it set to 1,
2630 the octets will have their high bit set.
2631 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
2633 'ccl-program A compiled CCL program used to convert a character in
2634 this charset into an index into the font. This is in
2635 addition to the 'graphic property. The CCL program
2636 is passed the octets of the character, with the high
2637 bit cleared and set depending upon whether the value
2638 of the 'graphic property is 0 or 1.
2640 (name, doc_string, props))
2642 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
2643 int direction = CHARSET_LEFT_TO_RIGHT;
2644 Lisp_Object registry = Qnil;
2645 Lisp_Object charset;
2646 Lisp_Object ccl_program = Qnil;
2647 Lisp_Object short_name = Qnil, long_name = Qnil;
2648 int byte_offset = -1;
2650 CHECK_SYMBOL (name);
2651 if (!NILP (doc_string))
2652 CHECK_STRING (doc_string);
2654 charset = Ffind_charset (name);
2655 if (!NILP (charset))
2656 signal_simple_error ("Cannot redefine existing charset", name);
2659 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
2661 if (EQ (keyword, Qshort_name))
2663 CHECK_STRING (value);
2667 if (EQ (keyword, Qlong_name))
2669 CHECK_STRING (value);
2673 else if (EQ (keyword, Qdimension))
2676 dimension = XINT (value);
2677 if (dimension < 1 || dimension > 2)
2678 signal_simple_error ("Invalid value for 'dimension", value);
2681 else if (EQ (keyword, Qchars))
2684 chars = XINT (value);
2685 if (chars != 94 && chars != 96)
2686 signal_simple_error ("Invalid value for 'chars", value);
2689 else if (EQ (keyword, Qcolumns))
2692 columns = XINT (value);
2693 if (columns != 1 && columns != 2)
2694 signal_simple_error ("Invalid value for 'columns", value);
2697 else if (EQ (keyword, Qgraphic))
2700 graphic = XINT (value);
2702 if (graphic < 0 || graphic > 2)
2704 if (graphic < 0 || graphic > 1)
2706 signal_simple_error ("Invalid value for 'graphic", value);
2709 else if (EQ (keyword, Qregistry))
2711 CHECK_STRING (value);
2715 else if (EQ (keyword, Qdirection))
2717 if (EQ (value, Ql2r))
2718 direction = CHARSET_LEFT_TO_RIGHT;
2719 else if (EQ (value, Qr2l))
2720 direction = CHARSET_RIGHT_TO_LEFT;
2722 signal_simple_error ("Invalid value for 'direction", value);
2725 else if (EQ (keyword, Qfinal))
2727 CHECK_CHAR_COERCE_INT (value);
2728 final = XCHAR (value);
2729 if (final < '0' || final > '~')
2730 signal_simple_error ("Invalid value for 'final", value);
2733 else if (EQ (keyword, Qccl_program))
2735 struct ccl_program test_ccl;
2737 if (setup_ccl_program (&test_ccl, value) < 0)
2738 signal_simple_error ("Invalid value for 'ccl-program", value);
2739 ccl_program = value;
2743 signal_simple_error ("Unrecognized property", keyword);
2748 error ("'final must be specified");
2749 if (dimension == 2 && final > 0x5F)
2751 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2754 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2755 CHARSET_LEFT_TO_RIGHT)) ||
2756 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2757 CHARSET_RIGHT_TO_LEFT)))
2759 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2761 id = get_unallocated_leading_byte (dimension);
2763 if (NILP (doc_string))
2764 doc_string = build_string ("");
2766 if (NILP (registry))
2767 registry = build_string ("");
2769 if (NILP (short_name))
2770 XSETSTRING (short_name, XSYMBOL (name)->name);
2772 if (NILP (long_name))
2773 long_name = doc_string;
2776 columns = dimension;
2778 if (byte_offset < 0)
2782 else if (chars == 96)
2788 charset = make_charset (id, name, chars, dimension, columns, graphic,
2789 final, direction, short_name, long_name,
2790 doc_string, registry,
2791 Qnil, 0, 0, 0, byte_offset);
2792 if (!NILP (ccl_program))
2793 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2797 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2799 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2800 NEW-NAME is the name of the new charset. Return the new charset.
2802 (charset, new_name))
2804 Lisp_Object new_charset = Qnil;
2805 int id, chars, dimension, columns, graphic, final;
2807 Lisp_Object registry, doc_string, short_name, long_name;
2810 charset = Fget_charset (charset);
2811 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2812 signal_simple_error ("Charset already has reverse-direction charset",
2815 CHECK_SYMBOL (new_name);
2816 if (!NILP (Ffind_charset (new_name)))
2817 signal_simple_error ("Cannot redefine existing charset", new_name);
2819 cs = XCHARSET (charset);
2821 chars = CHARSET_CHARS (cs);
2822 dimension = CHARSET_DIMENSION (cs);
2823 columns = CHARSET_COLUMNS (cs);
2824 id = get_unallocated_leading_byte (dimension);
2826 graphic = CHARSET_GRAPHIC (cs);
2827 final = CHARSET_FINAL (cs);
2828 direction = CHARSET_RIGHT_TO_LEFT;
2829 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2830 direction = CHARSET_LEFT_TO_RIGHT;
2831 doc_string = CHARSET_DOC_STRING (cs);
2832 short_name = CHARSET_SHORT_NAME (cs);
2833 long_name = CHARSET_LONG_NAME (cs);
2834 registry = CHARSET_REGISTRY (cs);
2836 new_charset = make_charset (id, new_name, chars, dimension, columns,
2837 graphic, final, direction, short_name, long_name,
2838 doc_string, registry,
2840 CHARSET_DECODING_TABLE(cs),
2841 CHARSET_UCS_MIN(cs),
2842 CHARSET_UCS_MAX(cs),
2843 CHARSET_CODE_OFFSET(cs),
2844 CHARSET_BYTE_OFFSET(cs)
2850 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2851 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
2856 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2857 Define symbol ALIAS as an alias for CHARSET.
2861 CHECK_SYMBOL (alias);
2862 charset = Fget_charset (charset);
2863 return Fputhash (alias, charset, Vcharset_hash_table);
2866 /* #### Reverse direction charsets not yet implemented. */
2868 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2870 Return the reverse-direction charset parallel to CHARSET, if any.
2871 This is the charset with the same properties (in particular, the same
2872 dimension, number of characters per dimension, and final byte) as
2873 CHARSET but whose characters are displayed in the opposite direction.
2877 charset = Fget_charset (charset);
2878 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
2882 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2883 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2884 If DIRECTION is omitted, both directions will be checked (left-to-right
2885 will be returned if character sets exist for both directions).
2887 (dimension, chars, final, direction))
2889 int dm, ch, fi, di = -1;
2890 Lisp_Object obj = Qnil;
2892 CHECK_INT (dimension);
2893 dm = XINT (dimension);
2894 if (dm < 1 || dm > 2)
2895 signal_simple_error ("Invalid value for DIMENSION", dimension);
2899 if (ch != 94 && ch != 96)
2900 signal_simple_error ("Invalid value for CHARS", chars);
2902 CHECK_CHAR_COERCE_INT (final);
2904 if (fi < '0' || fi > '~')
2905 signal_simple_error ("Invalid value for FINAL", final);
2907 if (EQ (direction, Ql2r))
2908 di = CHARSET_LEFT_TO_RIGHT;
2909 else if (EQ (direction, Qr2l))
2910 di = CHARSET_RIGHT_TO_LEFT;
2911 else if (!NILP (direction))
2912 signal_simple_error ("Invalid value for DIRECTION", direction);
2914 if (dm == 2 && fi > 0x5F)
2916 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2920 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
2922 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
2925 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
2928 return XCHARSET_NAME (obj);
2932 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2933 Return short name of CHARSET.
2937 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2940 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2941 Return long name of CHARSET.
2945 return XCHARSET_LONG_NAME (Fget_charset (charset));
2948 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2949 Return description of CHARSET.
2953 return XCHARSET_DOC_STRING (Fget_charset (charset));
2956 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2957 Return dimension of CHARSET.
2961 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2964 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2965 Return property PROP of CHARSET, a charset object or symbol naming a charset.
2966 Recognized properties are those listed in `make-charset', as well as
2967 'name and 'doc-string.
2973 charset = Fget_charset (charset);
2974 cs = XCHARSET (charset);
2976 CHECK_SYMBOL (prop);
2977 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2978 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2979 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2980 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2981 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2982 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2983 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2984 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2985 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2986 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2987 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2988 if (EQ (prop, Qdirection))
2989 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2990 if (EQ (prop, Qreverse_direction_charset))
2992 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2993 /* #### Is this translation OK? If so, error checking sufficient? */
2994 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
2996 signal_simple_error ("Unrecognized charset property name", prop);
2997 return Qnil; /* not reached */
3000 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
3001 Return charset identification number of CHARSET.
3005 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
3008 /* #### We need to figure out which properties we really want to
3011 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
3012 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
3014 (charset, ccl_program))
3016 struct ccl_program test_ccl;
3018 charset = Fget_charset (charset);
3019 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
3020 signal_simple_error ("Invalid ccl-program", ccl_program);
3021 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
3026 invalidate_charset_font_caches (Lisp_Object charset)
3028 /* Invalidate font cache entries for charset on all devices. */
3029 Lisp_Object devcons, concons, hash_table;
3030 DEVICE_LOOP_NO_BREAK (devcons, concons)
3032 struct device *d = XDEVICE (XCAR (devcons));
3033 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
3034 if (!UNBOUNDP (hash_table))
3035 Fclrhash (hash_table);
3039 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
3040 Set the 'registry property of CHARSET to REGISTRY.
3042 (charset, registry))
3044 charset = Fget_charset (charset);
3045 CHECK_STRING (registry);
3046 XCHARSET_REGISTRY (charset) = registry;
3047 invalidate_charset_font_caches (charset);
3048 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
3053 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
3054 Return mapping-table of CHARSET.
3058 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
3061 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
3062 Set mapping-table of CHARSET to TABLE.
3066 struct Lisp_Charset *cs;
3070 charset = Fget_charset (charset);
3071 cs = XCHARSET (charset);
3075 if (VECTORP (CHARSET_DECODING_TABLE(cs)))
3076 make_vector_newer (CHARSET_DECODING_TABLE(cs));
3077 CHARSET_DECODING_TABLE(cs) = Qnil;
3080 else if (VECTORP (table))
3082 int ccs_len = CHARSET_BYTE_SIZE (cs);
3083 int ret = decoding_table_check_elements (table,
3084 CHARSET_DIMENSION (cs),
3089 signal_simple_error ("Too big table", table);
3091 signal_simple_error ("Invalid element is found", table);
3093 signal_simple_error ("Something wrong", table);
3095 CHARSET_DECODING_TABLE(cs) = Qnil;
3098 signal_error (Qwrong_type_argument,
3099 list2 (build_translated_string ("vector-or-nil-p"),
3102 byte_offset = CHARSET_BYTE_OFFSET (cs);
3103 switch (CHARSET_DIMENSION (cs))
3106 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3108 Lisp_Object c = XVECTOR_DATA(table)[i];
3111 put_char_ccs_code_point (c, charset,
3112 make_int (i + byte_offset));
3116 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3118 Lisp_Object v = XVECTOR_DATA(table)[i];
3124 for (j = 0; j < XVECTOR_LENGTH (v); j++)
3126 Lisp_Object c = XVECTOR_DATA(v)[j];
3129 put_char_ccs_code_point
3131 make_int ( ( (i + byte_offset) << 8 )
3137 put_char_ccs_code_point (v, charset,
3138 make_int (i + byte_offset));
3147 /************************************************************************/
3148 /* Lisp primitives for working with characters */
3149 /************************************************************************/
3152 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
3153 Make a character from CHARSET and code-point CODE.
3159 charset = Fget_charset (charset);
3162 if (XCHARSET_GRAPHIC (charset) == 1)
3164 c = DECODE_CHAR (charset, c);
3165 return c >= 0 ? make_char (c) : Qnil;
3168 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
3169 Make a builtin character from CHARSET and code-point CODE.
3175 charset = Fget_charset (charset);
3177 if (EQ (charset, Vcharset_latin_viscii))
3179 Lisp_Object chr = Fdecode_char (charset, code);
3185 (ret = Fget_char_attribute (chr,
3186 Vcharset_latin_viscii_lower,
3189 charset = Vcharset_latin_viscii_lower;
3193 (ret = Fget_char_attribute (chr,
3194 Vcharset_latin_viscii_upper,
3197 charset = Vcharset_latin_viscii_upper;
3204 if (XCHARSET_GRAPHIC (charset) == 1)
3207 c = decode_builtin_char (charset, c);
3208 return c >= 0 ? make_char (c) : Fdecode_char (charset, code);
3212 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
3213 Make a character from CHARSET and octets ARG1 and ARG2.
3214 ARG2 is required only for characters from two-dimensional charsets.
3215 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
3216 character s with caron.
3218 (charset, arg1, arg2))
3222 int lowlim, highlim;
3224 charset = Fget_charset (charset);
3225 cs = XCHARSET (charset);
3227 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
3228 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
3230 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
3232 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
3233 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
3236 /* It is useful (and safe, according to Olivier Galibert) to strip
3237 the 8th bit off ARG1 and ARG2 because it allows programmers to
3238 write (make-char 'latin-iso8859-2 CODE) where code is the actual
3239 Latin 2 code of the character. */
3247 if (a1 < lowlim || a1 > highlim)
3248 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
3250 if (CHARSET_DIMENSION (cs) == 1)
3254 ("Charset is of dimension one; second octet must be nil", arg2);
3255 return make_char (MAKE_CHAR (charset, a1, 0));
3264 a2 = XINT (arg2) & 0x7f;
3266 if (a2 < lowlim || a2 > highlim)
3267 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
3269 return make_char (MAKE_CHAR (charset, a1, a2));
3272 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
3273 Return the character set of CHARACTER.
3277 CHECK_CHAR_COERCE_INT (character);
3279 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
3282 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
3283 Return the octet numbered N (should be 0 or 1) of CHARACTER.
3284 N defaults to 0 if omitted.
3288 Lisp_Object charset;
3291 CHECK_CHAR_COERCE_INT (character);
3293 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
3295 if (NILP (n) || EQ (n, Qzero))
3296 return make_int (octet0);
3297 else if (EQ (n, make_int (1)))
3298 return make_int (octet1);
3300 signal_simple_error ("Octet number must be 0 or 1", n);
3303 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
3304 Return list of charset and one or two position-codes of CHARACTER.
3308 /* This function can GC */
3309 struct gcpro gcpro1, gcpro2;
3310 Lisp_Object charset = Qnil;
3311 Lisp_Object rc = Qnil;
3319 GCPRO2 (charset, rc);
3320 CHECK_CHAR_COERCE_INT (character);
3323 code_point = ENCODE_CHAR (XCHAR (character), charset);
3324 dimension = XCHARSET_DIMENSION (charset);
3325 while (dimension > 0)
3327 rc = Fcons (make_int (code_point & 255), rc);
3331 rc = Fcons (XCHARSET_NAME (charset), rc);
3333 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
3335 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
3337 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
3341 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
3350 #ifdef ENABLE_COMPOSITE_CHARS
3351 /************************************************************************/
3352 /* composite character functions */
3353 /************************************************************************/
3356 lookup_composite_char (Bufbyte *str, int len)
3358 Lisp_Object lispstr = make_string (str, len);
3359 Lisp_Object ch = Fgethash (lispstr,
3360 Vcomposite_char_string2char_hash_table,
3366 if (composite_char_row_next >= 128)
3367 signal_simple_error ("No more composite chars available", lispstr);
3368 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
3369 composite_char_col_next);
3370 Fputhash (make_char (emch), lispstr,
3371 Vcomposite_char_char2string_hash_table);
3372 Fputhash (lispstr, make_char (emch),
3373 Vcomposite_char_string2char_hash_table);
3374 composite_char_col_next++;
3375 if (composite_char_col_next >= 128)
3377 composite_char_col_next = 32;
3378 composite_char_row_next++;
3387 composite_char_string (Emchar ch)
3389 Lisp_Object str = Fgethash (make_char (ch),
3390 Vcomposite_char_char2string_hash_table,
3392 assert (!UNBOUNDP (str));
3396 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
3397 Convert a string into a single composite character.
3398 The character is the result of overstriking all the characters in
3403 CHECK_STRING (string);
3404 return make_char (lookup_composite_char (XSTRING_DATA (string),
3405 XSTRING_LENGTH (string)));
3408 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
3409 Return a string of the characters comprising a composite character.
3417 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
3418 signal_simple_error ("Must be composite char", ch);
3419 return composite_char_string (emch);
3421 #endif /* ENABLE_COMPOSITE_CHARS */
3424 /************************************************************************/
3425 /* initialization */
3426 /************************************************************************/
3429 syms_of_mule_charset (void)
3432 INIT_LRECORD_IMPLEMENTATION (uint8_byte_table);
3433 INIT_LRECORD_IMPLEMENTATION (uint16_byte_table);
3434 INIT_LRECORD_IMPLEMENTATION (byte_table);
3435 INIT_LRECORD_IMPLEMENTATION (char_id_table);
3437 INIT_LRECORD_IMPLEMENTATION (charset);
3439 DEFSUBR (Fcharsetp);
3440 DEFSUBR (Ffind_charset);
3441 DEFSUBR (Fget_charset);
3442 DEFSUBR (Fcharset_list);
3443 DEFSUBR (Fcharset_name);
3444 DEFSUBR (Fmake_charset);
3445 DEFSUBR (Fmake_reverse_direction_charset);
3446 /* DEFSUBR (Freverse_direction_charset); */
3447 DEFSUBR (Fdefine_charset_alias);
3448 DEFSUBR (Fcharset_from_attributes);
3449 DEFSUBR (Fcharset_short_name);
3450 DEFSUBR (Fcharset_long_name);
3451 DEFSUBR (Fcharset_description);
3452 DEFSUBR (Fcharset_dimension);
3453 DEFSUBR (Fcharset_property);
3454 DEFSUBR (Fcharset_id);
3455 DEFSUBR (Fset_charset_ccl_program);
3456 DEFSUBR (Fset_charset_registry);
3458 DEFSUBR (Fchar_attribute_list);
3459 DEFSUBR (Ffind_char_attribute_table);
3460 DEFSUBR (Fchar_attribute_alist);
3461 DEFSUBR (Fget_char_attribute);
3462 DEFSUBR (Fput_char_attribute);
3463 DEFSUBR (Fremove_char_attribute);
3464 DEFSUBR (Fdefine_char);
3465 DEFSUBR (Fchar_variants);
3466 DEFSUBR (Fget_composite_char);
3467 DEFSUBR (Fcharset_mapping_table);
3468 DEFSUBR (Fset_charset_mapping_table);
3472 DEFSUBR (Fdecode_char);
3473 DEFSUBR (Fdecode_builtin_char);
3475 DEFSUBR (Fmake_char);
3476 DEFSUBR (Fchar_charset);
3477 DEFSUBR (Fchar_octet);
3478 DEFSUBR (Fsplit_char);
3480 #ifdef ENABLE_COMPOSITE_CHARS
3481 DEFSUBR (Fmake_composite_char);
3482 DEFSUBR (Fcomposite_char_string);
3485 defsymbol (&Qcharsetp, "charsetp");
3486 defsymbol (&Qregistry, "registry");
3487 defsymbol (&Qfinal, "final");
3488 defsymbol (&Qgraphic, "graphic");
3489 defsymbol (&Qdirection, "direction");
3490 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
3491 defsymbol (&Qshort_name, "short-name");
3492 defsymbol (&Qlong_name, "long-name");
3494 defsymbol (&Ql2r, "l2r");
3495 defsymbol (&Qr2l, "r2l");
3497 /* Charsets, compatible with FSF 20.3
3498 Naming convention is Script-Charset[-Edition] */
3499 defsymbol (&Qascii, "ascii");
3500 defsymbol (&Qcontrol_1, "control-1");
3501 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
3502 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
3503 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
3504 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
3505 defsymbol (&Qthai_tis620, "thai-tis620");
3506 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
3507 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
3508 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
3509 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
3510 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
3511 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
3512 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
3513 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
3514 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
3515 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
3516 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
3517 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
3518 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
3519 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
3520 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
3521 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
3523 defsymbol (&Qto_ucs, "=>ucs");
3524 defsymbol (&Q_ucs, "->ucs");
3525 defsymbol (&Q_decomposition, "->decomposition");
3526 defsymbol (&Qcompat, "compat");
3527 defsymbol (&Qisolated, "isolated");
3528 defsymbol (&Qinitial, "initial");
3529 defsymbol (&Qmedial, "medial");
3530 defsymbol (&Qfinal, "final");
3531 defsymbol (&Qvertical, "vertical");
3532 defsymbol (&QnoBreak, "noBreak");
3533 defsymbol (&Qfraction, "fraction");
3534 defsymbol (&Qsuper, "super");
3535 defsymbol (&Qsub, "sub");
3536 defsymbol (&Qcircle, "circle");
3537 defsymbol (&Qsquare, "square");
3538 defsymbol (&Qwide, "wide");
3539 defsymbol (&Qnarrow, "narrow");
3540 defsymbol (&Qsmall, "small");
3541 defsymbol (&Qfont, "font");
3542 defsymbol (&Qucs, "ucs");
3543 defsymbol (&Qucs_bmp, "ucs-bmp");
3544 defsymbol (&Qucs_cns, "ucs-cns");
3545 defsymbol (&Qucs_big5, "ucs-big5");
3546 defsymbol (&Qlatin_viscii, "latin-viscii");
3547 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
3548 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
3549 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
3550 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
3551 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
3552 defsymbol (&Qideograph_gt, "ideograph-gt");
3553 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
3554 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
3555 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
3556 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
3557 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
3558 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
3559 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
3560 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
3561 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
3562 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
3563 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
3564 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
3565 defsymbol (&Qchinese_big5, "chinese-big5");
3566 defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp");
3567 defsymbol (&Qmojikyo, "mojikyo");
3568 defsymbol (&Qmojikyo_2022_1, "mojikyo-2022-1");
3569 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
3570 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
3571 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
3572 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
3573 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
3574 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
3575 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
3576 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
3577 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
3578 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
3579 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
3580 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
3581 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
3582 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
3583 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
3584 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
3585 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
3586 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
3587 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
3588 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
3589 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
3590 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
3592 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
3593 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
3595 defsymbol (&Qcomposite, "composite");
3599 vars_of_mule_charset (void)
3606 chlook = xnew (struct charset_lookup);
3607 dumpstruct (&chlook, &charset_lookup_description);
3609 /* Table of charsets indexed by leading byte. */
3610 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
3611 chlook->charset_by_leading_byte[i] = Qnil;
3614 /* Table of charsets indexed by type/final-byte. */
3615 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3616 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3617 chlook->charset_by_attributes[i][j] = Qnil;
3619 /* Table of charsets indexed by type/final-byte/direction. */
3620 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3621 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3622 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
3623 chlook->charset_by_attributes[i][j][k] = Qnil;
3627 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
3629 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
3630 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
3634 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3635 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
3636 Leading-code of private TYPE9N charset of column-width 1.
3638 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3642 Vutf_2000_version = build_string("0.17 (Hōryūji)");
3643 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
3644 Version number of UTF-2000.
3647 staticpro (&Vcharacter_composition_table);
3648 Vcharacter_composition_table = make_char_id_table (Qnil);
3650 staticpro (&Vcharacter_variant_table);
3651 Vcharacter_variant_table = make_char_id_table (Qnil);
3653 Vdefault_coded_charset_priority_list = Qnil;
3654 DEFVAR_LISP ("default-coded-charset-priority-list",
3655 &Vdefault_coded_charset_priority_list /*
3656 Default order of preferred coded-character-sets.
3662 complex_vars_of_mule_charset (void)
3664 staticpro (&Vcharset_hash_table);
3665 Vcharset_hash_table =
3666 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3668 /* Predefined character sets. We store them into variables for
3672 staticpro (&Vchar_attribute_hash_table);
3673 Vchar_attribute_hash_table
3674 = make_lisp_hash_table (16, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3676 staticpro (&Vcharset_ucs);
3678 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
3679 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3680 build_string ("UCS"),
3681 build_string ("UCS"),
3682 build_string ("ISO/IEC 10646"),
3684 Qnil, 0, 0xFFFFFFF, 0, 0);
3685 staticpro (&Vcharset_ucs_bmp);
3687 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
3688 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3689 build_string ("BMP"),
3690 build_string ("BMP"),
3691 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
3692 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
3693 Qnil, 0, 0xFFFF, 0, 0);
3694 staticpro (&Vcharset_ucs_cns);
3696 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
3697 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3698 build_string ("UCS for CNS"),
3699 build_string ("UCS for CNS 11643"),
3700 build_string ("ISO/IEC 10646 for CNS 11643"),
3703 staticpro (&Vcharset_ucs_big5);
3705 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
3706 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3707 build_string ("UCS for Big5"),
3708 build_string ("UCS for Big5"),
3709 build_string ("ISO/IEC 10646 for Big5"),
3713 # define MIN_CHAR_THAI 0
3714 # define MAX_CHAR_THAI 0
3715 /* # define MIN_CHAR_HEBREW 0 */
3716 /* # define MAX_CHAR_HEBREW 0 */
3717 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3718 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
3720 staticpro (&Vcharset_ascii);
3722 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3723 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3724 build_string ("ASCII"),
3725 build_string ("ASCII)"),
3726 build_string ("ASCII (ISO646 IRV)"),
3727 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3728 Qnil, 0, 0x7F, 0, 0);
3729 staticpro (&Vcharset_control_1);
3730 Vcharset_control_1 =
3731 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3732 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3733 build_string ("C1"),
3734 build_string ("Control characters"),
3735 build_string ("Control characters 128-191"),
3737 Qnil, 0x80, 0x9F, 0, 0);
3738 staticpro (&Vcharset_latin_iso8859_1);
3739 Vcharset_latin_iso8859_1 =
3740 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3741 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3742 build_string ("Latin-1"),
3743 build_string ("ISO8859-1 (Latin-1)"),
3744 build_string ("ISO8859-1 (Latin-1)"),
3745 build_string ("iso8859-1"),
3746 Qnil, 0xA0, 0xFF, 0, 32);
3747 staticpro (&Vcharset_latin_iso8859_2);
3748 Vcharset_latin_iso8859_2 =
3749 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3750 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3751 build_string ("Latin-2"),
3752 build_string ("ISO8859-2 (Latin-2)"),
3753 build_string ("ISO8859-2 (Latin-2)"),
3754 build_string ("iso8859-2"),
3756 staticpro (&Vcharset_latin_iso8859_3);
3757 Vcharset_latin_iso8859_3 =
3758 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3759 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3760 build_string ("Latin-3"),
3761 build_string ("ISO8859-3 (Latin-3)"),
3762 build_string ("ISO8859-3 (Latin-3)"),
3763 build_string ("iso8859-3"),
3765 staticpro (&Vcharset_latin_iso8859_4);
3766 Vcharset_latin_iso8859_4 =
3767 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3768 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3769 build_string ("Latin-4"),
3770 build_string ("ISO8859-4 (Latin-4)"),
3771 build_string ("ISO8859-4 (Latin-4)"),
3772 build_string ("iso8859-4"),
3774 staticpro (&Vcharset_thai_tis620);
3775 Vcharset_thai_tis620 =
3776 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3777 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3778 build_string ("TIS620"),
3779 build_string ("TIS620 (Thai)"),
3780 build_string ("TIS620.2529 (Thai)"),
3781 build_string ("tis620"),
3782 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
3783 staticpro (&Vcharset_greek_iso8859_7);
3784 Vcharset_greek_iso8859_7 =
3785 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3786 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3787 build_string ("ISO8859-7"),
3788 build_string ("ISO8859-7 (Greek)"),
3789 build_string ("ISO8859-7 (Greek)"),
3790 build_string ("iso8859-7"),
3792 staticpro (&Vcharset_arabic_iso8859_6);
3793 Vcharset_arabic_iso8859_6 =
3794 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3795 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3796 build_string ("ISO8859-6"),
3797 build_string ("ISO8859-6 (Arabic)"),
3798 build_string ("ISO8859-6 (Arabic)"),
3799 build_string ("iso8859-6"),
3801 staticpro (&Vcharset_hebrew_iso8859_8);
3802 Vcharset_hebrew_iso8859_8 =
3803 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3804 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3805 build_string ("ISO8859-8"),
3806 build_string ("ISO8859-8 (Hebrew)"),
3807 build_string ("ISO8859-8 (Hebrew)"),
3808 build_string ("iso8859-8"),
3810 0 /* MIN_CHAR_HEBREW */,
3811 0 /* MAX_CHAR_HEBREW */, 0, 32);
3812 staticpro (&Vcharset_katakana_jisx0201);
3813 Vcharset_katakana_jisx0201 =
3814 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3815 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3816 build_string ("JISX0201 Kana"),
3817 build_string ("JISX0201.1976 (Japanese Kana)"),
3818 build_string ("JISX0201.1976 Japanese Kana"),
3819 build_string ("jisx0201\\.1976"),
3821 staticpro (&Vcharset_latin_jisx0201);
3822 Vcharset_latin_jisx0201 =
3823 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3824 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3825 build_string ("JISX0201 Roman"),
3826 build_string ("JISX0201.1976 (Japanese Roman)"),
3827 build_string ("JISX0201.1976 Japanese Roman"),
3828 build_string ("jisx0201\\.1976"),
3830 staticpro (&Vcharset_cyrillic_iso8859_5);
3831 Vcharset_cyrillic_iso8859_5 =
3832 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3833 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3834 build_string ("ISO8859-5"),
3835 build_string ("ISO8859-5 (Cyrillic)"),
3836 build_string ("ISO8859-5 (Cyrillic)"),
3837 build_string ("iso8859-5"),
3839 staticpro (&Vcharset_latin_iso8859_9);
3840 Vcharset_latin_iso8859_9 =
3841 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3842 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3843 build_string ("Latin-5"),
3844 build_string ("ISO8859-9 (Latin-5)"),
3845 build_string ("ISO8859-9 (Latin-5)"),
3846 build_string ("iso8859-9"),
3848 staticpro (&Vcharset_japanese_jisx0208_1978);
3849 Vcharset_japanese_jisx0208_1978 =
3850 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3851 Qjapanese_jisx0208_1978, 94, 2,
3852 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3853 build_string ("JIS X0208:1978"),
3854 build_string ("JIS X0208:1978 (Japanese)"),
3856 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3857 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3859 staticpro (&Vcharset_chinese_gb2312);
3860 Vcharset_chinese_gb2312 =
3861 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3862 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3863 build_string ("GB2312"),
3864 build_string ("GB2312)"),
3865 build_string ("GB2312 Chinese simplified"),
3866 build_string ("gb2312"),
3868 staticpro (&Vcharset_chinese_gb12345);
3869 Vcharset_chinese_gb12345 =
3870 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
3871 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3872 build_string ("G1"),
3873 build_string ("GB 12345)"),
3874 build_string ("GB 12345-1990"),
3875 build_string ("GB12345\\(\\.1990\\)?-0"),
3877 staticpro (&Vcharset_japanese_jisx0208);
3878 Vcharset_japanese_jisx0208 =
3879 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3880 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3881 build_string ("JISX0208"),
3882 build_string ("JIS X0208:1983 (Japanese)"),
3883 build_string ("JIS X0208:1983 Japanese Kanji"),
3884 build_string ("jisx0208\\.1983"),
3887 staticpro (&Vcharset_japanese_jisx0208_1990);
3888 Vcharset_japanese_jisx0208_1990 =
3889 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3890 Qjapanese_jisx0208_1990, 94, 2,
3891 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3892 build_string ("JISX0208-1990"),
3893 build_string ("JIS X0208:1990 (Japanese)"),
3894 build_string ("JIS X0208:1990 Japanese Kanji"),
3895 build_string ("jisx0208\\.1990"),
3897 MIN_CHAR_JIS_X0208_1990,
3898 MAX_CHAR_JIS_X0208_1990, 0, 33);
3900 staticpro (&Vcharset_korean_ksc5601);
3901 Vcharset_korean_ksc5601 =
3902 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3903 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3904 build_string ("KSC5601"),
3905 build_string ("KSC5601 (Korean"),
3906 build_string ("KSC5601 Korean Hangul and Hanja"),
3907 build_string ("ksc5601"),
3909 staticpro (&Vcharset_japanese_jisx0212);
3910 Vcharset_japanese_jisx0212 =
3911 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3912 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3913 build_string ("JISX0212"),
3914 build_string ("JISX0212 (Japanese)"),
3915 build_string ("JISX0212 Japanese Supplement"),
3916 build_string ("jisx0212"),
3919 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3920 staticpro (&Vcharset_chinese_cns11643_1);
3921 Vcharset_chinese_cns11643_1 =
3922 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3923 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3924 build_string ("CNS11643-1"),
3925 build_string ("CNS11643-1 (Chinese traditional)"),
3927 ("CNS 11643 Plane 1 Chinese traditional"),
3928 build_string (CHINESE_CNS_PLANE_RE("1")),
3930 staticpro (&Vcharset_chinese_cns11643_2);
3931 Vcharset_chinese_cns11643_2 =
3932 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3933 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3934 build_string ("CNS11643-2"),
3935 build_string ("CNS11643-2 (Chinese traditional)"),
3937 ("CNS 11643 Plane 2 Chinese traditional"),
3938 build_string (CHINESE_CNS_PLANE_RE("2")),
3941 staticpro (&Vcharset_latin_tcvn5712);
3942 Vcharset_latin_tcvn5712 =
3943 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3944 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3945 build_string ("TCVN 5712"),
3946 build_string ("TCVN 5712 (VSCII-2)"),
3947 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3948 build_string ("tcvn5712\\(\\.1993\\)?-1"),
3950 staticpro (&Vcharset_latin_viscii_lower);
3951 Vcharset_latin_viscii_lower =
3952 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3953 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3954 build_string ("VISCII lower"),
3955 build_string ("VISCII lower (Vietnamese)"),
3956 build_string ("VISCII lower (Vietnamese)"),
3957 build_string ("MULEVISCII-LOWER"),
3959 staticpro (&Vcharset_latin_viscii_upper);
3960 Vcharset_latin_viscii_upper =
3961 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3962 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3963 build_string ("VISCII upper"),
3964 build_string ("VISCII upper (Vietnamese)"),
3965 build_string ("VISCII upper (Vietnamese)"),
3966 build_string ("MULEVISCII-UPPER"),
3968 staticpro (&Vcharset_latin_viscii);
3969 Vcharset_latin_viscii =
3970 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3971 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3972 build_string ("VISCII"),
3973 build_string ("VISCII 1.1 (Vietnamese)"),
3974 build_string ("VISCII 1.1 (Vietnamese)"),
3975 build_string ("VISCII1\\.1"),
3977 staticpro (&Vcharset_chinese_big5);
3978 Vcharset_chinese_big5 =
3979 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
3980 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3981 build_string ("Big5"),
3982 build_string ("Big5"),
3983 build_string ("Big5 Chinese traditional"),
3984 build_string ("big5"),
3986 staticpro (&Vcharset_chinese_big5_cdp);
3987 Vcharset_chinese_big5_cdp =
3988 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
3989 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3990 build_string ("Big5-CDP"),
3991 build_string ("Big5 + CDP extension"),
3992 build_string ("Big5 with CDP extension"),
3993 build_string ("big5\\.cdp-0"),
3995 staticpro (&Vcharset_ideograph_gt);
3996 Vcharset_ideograph_gt =
3997 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
3998 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3999 build_string ("GT"),
4000 build_string ("GT"),
4001 build_string ("GT"),
4003 Qnil, MIN_CHAR_GT, MAX_CHAR_GT, 0, 0);
4004 #define DEF_GT_PJ(n) \
4005 staticpro (&Vcharset_ideograph_gt_pj_##n); \
4006 Vcharset_ideograph_gt_pj_##n = \
4007 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
4008 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
4009 build_string ("GT-PJ-"#n), \
4010 build_string ("GT (pseudo JIS encoding) part "#n), \
4011 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
4013 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
4027 staticpro (&Vcharset_ideograph_daikanwa);
4028 Vcharset_ideograph_daikanwa =
4029 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
4030 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4031 build_string ("Daikanwa"),
4032 build_string ("Morohashi's Daikanwa"),
4033 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
4034 build_string ("Daikanwa"),
4035 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
4036 staticpro (&Vcharset_mojikyo);
4038 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
4039 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4040 build_string ("Mojikyo"),
4041 build_string ("Mojikyo"),
4042 build_string ("Konjaku-Mojikyo"),
4044 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
4045 staticpro (&Vcharset_mojikyo_2022_1);
4046 Vcharset_mojikyo_2022_1 =
4047 make_charset (LEADING_BYTE_MOJIKYO_2022_1, Qmojikyo_2022_1, 94, 3,
4048 2, 2, ':', CHARSET_LEFT_TO_RIGHT,
4049 build_string ("Mojikyo-2022-1"),
4050 build_string ("Mojikyo ISO-2022 Part 1"),
4051 build_string ("Konjaku-Mojikyo for ISO/IEC 2022 Part 1"),
4055 #define DEF_MOJIKYO_PJ(n) \
4056 staticpro (&Vcharset_mojikyo_pj_##n); \
4057 Vcharset_mojikyo_pj_##n = \
4058 make_charset (LEADING_BYTE_MOJIKYO_PJ_##n, Qmojikyo_pj_##n, 94, 2, \
4059 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
4060 build_string ("Mojikyo-PJ-"#n), \
4061 build_string ("Mojikyo (pseudo JIS encoding) part "#n), \
4063 ("Konjaku-Mojikyo (pseudo JIS encoding) part "#n), \
4065 ("\\(MojikyoPJ-"#n "\\|jisx0208\\.Mojikyo-"#n "\\)$"), \
4077 DEF_MOJIKYO_PJ (10);
4078 DEF_MOJIKYO_PJ (11);
4079 DEF_MOJIKYO_PJ (12);
4080 DEF_MOJIKYO_PJ (13);
4081 DEF_MOJIKYO_PJ (14);
4082 DEF_MOJIKYO_PJ (15);
4083 DEF_MOJIKYO_PJ (16);
4084 DEF_MOJIKYO_PJ (17);
4085 DEF_MOJIKYO_PJ (18);
4086 DEF_MOJIKYO_PJ (19);
4087 DEF_MOJIKYO_PJ (20);
4088 DEF_MOJIKYO_PJ (21);
4090 staticpro (&Vcharset_ethiopic_ucs);
4091 Vcharset_ethiopic_ucs =
4092 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
4093 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4094 build_string ("Ethiopic (UCS)"),
4095 build_string ("Ethiopic (UCS)"),
4096 build_string ("Ethiopic of UCS"),
4097 build_string ("Ethiopic-Unicode"),
4098 Qnil, 0x1200, 0x137F, 0x1200, 0);
4100 staticpro (&Vcharset_chinese_big5_1);
4101 Vcharset_chinese_big5_1 =
4102 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
4103 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
4104 build_string ("Big5"),
4105 build_string ("Big5 (Level-1)"),
4107 ("Big5 Level-1 Chinese traditional"),
4108 build_string ("big5"),
4110 staticpro (&Vcharset_chinese_big5_2);
4111 Vcharset_chinese_big5_2 =
4112 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
4113 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
4114 build_string ("Big5"),
4115 build_string ("Big5 (Level-2)"),
4117 ("Big5 Level-2 Chinese traditional"),
4118 build_string ("big5"),
4121 #ifdef ENABLE_COMPOSITE_CHARS
4122 /* #### For simplicity, we put composite chars into a 96x96 charset.
4123 This is going to lead to problems because you can run out of
4124 room, esp. as we don't yet recycle numbers. */
4125 staticpro (&Vcharset_composite);
4126 Vcharset_composite =
4127 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
4128 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4129 build_string ("Composite"),
4130 build_string ("Composite characters"),
4131 build_string ("Composite characters"),
4134 /* #### not dumped properly */
4135 composite_char_row_next = 32;
4136 composite_char_col_next = 32;
4138 Vcomposite_char_string2char_hash_table =
4139 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
4140 Vcomposite_char_char2string_hash_table =
4141 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
4142 staticpro (&Vcomposite_char_string2char_hash_table);
4143 staticpro (&Vcomposite_char_char2string_hash_table);
4144 #endif /* ENABLE_COMPOSITE_CHARS */