1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 20.3. Not in FSF. */
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_cns;
70 Lisp_Object Vcharset_latin_viscii;
71 Lisp_Object Vcharset_latin_tcvn5712;
72 Lisp_Object Vcharset_latin_viscii_lower;
73 Lisp_Object Vcharset_latin_viscii_upper;
74 Lisp_Object Vcharset_chinese_big5;
75 Lisp_Object Vcharset_ideograph_gt;
76 Lisp_Object Vcharset_ideograph_gt_pj_1;
77 Lisp_Object Vcharset_ideograph_gt_pj_2;
78 Lisp_Object Vcharset_ideograph_gt_pj_3;
79 Lisp_Object Vcharset_ideograph_gt_pj_4;
80 Lisp_Object Vcharset_ideograph_gt_pj_5;
81 Lisp_Object Vcharset_ideograph_gt_pj_6;
82 Lisp_Object Vcharset_ideograph_gt_pj_7;
83 Lisp_Object Vcharset_ideograph_gt_pj_8;
84 Lisp_Object Vcharset_ideograph_gt_pj_9;
85 Lisp_Object Vcharset_ideograph_gt_pj_10;
86 Lisp_Object Vcharset_ideograph_gt_pj_11;
87 Lisp_Object Vcharset_ideograph_daikanwa;
88 Lisp_Object Vcharset_mojikyo;
89 Lisp_Object Vcharset_mojikyo_2022_1;
90 Lisp_Object Vcharset_mojikyo_pj_1;
91 Lisp_Object Vcharset_mojikyo_pj_2;
92 Lisp_Object Vcharset_mojikyo_pj_3;
93 Lisp_Object Vcharset_mojikyo_pj_4;
94 Lisp_Object Vcharset_mojikyo_pj_5;
95 Lisp_Object Vcharset_mojikyo_pj_6;
96 Lisp_Object Vcharset_mojikyo_pj_7;
97 Lisp_Object Vcharset_mojikyo_pj_8;
98 Lisp_Object Vcharset_mojikyo_pj_9;
99 Lisp_Object Vcharset_mojikyo_pj_10;
100 Lisp_Object Vcharset_mojikyo_pj_11;
101 Lisp_Object Vcharset_mojikyo_pj_12;
102 Lisp_Object Vcharset_mojikyo_pj_13;
103 Lisp_Object Vcharset_mojikyo_pj_14;
104 Lisp_Object Vcharset_mojikyo_pj_15;
105 Lisp_Object Vcharset_mojikyo_pj_16;
106 Lisp_Object Vcharset_mojikyo_pj_17;
107 Lisp_Object Vcharset_mojikyo_pj_18;
108 Lisp_Object Vcharset_mojikyo_pj_19;
109 Lisp_Object Vcharset_mojikyo_pj_20;
110 Lisp_Object Vcharset_mojikyo_pj_21;
111 Lisp_Object Vcharset_ethiopic_ucs;
113 Lisp_Object Vcharset_chinese_big5_1;
114 Lisp_Object Vcharset_chinese_big5_2;
116 #ifdef ENABLE_COMPOSITE_CHARS
117 Lisp_Object Vcharset_composite;
119 /* Hash tables for composite chars. One maps string representing
120 composed chars to their equivalent chars; one goes the
122 Lisp_Object Vcomposite_char_char2string_hash_table;
123 Lisp_Object Vcomposite_char_string2char_hash_table;
125 static int composite_char_row_next;
126 static int composite_char_col_next;
128 #endif /* ENABLE_COMPOSITE_CHARS */
130 struct charset_lookup *chlook;
132 static const struct lrecord_description charset_lookup_description_1[] = {
133 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
142 static const struct struct_description charset_lookup_description = {
143 sizeof (struct charset_lookup),
144 charset_lookup_description_1
148 /* Table of number of bytes in the string representation of a character
149 indexed by the first byte of that representation.
151 rep_bytes_by_first_byte(c) is more efficient than the equivalent
152 canonical computation:
154 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
156 const Bytecount rep_bytes_by_first_byte[0xA0] =
157 { /* 0x00 - 0x7f are for straight ASCII */
158 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
165 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
166 /* 0x80 - 0x8f are for Dimension-1 official charsets */
168 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
170 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
172 /* 0x90 - 0x9d are for Dimension-2 official charsets */
173 /* 0x9e is for Dimension-1 private charsets */
174 /* 0x9f is for Dimension-2 private charsets */
175 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
181 #define BT_UINT8_MIN 0
182 #define BT_UINT8_MAX (UCHAR_MAX - 3)
183 #define BT_UINT8_t (UCHAR_MAX - 2)
184 #define BT_UINT8_nil (UCHAR_MAX - 1)
185 #define BT_UINT8_unbound UCHAR_MAX
187 INLINE_HEADER int INT_UINT8_P (Lisp_Object obj);
188 INLINE_HEADER int UINT8_VALUE_P (Lisp_Object obj);
189 INLINE_HEADER unsigned char UINT8_ENCODE (Lisp_Object obj);
190 INLINE_HEADER Lisp_Object UINT8_DECODE (unsigned char n);
191 INLINE_HEADER unsigned short UINT8_TO_UINT16 (unsigned char n);
194 INT_UINT8_P (Lisp_Object obj)
198 int num = XINT (obj);
200 return (BT_UINT8_MIN <= num) && (num <= BT_UINT8_MAX);
207 UINT8_VALUE_P (Lisp_Object obj)
209 return EQ (obj, Qunbound)
210 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT8_P (obj);
213 INLINE_HEADER unsigned char
214 UINT8_ENCODE (Lisp_Object obj)
216 if (EQ (obj, Qunbound))
217 return BT_UINT8_unbound;
218 else if (EQ (obj, Qnil))
220 else if (EQ (obj, Qt))
226 INLINE_HEADER Lisp_Object
227 UINT8_DECODE (unsigned char n)
229 if (n == BT_UINT8_unbound)
231 else if (n == BT_UINT8_nil)
233 else if (n == BT_UINT8_t)
240 mark_uint8_byte_table (Lisp_Object obj)
246 print_uint8_byte_table (Lisp_Object obj,
247 Lisp_Object printcharfun, int escapeflag)
249 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
251 struct gcpro gcpro1, gcpro2;
252 GCPRO2 (obj, printcharfun);
254 write_c_string ("\n#<uint8-byte-table", printcharfun);
255 for (i = 0; i < 256; i++)
257 unsigned char n = bte->property[i];
259 write_c_string ("\n ", printcharfun);
260 write_c_string (" ", printcharfun);
261 if (n == BT_UINT8_unbound)
262 write_c_string ("void", printcharfun);
263 else if (n == BT_UINT8_nil)
264 write_c_string ("nil", printcharfun);
265 else if (n == BT_UINT8_t)
266 write_c_string ("t", printcharfun);
271 sprintf (buf, "%hd", n);
272 write_c_string (buf, printcharfun);
276 write_c_string (">", printcharfun);
280 uint8_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
282 Lisp_Uint8_Byte_Table *te1 = XUINT8_BYTE_TABLE (obj1);
283 Lisp_Uint8_Byte_Table *te2 = XUINT8_BYTE_TABLE (obj2);
286 for (i = 0; i < 256; i++)
287 if (te1->property[i] != te2->property[i])
293 uint8_byte_table_hash (Lisp_Object obj, int depth)
295 Lisp_Uint8_Byte_Table *te = XUINT8_BYTE_TABLE (obj);
299 for (i = 0; i < 256; i++)
300 hash = HASH2 (hash, te->property[i]);
304 DEFINE_LRECORD_IMPLEMENTATION ("uint8-byte-table", uint8_byte_table,
305 mark_uint8_byte_table,
306 print_uint8_byte_table,
307 0, uint8_byte_table_equal,
308 uint8_byte_table_hash,
309 0 /* uint8_byte_table_description */,
310 Lisp_Uint8_Byte_Table);
313 make_uint8_byte_table (unsigned char initval)
317 Lisp_Uint8_Byte_Table *cte;
319 cte = alloc_lcrecord_type (Lisp_Uint8_Byte_Table,
320 &lrecord_uint8_byte_table);
322 for (i = 0; i < 256; i++)
323 cte->property[i] = initval;
325 XSETUINT8_BYTE_TABLE (obj, cte);
330 uint8_byte_table_same_value_p (Lisp_Object obj)
332 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
333 unsigned char v0 = bte->property[0];
336 for (i = 1; i < 256; i++)
338 if (bte->property[i] != v0)
345 #define BT_UINT16_MIN 0
346 #define BT_UINT16_MAX (USHRT_MAX - 3)
347 #define BT_UINT16_t (USHRT_MAX - 2)
348 #define BT_UINT16_nil (USHRT_MAX - 1)
349 #define BT_UINT16_unbound USHRT_MAX
351 INLINE_HEADER int INT_UINT16_P (Lisp_Object obj);
352 INLINE_HEADER int UINT16_VALUE_P (Lisp_Object obj);
353 INLINE_HEADER unsigned short UINT16_ENCODE (Lisp_Object obj);
354 INLINE_HEADER Lisp_Object UINT16_DECODE (unsigned short us);
357 INT_UINT16_P (Lisp_Object obj)
361 int num = XINT (obj);
363 return (BT_UINT16_MIN <= num) && (num <= BT_UINT16_MAX);
370 UINT16_VALUE_P (Lisp_Object obj)
372 return EQ (obj, Qunbound)
373 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT16_P (obj);
376 INLINE_HEADER unsigned short
377 UINT16_ENCODE (Lisp_Object obj)
379 if (EQ (obj, Qunbound))
380 return BT_UINT16_unbound;
381 else if (EQ (obj, Qnil))
382 return BT_UINT16_nil;
383 else if (EQ (obj, Qt))
389 INLINE_HEADER Lisp_Object
390 UINT16_DECODE (unsigned short n)
392 if (n == BT_UINT16_unbound)
394 else if (n == BT_UINT16_nil)
396 else if (n == BT_UINT16_t)
402 INLINE_HEADER unsigned short
403 UINT8_TO_UINT16 (unsigned char n)
405 if (n == BT_UINT8_unbound)
406 return BT_UINT16_unbound;
407 else if (n == BT_UINT8_nil)
408 return BT_UINT16_nil;
409 else if (n == BT_UINT8_t)
416 mark_uint16_byte_table (Lisp_Object obj)
422 print_uint16_byte_table (Lisp_Object obj,
423 Lisp_Object printcharfun, int escapeflag)
425 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
427 struct gcpro gcpro1, gcpro2;
428 GCPRO2 (obj, printcharfun);
430 write_c_string ("\n#<uint16-byte-table", printcharfun);
431 for (i = 0; i < 256; i++)
433 unsigned short n = bte->property[i];
435 write_c_string ("\n ", printcharfun);
436 write_c_string (" ", printcharfun);
437 if (n == BT_UINT16_unbound)
438 write_c_string ("void", printcharfun);
439 else if (n == BT_UINT16_nil)
440 write_c_string ("nil", printcharfun);
441 else if (n == BT_UINT16_t)
442 write_c_string ("t", printcharfun);
447 sprintf (buf, "%hd", n);
448 write_c_string (buf, printcharfun);
452 write_c_string (">", printcharfun);
456 uint16_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
458 Lisp_Uint16_Byte_Table *te1 = XUINT16_BYTE_TABLE (obj1);
459 Lisp_Uint16_Byte_Table *te2 = XUINT16_BYTE_TABLE (obj2);
462 for (i = 0; i < 256; i++)
463 if (te1->property[i] != te2->property[i])
469 uint16_byte_table_hash (Lisp_Object obj, int depth)
471 Lisp_Uint16_Byte_Table *te = XUINT16_BYTE_TABLE (obj);
475 for (i = 0; i < 256; i++)
476 hash = HASH2 (hash, te->property[i]);
480 DEFINE_LRECORD_IMPLEMENTATION ("uint16-byte-table", uint16_byte_table,
481 mark_uint16_byte_table,
482 print_uint16_byte_table,
483 0, uint16_byte_table_equal,
484 uint16_byte_table_hash,
485 0 /* uint16_byte_table_description */,
486 Lisp_Uint16_Byte_Table);
489 make_uint16_byte_table (unsigned short initval)
493 Lisp_Uint16_Byte_Table *cte;
495 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
496 &lrecord_uint16_byte_table);
498 for (i = 0; i < 256; i++)
499 cte->property[i] = initval;
501 XSETUINT16_BYTE_TABLE (obj, cte);
506 expand_uint8_byte_table_to_uint16 (Lisp_Object table)
510 Lisp_Uint8_Byte_Table* bte = XUINT8_BYTE_TABLE(table);
511 Lisp_Uint16_Byte_Table* cte;
513 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
514 &lrecord_uint16_byte_table);
515 for (i = 0; i < 256; i++)
517 cte->property[i] = UINT8_TO_UINT16 (bte->property[i]);
519 XSETUINT16_BYTE_TABLE (obj, cte);
524 uint16_byte_table_same_value_p (Lisp_Object obj)
526 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
527 unsigned short v0 = bte->property[0];
530 for (i = 1; i < 256; i++)
532 if (bte->property[i] != v0)
540 mark_byte_table (Lisp_Object obj)
542 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
545 for (i = 0; i < 256; i++)
547 mark_object (cte->property[i]);
553 print_byte_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
555 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
557 struct gcpro gcpro1, gcpro2;
558 GCPRO2 (obj, printcharfun);
560 write_c_string ("\n#<byte-table", printcharfun);
561 for (i = 0; i < 256; i++)
563 Lisp_Object elt = bte->property[i];
565 write_c_string ("\n ", printcharfun);
566 write_c_string (" ", printcharfun);
567 if (EQ (elt, Qunbound))
568 write_c_string ("void", printcharfun);
570 print_internal (elt, printcharfun, escapeflag);
573 write_c_string (">", printcharfun);
577 byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
579 Lisp_Byte_Table *cte1 = XBYTE_TABLE (obj1);
580 Lisp_Byte_Table *cte2 = XBYTE_TABLE (obj2);
583 for (i = 0; i < 256; i++)
584 if (BYTE_TABLE_P (cte1->property[i]))
586 if (BYTE_TABLE_P (cte2->property[i]))
588 if (!byte_table_equal (cte1->property[i],
589 cte2->property[i], depth + 1))
596 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
602 byte_table_hash (Lisp_Object obj, int depth)
604 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
606 return internal_array_hash (cte->property, 256, depth);
609 static const struct lrecord_description byte_table_description[] = {
610 { XD_LISP_OBJECT_ARRAY, offsetof(Lisp_Byte_Table, property), 256 },
614 DEFINE_LRECORD_IMPLEMENTATION ("byte-table", byte_table,
619 byte_table_description,
623 make_byte_table (Lisp_Object initval)
627 Lisp_Byte_Table *cte;
629 cte = alloc_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
631 for (i = 0; i < 256; i++)
632 cte->property[i] = initval;
634 XSETBYTE_TABLE (obj, cte);
639 byte_table_same_value_p (Lisp_Object obj)
641 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
642 Lisp_Object v0 = bte->property[0];
645 for (i = 1; i < 256; i++)
647 if (!internal_equal (bte->property[i], v0, 0))
654 Lisp_Object get_byte_table (Lisp_Object table, unsigned char idx);
655 Lisp_Object put_byte_table (Lisp_Object table, unsigned char idx,
659 get_byte_table (Lisp_Object table, unsigned char idx)
661 if (UINT8_BYTE_TABLE_P (table))
662 return UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[idx]);
663 else if (UINT16_BYTE_TABLE_P (table))
664 return UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[idx]);
665 else if (BYTE_TABLE_P (table))
666 return XBYTE_TABLE(table)->property[idx];
672 put_byte_table (Lisp_Object table, unsigned char idx, Lisp_Object value)
674 if (UINT8_BYTE_TABLE_P (table))
676 if (UINT8_VALUE_P (value))
678 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
679 if (!UINT8_BYTE_TABLE_P (value) &&
680 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
681 && uint8_byte_table_same_value_p (table))
686 else if (UINT16_VALUE_P (value))
688 Lisp_Object new = expand_uint8_byte_table_to_uint16 (table);
690 XUINT16_BYTE_TABLE(new)->property[idx] = UINT16_ENCODE (value);
695 Lisp_Object new = make_byte_table (Qnil);
698 for (i = 0; i < 256; i++)
700 XBYTE_TABLE(new)->property[i]
701 = UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[i]);
703 XBYTE_TABLE(new)->property[idx] = value;
707 else if (UINT16_BYTE_TABLE_P (table))
709 if (UINT16_VALUE_P (value))
711 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
712 if (!UINT8_BYTE_TABLE_P (value) &&
713 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
714 && uint16_byte_table_same_value_p (table))
721 Lisp_Object new = make_byte_table (Qnil);
724 for (i = 0; i < 256; i++)
726 XBYTE_TABLE(new)->property[i]
727 = UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[i]);
729 XBYTE_TABLE(new)->property[idx] = value;
733 else if (BYTE_TABLE_P (table))
735 XBYTE_TABLE(table)->property[idx] = value;
736 if (!UINT8_BYTE_TABLE_P (value) &&
737 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
738 && byte_table_same_value_p (table))
743 else if (!internal_equal (table, value, 0))
745 if (UINT8_VALUE_P (table) && UINT8_VALUE_P (value))
747 table = make_uint8_byte_table (UINT8_ENCODE (table));
748 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
750 else if (UINT16_VALUE_P (table) && UINT16_VALUE_P (value))
752 table = make_uint16_byte_table (UINT16_ENCODE (table));
753 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
757 table = make_byte_table (table);
758 XBYTE_TABLE(table)->property[idx] = value;
765 mark_char_id_table (Lisp_Object obj)
767 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
773 print_char_id_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
775 Lisp_Object table = XCHAR_ID_TABLE (obj)->table;
777 struct gcpro gcpro1, gcpro2;
778 GCPRO2 (obj, printcharfun);
780 write_c_string ("#<char-id-table ", printcharfun);
781 for (i = 0; i < 256; i++)
783 Lisp_Object elt = get_byte_table (table, i);
784 if (i != 0) write_c_string ("\n ", printcharfun);
785 if (EQ (elt, Qunbound))
786 write_c_string ("void", printcharfun);
788 print_internal (elt, printcharfun, escapeflag);
791 write_c_string (">", printcharfun);
795 char_id_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
797 Lisp_Object table1 = XCHAR_ID_TABLE (obj1)->table;
798 Lisp_Object table2 = XCHAR_ID_TABLE (obj2)->table;
801 for (i = 0; i < 256; i++)
803 if (!internal_equal (get_byte_table (table1, i),
804 get_byte_table (table2, i), 0))
811 char_id_table_hash (Lisp_Object obj, int depth)
813 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
815 return char_id_table_hash (cte->table, depth + 1);
818 static const struct lrecord_description char_id_table_description[] = {
819 { XD_LISP_OBJECT, offsetof(Lisp_Char_ID_Table, table) },
823 DEFINE_LRECORD_IMPLEMENTATION ("char-id-table", char_id_table,
826 0, char_id_table_equal,
828 char_id_table_description,
832 make_char_id_table (Lisp_Object initval)
835 Lisp_Char_ID_Table *cte;
837 cte = alloc_lcrecord_type (Lisp_Char_ID_Table, &lrecord_char_id_table);
839 cte->table = make_byte_table (initval);
841 XSETCHAR_ID_TABLE (obj, cte);
847 get_char_id_table (Emchar ch, Lisp_Object table)
849 unsigned int code = ch;
856 (XCHAR_ID_TABLE (table)->table,
857 (unsigned char)(code >> 24)),
858 (unsigned char) (code >> 16)),
859 (unsigned char) (code >> 8)),
860 (unsigned char) code);
863 void put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table);
865 put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table)
867 unsigned int code = ch;
868 Lisp_Object table1, table2, table3, table4;
870 table1 = XCHAR_ID_TABLE (table)->table;
871 table2 = get_byte_table (table1, (unsigned char)(code >> 24));
872 table3 = get_byte_table (table2, (unsigned char)(code >> 16));
873 table4 = get_byte_table (table3, (unsigned char)(code >> 8));
875 table4 = put_byte_table (table4, (unsigned char)code, value);
876 table3 = put_byte_table (table3, (unsigned char)(code >> 8), table4);
877 table2 = put_byte_table (table2, (unsigned char)(code >> 16), table3);
878 XCHAR_ID_TABLE (table)->table
879 = put_byte_table (table1, (unsigned char)(code >> 24), table2);
883 Lisp_Object Vchar_attribute_hash_table;
884 Lisp_Object Vcharacter_composition_table;
885 Lisp_Object Vcharacter_variant_table;
887 Lisp_Object Qideograph_daikanwa;
888 Lisp_Object Q_decomposition;
892 Lisp_Object Qisolated;
893 Lisp_Object Qinitial;
896 Lisp_Object Qvertical;
897 Lisp_Object QnoBreak;
898 Lisp_Object Qfraction;
908 Emchar to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg);
910 Lisp_Object put_char_ccs_code_point (Lisp_Object character,
911 Lisp_Object ccs, Lisp_Object value);
912 Lisp_Object remove_char_ccs (Lisp_Object character, Lisp_Object ccs);
915 to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
921 else if (EQ (v, Qcompat))
923 else if (EQ (v, Qisolated))
925 else if (EQ (v, Qinitial))
927 else if (EQ (v, Qmedial))
929 else if (EQ (v, Qfinal))
931 else if (EQ (v, Qvertical))
933 else if (EQ (v, QnoBreak))
935 else if (EQ (v, Qfraction))
937 else if (EQ (v, Qsuper))
939 else if (EQ (v, Qsub))
941 else if (EQ (v, Qcircle))
943 else if (EQ (v, Qsquare))
945 else if (EQ (v, Qwide))
947 else if (EQ (v, Qnarrow))
949 else if (EQ (v, Qsmall))
951 else if (EQ (v, Qfont))
954 signal_simple_error (err_msg, err_arg);
957 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
958 Return character corresponding with list.
962 Lisp_Object table = Vcharacter_composition_table;
963 Lisp_Object rest = list;
967 Lisp_Object v = Fcar (rest);
969 Emchar c = to_char_id (v, "Invalid value for composition", list);
971 ret = get_char_id_table (c, table);
976 if (!CHAR_ID_TABLE_P (ret))
981 else if (!CONSP (rest))
983 else if (CHAR_ID_TABLE_P (ret))
986 signal_simple_error ("Invalid table is found with", list);
988 signal_simple_error ("Invalid value for composition", list);
991 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
992 Return variants of CHARACTER.
996 CHECK_CHAR (character);
997 return Fcopy_list (get_char_id_table (XCHAR (character),
998 Vcharacter_variant_table));
1002 /* We store the char-attributes in hash tables with the names as the
1003 key and the actual char-id-table object as the value. Occasionally
1004 we need to use them in a list format. These routines provide us
1006 struct char_attribute_list_closure
1008 Lisp_Object *char_attribute_list;
1012 add_char_attribute_to_list_mapper (Lisp_Object key, Lisp_Object value,
1013 void *char_attribute_list_closure)
1015 /* This function can GC */
1016 struct char_attribute_list_closure *calcl
1017 = (struct char_attribute_list_closure*) char_attribute_list_closure;
1018 Lisp_Object *char_attribute_list = calcl->char_attribute_list;
1020 *char_attribute_list = Fcons (key, *char_attribute_list);
1024 DEFUN ("char-attribute-list", Fchar_attribute_list, 0, 0, 0, /*
1025 Return the list of all existing character attributes except coded-charsets.
1029 Lisp_Object char_attribute_list = Qnil;
1030 struct gcpro gcpro1;
1031 struct char_attribute_list_closure char_attribute_list_closure;
1033 GCPRO1 (char_attribute_list);
1034 char_attribute_list_closure.char_attribute_list = &char_attribute_list;
1035 elisp_maphash (add_char_attribute_to_list_mapper,
1036 Vchar_attribute_hash_table,
1037 &char_attribute_list_closure);
1039 return char_attribute_list;
1042 DEFUN ("find-char-attribute-table", Ffind_char_attribute_table, 1, 1, 0, /*
1043 Return char-id-table corresponding to ATTRIBUTE.
1047 return Fgethash (attribute, Vchar_attribute_hash_table, Qnil);
1051 /* We store the char-id-tables in hash tables with the attributes as
1052 the key and the actual char-id-table object as the value. Each
1053 char-id-table stores values of an attribute corresponding with
1054 characters. Occasionally we need to get attributes of a character
1055 in a association-list format. These routines provide us with
1057 struct char_attribute_alist_closure
1060 Lisp_Object *char_attribute_alist;
1064 add_char_attribute_alist_mapper (Lisp_Object key, Lisp_Object value,
1065 void *char_attribute_alist_closure)
1067 /* This function can GC */
1068 struct char_attribute_alist_closure *caacl =
1069 (struct char_attribute_alist_closure*) char_attribute_alist_closure;
1070 Lisp_Object ret = get_char_id_table (caacl->char_id, value);
1071 if (!UNBOUNDP (ret))
1073 Lisp_Object *char_attribute_alist = caacl->char_attribute_alist;
1074 *char_attribute_alist
1075 = Fcons (Fcons (key, ret), *char_attribute_alist);
1080 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
1081 Return the alist of attributes of CHARACTER.
1085 Lisp_Object alist = Qnil;
1088 CHECK_CHAR (character);
1090 struct gcpro gcpro1;
1091 struct char_attribute_alist_closure char_attribute_alist_closure;
1094 char_attribute_alist_closure.char_id = XCHAR (character);
1095 char_attribute_alist_closure.char_attribute_alist = &alist;
1096 elisp_maphash (add_char_attribute_alist_mapper,
1097 Vchar_attribute_hash_table,
1098 &char_attribute_alist_closure);
1102 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
1104 Lisp_Object ccs = chlook->charset_by_leading_byte[i];
1108 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1111 if ( CHAR_ID_TABLE_P (encoding_table)
1112 && INTP (cpos = get_char_id_table (XCHAR (character),
1115 alist = Fcons (Fcons (ccs, cpos), alist);
1122 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 3, 0, /*
1123 Return the value of CHARACTER's ATTRIBUTE.
1124 Return DEFAULT-VALUE if the value is not exist.
1126 (character, attribute, default_value))
1130 CHECK_CHAR (character);
1131 if (!NILP (ccs = Ffind_charset (attribute)))
1133 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1135 if (CHAR_ID_TABLE_P (encoding_table))
1136 return get_char_id_table (XCHAR (character), encoding_table);
1140 Lisp_Object table = Fgethash (attribute,
1141 Vchar_attribute_hash_table,
1143 if (!UNBOUNDP (table))
1145 Lisp_Object ret = get_char_id_table (XCHAR (character), table);
1146 if (!UNBOUNDP (ret))
1150 return default_value;
1153 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
1154 Store CHARACTER's ATTRIBUTE with VALUE.
1156 (character, attribute, value))
1160 CHECK_CHAR (character);
1161 ccs = Ffind_charset (attribute);
1164 return put_char_ccs_code_point (character, ccs, value);
1166 else if (EQ (attribute, Q_decomposition))
1171 signal_simple_error ("Invalid value for ->decomposition",
1174 if (CONSP (Fcdr (value)))
1176 Lisp_Object rest = value;
1177 Lisp_Object table = Vcharacter_composition_table;
1181 GET_EXTERNAL_LIST_LENGTH (rest, len);
1182 seq = make_vector (len, Qnil);
1184 while (CONSP (rest))
1186 Lisp_Object v = Fcar (rest);
1189 = to_char_id (v, "Invalid value for ->decomposition", value);
1192 XVECTOR_DATA(seq)[i++] = v;
1194 XVECTOR_DATA(seq)[i++] = make_char (c);
1198 put_char_id_table (c, character, table);
1203 ntable = get_char_id_table (c, table);
1204 if (!CHAR_ID_TABLE_P (ntable))
1206 ntable = make_char_id_table (Qnil);
1207 put_char_id_table (c, ntable, table);
1215 Lisp_Object v = Fcar (value);
1219 Emchar c = XINT (v);
1221 = get_char_id_table (c, Vcharacter_variant_table);
1223 if (NILP (Fmemq (v, ret)))
1225 put_char_id_table (c, Fcons (character, ret),
1226 Vcharacter_variant_table);
1229 seq = make_vector (1, v);
1233 else if (EQ (attribute, Q_ucs))
1239 signal_simple_error ("Invalid value for ->ucs", value);
1243 ret = get_char_id_table (c, Vcharacter_variant_table);
1244 if (NILP (Fmemq (character, ret)))
1246 put_char_id_table (c, Fcons (character, ret),
1247 Vcharacter_variant_table);
1251 Lisp_Object table = Fgethash (attribute,
1252 Vchar_attribute_hash_table,
1257 table = make_char_id_table (Qunbound);
1258 Fputhash (attribute, table, Vchar_attribute_hash_table);
1260 put_char_id_table (XCHAR (character), value, table);
1265 DEFUN ("remove-char-attribute", Fremove_char_attribute, 2, 2, 0, /*
1266 Remove CHARACTER's ATTRIBUTE.
1268 (character, attribute))
1272 CHECK_CHAR (character);
1273 ccs = Ffind_charset (attribute);
1276 return remove_char_ccs (character, ccs);
1280 Lisp_Object table = Fgethash (attribute,
1281 Vchar_attribute_hash_table,
1283 if (!UNBOUNDP (table))
1285 put_char_id_table (XCHAR (character), Qunbound, table);
1292 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
1294 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
1296 /* ad-hoc method for `ascii' */
1297 if ((CHARSET_CHARS (cs) == 94) &&
1298 (CHARSET_BYTE_OFFSET (cs) != 33))
1299 return 128 - CHARSET_BYTE_OFFSET (cs);
1301 return CHARSET_CHARS (cs);
1304 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
1306 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
1308 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
1312 if (XVECTOR_LENGTH (v) > ccs_len)
1315 for (i = 0; i < XVECTOR_LENGTH (v); i++)
1317 Lisp_Object c = XVECTOR_DATA(v)[i];
1319 if (!NILP (c) && !CHARP (c))
1323 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
1335 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1338 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1348 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1349 nv = XVECTOR_DATA(v)[i];
1355 XVECTOR_DATA(v)[i] = Qnil;
1359 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1360 int code_point, Lisp_Object character);
1362 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1363 int code_point, Lisp_Object character)
1367 int ccs_len = XVECTOR_LENGTH (v);
1372 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1373 nv = XVECTOR_DATA(v)[i];
1377 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
1383 XVECTOR_DATA(v)[i] = character;
1387 put_char_ccs_code_point (Lisp_Object character,
1388 Lisp_Object ccs, Lisp_Object value)
1390 Lisp_Object encoding_table;
1392 if (!EQ (XCHARSET_NAME (ccs), Qucs)
1393 || (XCHAR (character) != XINT (value)))
1395 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
1396 int dim = XCHARSET_DIMENSION (ccs);
1397 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
1398 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
1402 { /* obsolete representation: value must be a list of bytes */
1403 Lisp_Object ret = Fcar (value);
1407 signal_simple_error ("Invalid value for coded-charset", value);
1408 code_point = XINT (ret);
1409 if (XCHARSET_GRAPHIC (ccs) == 1)
1411 rest = Fcdr (value);
1412 while (!NILP (rest))
1417 signal_simple_error ("Invalid value for coded-charset",
1421 signal_simple_error ("Invalid value for coded-charset",
1424 if (XCHARSET_GRAPHIC (ccs) == 1)
1426 code_point = (code_point << 8) | j;
1429 value = make_int (code_point);
1431 else if (INTP (value))
1433 code_point = XINT (value);
1434 if (XCHARSET_GRAPHIC (ccs) == 1)
1436 code_point &= 0x7F7F7F7F;
1437 value = make_int (code_point);
1441 signal_simple_error ("Invalid value for coded-charset", value);
1445 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
1448 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
1453 XCHARSET_DECODING_TABLE (ccs)
1454 = v = make_older_vector (ccs_len, Qnil);
1457 decoding_table_put_char (v, dim, byte_offset, code_point, character);
1459 if (NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
1461 XCHARSET_ENCODING_TABLE (ccs)
1462 = encoding_table = make_char_id_table (Qnil);
1464 put_char_id_table (XCHAR (character), value, encoding_table);
1469 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
1471 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
1472 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1474 if (VECTORP (decoding_table))
1476 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
1480 decoding_table_remove_char (decoding_table,
1481 XCHARSET_DIMENSION (ccs),
1482 XCHARSET_BYTE_OFFSET (ccs),
1486 if (CHAR_ID_TABLE_P (encoding_table))
1488 put_char_id_table (XCHAR (character), Qnil, encoding_table);
1493 EXFUN (Fmake_char, 3);
1494 EXFUN (Fdecode_char, 2);
1496 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
1497 Store character's ATTRIBUTES.
1501 Lisp_Object rest = attributes;
1502 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
1503 Lisp_Object character;
1507 while (CONSP (rest))
1509 Lisp_Object cell = Fcar (rest);
1513 signal_simple_error ("Invalid argument", attributes);
1514 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
1515 && ((XCHARSET_FINAL (ccs) != 0) ||
1516 (XCHARSET_UCS_MAX (ccs) > 0)) )
1520 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
1522 character = Fdecode_char (ccs, cell);
1523 if (!NILP (character))
1524 goto setup_attributes;
1528 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
1531 signal_simple_error ("Invalid argument", attributes);
1533 character = make_char (XINT (code) + 0x100000);
1534 goto setup_attributes;
1538 else if (!INTP (code))
1539 signal_simple_error ("Invalid argument", attributes);
1541 character = make_char (XINT (code));
1545 while (CONSP (rest))
1547 Lisp_Object cell = Fcar (rest);
1550 signal_simple_error ("Invalid argument", attributes);
1552 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
1558 Lisp_Object Vutf_2000_version;
1562 int leading_code_private_11;
1565 Lisp_Object Qcharsetp;
1567 /* Qdoc_string, Qdimension, Qchars defined in general.c */
1568 Lisp_Object Qregistry, Qfinal, Qgraphic;
1569 Lisp_Object Qdirection;
1570 Lisp_Object Qreverse_direction_charset;
1571 Lisp_Object Qleading_byte;
1572 Lisp_Object Qshort_name, Qlong_name;
1586 Qcyrillic_iso8859_5,
1588 Qjapanese_jisx0208_1978,
1592 Qjapanese_jisx0208_1990,
1595 Qchinese_cns11643_1,
1596 Qchinese_cns11643_2,
1602 Qlatin_viscii_lower,
1603 Qlatin_viscii_upper,
1604 Qvietnamese_viscii_lower,
1605 Qvietnamese_viscii_upper,
1617 Qideograph_gt_pj_10,
1618 Qideograph_gt_pj_11,
1648 Lisp_Object Ql2r, Qr2l;
1650 Lisp_Object Vcharset_hash_table;
1652 /* Composite characters are characters constructed by overstriking two
1653 or more regular characters.
1655 1) The old Mule implementation involves storing composite characters
1656 in a buffer as a tag followed by all of the actual characters
1657 used to make up the composite character. I think this is a bad
1658 idea; it greatly complicates code that wants to handle strings
1659 one character at a time because it has to deal with the possibility
1660 of great big ungainly characters. It's much more reasonable to
1661 simply store an index into a table of composite characters.
1663 2) The current implementation only allows for 16,384 separate
1664 composite characters over the lifetime of the XEmacs process.
1665 This could become a potential problem if the user
1666 edited lots of different files that use composite characters.
1667 Due to FSF bogosity, increasing the number of allowable
1668 composite characters under Mule would decrease the number
1669 of possible faces that can exist. Mule already has shrunk
1670 this to 2048, and further shrinkage would become uncomfortable.
1671 No such problems exist in XEmacs.
1673 Composite characters could be represented as 0x80 C1 C2 C3,
1674 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1675 for slightly under 2^20 (one million) composite characters
1676 over the XEmacs process lifetime, and you only need to
1677 increase the size of a Mule character from 19 to 21 bits.
1678 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1679 85 million (slightly over 2^26) composite characters. */
1682 /************************************************************************/
1683 /* Basic Emchar functions */
1684 /************************************************************************/
1686 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1687 string in STR. Returns the number of bytes stored.
1688 Do not call this directly. Use the macro set_charptr_emchar() instead.
1692 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1698 Lisp_Object charset;
1707 else if ( c <= 0x7ff )
1709 *p++ = (c >> 6) | 0xc0;
1710 *p++ = (c & 0x3f) | 0x80;
1712 else if ( c <= 0xffff )
1714 *p++ = (c >> 12) | 0xe0;
1715 *p++ = ((c >> 6) & 0x3f) | 0x80;
1716 *p++ = (c & 0x3f) | 0x80;
1718 else if ( c <= 0x1fffff )
1720 *p++ = (c >> 18) | 0xf0;
1721 *p++ = ((c >> 12) & 0x3f) | 0x80;
1722 *p++ = ((c >> 6) & 0x3f) | 0x80;
1723 *p++ = (c & 0x3f) | 0x80;
1725 else if ( c <= 0x3ffffff )
1727 *p++ = (c >> 24) | 0xf8;
1728 *p++ = ((c >> 18) & 0x3f) | 0x80;
1729 *p++ = ((c >> 12) & 0x3f) | 0x80;
1730 *p++ = ((c >> 6) & 0x3f) | 0x80;
1731 *p++ = (c & 0x3f) | 0x80;
1735 *p++ = (c >> 30) | 0xfc;
1736 *p++ = ((c >> 24) & 0x3f) | 0x80;
1737 *p++ = ((c >> 18) & 0x3f) | 0x80;
1738 *p++ = ((c >> 12) & 0x3f) | 0x80;
1739 *p++ = ((c >> 6) & 0x3f) | 0x80;
1740 *p++ = (c & 0x3f) | 0x80;
1743 BREAKUP_CHAR (c, charset, c1, c2);
1744 lb = CHAR_LEADING_BYTE (c);
1745 if (LEADING_BYTE_PRIVATE_P (lb))
1746 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1748 if (EQ (charset, Vcharset_control_1))
1757 /* Return the first character from a Mule-encoded string in STR,
1758 assuming it's non-ASCII. Do not call this directly.
1759 Use the macro charptr_emchar() instead. */
1762 non_ascii_charptr_emchar (const Bufbyte *str)
1775 else if ( b >= 0xf8 )
1780 else if ( b >= 0xf0 )
1785 else if ( b >= 0xe0 )
1790 else if ( b >= 0xc0 )
1800 for( ; len > 0; len-- )
1803 ch = ( ch << 6 ) | ( b & 0x3f );
1807 Bufbyte i0 = *str, i1, i2 = 0;
1808 Lisp_Object charset;
1810 if (i0 == LEADING_BYTE_CONTROL_1)
1811 return (Emchar) (*++str - 0x20);
1813 if (LEADING_BYTE_PREFIX_P (i0))
1818 charset = CHARSET_BY_LEADING_BYTE (i0);
1819 if (XCHARSET_DIMENSION (charset) == 2)
1822 return MAKE_CHAR (charset, i1, i2);
1826 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1827 Do not call this directly. Use the macro valid_char_p() instead. */
1831 non_ascii_valid_char_p (Emchar ch)
1835 /* Must have only lowest 19 bits set */
1839 f1 = CHAR_FIELD1 (ch);
1840 f2 = CHAR_FIELD2 (ch);
1841 f3 = CHAR_FIELD3 (ch);
1845 Lisp_Object charset;
1847 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1848 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1849 f2 > MAX_CHAR_FIELD2_PRIVATE)
1854 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1855 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1859 NOTE: This takes advantage of the fact that
1860 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1861 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1863 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1864 if (EQ (charset, Qnil))
1866 return (XCHARSET_CHARS (charset) == 96);
1870 Lisp_Object charset;
1872 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1873 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1874 f1 > MAX_CHAR_FIELD1_PRIVATE)
1876 if (f2 < 0x20 || f3 < 0x20)
1879 #ifdef ENABLE_COMPOSITE_CHARS
1880 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1882 if (UNBOUNDP (Fgethash (make_int (ch),
1883 Vcomposite_char_char2string_hash_table,
1888 #endif /* ENABLE_COMPOSITE_CHARS */
1890 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1891 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1894 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1896 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1899 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1901 if (EQ (charset, Qnil))
1903 return (XCHARSET_CHARS (charset) == 96);
1909 /************************************************************************/
1910 /* Basic string functions */
1911 /************************************************************************/
1913 /* Copy the character pointed to by SRC into DST. Do not call this
1914 directly. Use the macro charptr_copy_char() instead.
1915 Return the number of bytes copied. */
1918 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
1920 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
1922 for (i = bytes; i; i--, dst++, src++)
1928 /************************************************************************/
1929 /* streams of Emchars */
1930 /************************************************************************/
1932 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1933 The functions below are not meant to be called directly; use
1934 the macros in insdel.h. */
1937 Lstream_get_emchar_1 (Lstream *stream, int ch)
1939 Bufbyte str[MAX_EMCHAR_LEN];
1940 Bufbyte *strptr = str;
1943 str[0] = (Bufbyte) ch;
1945 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
1947 int c = Lstream_getc (stream);
1948 bufpos_checking_assert (c >= 0);
1949 *++strptr = (Bufbyte) c;
1951 return charptr_emchar (str);
1955 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1957 Bufbyte str[MAX_EMCHAR_LEN];
1958 Bytecount len = set_charptr_emchar (str, ch);
1959 return Lstream_write (stream, str, len);
1963 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1965 Bufbyte str[MAX_EMCHAR_LEN];
1966 Bytecount len = set_charptr_emchar (str, ch);
1967 Lstream_unread (stream, str, len);
1971 /************************************************************************/
1972 /* charset object */
1973 /************************************************************************/
1976 mark_charset (Lisp_Object obj)
1978 Lisp_Charset *cs = XCHARSET (obj);
1980 mark_object (cs->short_name);
1981 mark_object (cs->long_name);
1982 mark_object (cs->doc_string);
1983 mark_object (cs->registry);
1984 mark_object (cs->ccl_program);
1986 mark_object (cs->encoding_table);
1987 /* mark_object (cs->decoding_table); */
1993 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1995 Lisp_Charset *cs = XCHARSET (obj);
1999 error ("printing unreadable object #<charset %s 0x%x>",
2000 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
2003 write_c_string ("#<charset ", printcharfun);
2004 print_internal (CHARSET_NAME (cs), printcharfun, 0);
2005 write_c_string (" ", printcharfun);
2006 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
2007 write_c_string (" ", printcharfun);
2008 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
2009 write_c_string (" ", printcharfun);
2010 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
2011 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
2013 CHARSET_DIMENSION (cs),
2014 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
2015 CHARSET_COLUMNS (cs),
2016 CHARSET_GRAPHIC (cs),
2017 CHARSET_FINAL (cs));
2018 write_c_string (buf, printcharfun);
2019 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
2020 sprintf (buf, " 0x%x>", cs->header.uid);
2021 write_c_string (buf, printcharfun);
2024 static const struct lrecord_description charset_description[] = {
2025 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
2026 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
2027 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
2028 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
2029 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
2030 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
2031 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
2033 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
2034 { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
2039 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
2040 mark_charset, print_charset, 0, 0, 0,
2041 charset_description,
2044 /* Make a new charset. */
2045 /* #### SJT Should generic properties be allowed? */
2047 make_charset (Charset_ID id, Lisp_Object name,
2048 unsigned short chars, unsigned char dimension,
2049 unsigned char columns, unsigned char graphic,
2050 Bufbyte final, unsigned char direction, Lisp_Object short_name,
2051 Lisp_Object long_name, Lisp_Object doc,
2053 Lisp_Object decoding_table,
2054 Emchar ucs_min, Emchar ucs_max,
2055 Emchar code_offset, unsigned char byte_offset)
2058 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
2062 XSETCHARSET (obj, cs);
2064 CHARSET_ID (cs) = id;
2065 CHARSET_NAME (cs) = name;
2066 CHARSET_SHORT_NAME (cs) = short_name;
2067 CHARSET_LONG_NAME (cs) = long_name;
2068 CHARSET_CHARS (cs) = chars;
2069 CHARSET_DIMENSION (cs) = dimension;
2070 CHARSET_DIRECTION (cs) = direction;
2071 CHARSET_COLUMNS (cs) = columns;
2072 CHARSET_GRAPHIC (cs) = graphic;
2073 CHARSET_FINAL (cs) = final;
2074 CHARSET_DOC_STRING (cs) = doc;
2075 CHARSET_REGISTRY (cs) = reg;
2076 CHARSET_CCL_PROGRAM (cs) = Qnil;
2077 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
2079 CHARSET_DECODING_TABLE(cs) = Qnil;
2080 CHARSET_ENCODING_TABLE(cs) = Qnil;
2081 CHARSET_UCS_MIN(cs) = ucs_min;
2082 CHARSET_UCS_MAX(cs) = ucs_max;
2083 CHARSET_CODE_OFFSET(cs) = code_offset;
2084 CHARSET_BYTE_OFFSET(cs) = byte_offset;
2088 if (id == LEADING_BYTE_ASCII)
2089 CHARSET_REP_BYTES (cs) = 1;
2091 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
2093 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
2098 /* some charsets do not have final characters. This includes
2099 ASCII, Control-1, Composite, and the two faux private
2101 unsigned char iso2022_type
2102 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
2104 if (code_offset == 0)
2106 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
2107 chlook->charset_by_attributes[iso2022_type][final] = obj;
2111 (chlook->charset_by_attributes[iso2022_type][final][direction]));
2112 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
2116 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
2117 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
2119 /* Some charsets are "faux" and don't have names or really exist at
2120 all except in the leading-byte table. */
2122 Fputhash (name, obj, Vcharset_hash_table);
2127 get_unallocated_leading_byte (int dimension)
2132 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
2135 lb = chlook->next_allocated_leading_byte++;
2139 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
2142 lb = chlook->next_allocated_1_byte_leading_byte++;
2146 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
2149 lb = chlook->next_allocated_2_byte_leading_byte++;
2155 ("No more character sets free for this dimension",
2156 make_int (dimension));
2162 /* Number of Big5 characters which have the same code in 1st byte. */
2164 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
2167 decode_builtin_char (Lisp_Object charset, int code_point)
2171 if (EQ (charset, Vcharset_chinese_big5))
2173 int c1 = code_point >> 8;
2174 int c2 = code_point & 0xFF;
2176 = (c1 - 0xA1) * BIG5_SAME_ROW
2177 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
2181 charset = Vcharset_chinese_big5_1;
2185 charset = Vcharset_chinese_big5_2;
2186 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
2188 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
2190 if ((final = XCHARSET_FINAL (charset)) >= '0')
2192 if (XCHARSET_DIMENSION (charset) == 1)
2194 switch (XCHARSET_CHARS (charset))
2198 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
2201 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
2209 switch (XCHARSET_CHARS (charset))
2212 return MIN_CHAR_94x94
2213 + (final - '0') * 94 * 94
2214 + (((code_point >> 8) & 0x7F) - 33) * 94
2215 + ((code_point & 0x7F) - 33);
2217 return MIN_CHAR_96x96
2218 + (final - '0') * 96 * 96
2219 + (((code_point >> 8) & 0x7F) - 32) * 96
2220 + ((code_point & 0x7F) - 32);
2227 else if (XCHARSET_UCS_MAX (charset))
2230 = (XCHARSET_DIMENSION (charset) == 1
2232 code_point - XCHARSET_BYTE_OFFSET (charset)
2234 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
2235 * XCHARSET_CHARS (charset)
2236 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
2237 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
2238 if ((cid < XCHARSET_UCS_MIN (charset))
2239 || (XCHARSET_UCS_MAX (charset) < cid))
2248 range_charset_code_point (Lisp_Object charset, Emchar ch)
2252 if ((XCHARSET_UCS_MIN (charset) <= ch)
2253 && (ch <= XCHARSET_UCS_MAX (charset)))
2255 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
2257 if (XCHARSET_CHARS (charset) == 256)
2259 else if (XCHARSET_DIMENSION (charset) == 1)
2260 return d + XCHARSET_BYTE_OFFSET (charset);
2261 else if (XCHARSET_DIMENSION (charset) == 2)
2263 ((d / XCHARSET_CHARS (charset)
2264 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2265 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2266 else if (XCHARSET_DIMENSION (charset) == 3)
2268 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2269 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2270 | ((d / XCHARSET_CHARS (charset)
2271 % XCHARSET_CHARS (charset)
2272 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2273 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2274 else /* if (XCHARSET_DIMENSION (charset) == 4) */
2276 ((d / (XCHARSET_CHARS (charset)
2277 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2278 + XCHARSET_BYTE_OFFSET (charset)) << 24)
2279 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2280 % XCHARSET_CHARS (charset)
2281 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2282 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
2283 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2284 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2286 else if (XCHARSET_CODE_OFFSET (charset) == 0)
2288 if (XCHARSET_DIMENSION (charset) == 1)
2290 if (XCHARSET_CHARS (charset) == 94)
2292 if (((d = ch - (MIN_CHAR_94
2293 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
2297 else if (XCHARSET_CHARS (charset) == 96)
2299 if (((d = ch - (MIN_CHAR_96
2300 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
2307 else if (XCHARSET_DIMENSION (charset) == 2)
2309 if (XCHARSET_CHARS (charset) == 94)
2311 if (((d = ch - (MIN_CHAR_94x94
2312 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
2315 return (((d / 94) + 33) << 8) | (d % 94 + 33);
2317 else if (XCHARSET_CHARS (charset) == 96)
2319 if (((d = ch - (MIN_CHAR_96x96
2320 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
2323 return (((d / 96) + 32) << 8) | (d % 96 + 32);
2329 if (EQ (charset, Vcharset_mojikyo_2022_1)
2330 && (MIN_CHAR_MOJIKYO < ch) && (ch < MIN_CHAR_MOJIKYO + 94 * 60 * 94))
2332 int m = ch - MIN_CHAR_MOJIKYO - 1;
2333 int byte1 = m / (94 * 60) + 33;
2334 int byte2 = (m % (94 * 60)) / 94;
2335 int byte3 = m % 94 + 33;
2341 return (byte1 << 16) | (byte2 << 8) | byte3;
2347 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
2349 if (c <= MAX_CHAR_BASIC_LATIN)
2351 *charset = Vcharset_ascii;
2356 *charset = Vcharset_control_1;
2361 *charset = Vcharset_latin_iso8859_1;
2365 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
2367 *charset = Vcharset_greek_iso8859_7;
2368 return c - MIN_CHAR_GREEK + 0x20;
2370 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
2372 *charset = Vcharset_cyrillic_iso8859_5;
2373 return c - MIN_CHAR_CYRILLIC + 0x20;
2376 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
2378 *charset = Vcharset_hebrew_iso8859_8;
2379 return c - MIN_CHAR_HEBREW + 0x20;
2381 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
2383 *charset = Vcharset_thai_tis620;
2384 return c - MIN_CHAR_THAI + 0x20;
2387 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
2388 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
2390 return list2 (Vcharset_katakana_jisx0201,
2391 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
2394 else if (c <= MAX_CHAR_BMP)
2396 *charset = Vcharset_ucs_bmp;
2399 else if (c < MIN_CHAR_DAIKANWA)
2401 *charset = Vcharset_ucs;
2404 else if (c <= MAX_CHAR_DAIKANWA)
2406 *charset = Vcharset_ideograph_daikanwa;
2407 return c - MIN_CHAR_DAIKANWA;
2409 else if (c <= MAX_CHAR_MOJIKYO_0)
2411 *charset = Vcharset_mojikyo;
2412 return c - MIN_CHAR_MOJIKYO_0;
2414 else if (c < MIN_CHAR_94)
2416 *charset = Vcharset_ucs;
2419 else if (c <= MAX_CHAR_94)
2421 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
2422 ((c - MIN_CHAR_94) / 94) + '0',
2423 CHARSET_LEFT_TO_RIGHT);
2424 if (!NILP (*charset))
2425 return ((c - MIN_CHAR_94) % 94) + 33;
2428 *charset = Vcharset_ucs;
2432 else if (c <= MAX_CHAR_96)
2434 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
2435 ((c - MIN_CHAR_96) / 96) + '0',
2436 CHARSET_LEFT_TO_RIGHT);
2437 if (!NILP (*charset))
2438 return ((c - MIN_CHAR_96) % 96) + 32;
2441 *charset = Vcharset_ucs;
2445 else if (c <= MAX_CHAR_94x94)
2448 = CHARSET_BY_ATTRIBUTES (94, 2,
2449 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
2450 CHARSET_LEFT_TO_RIGHT);
2451 if (!NILP (*charset))
2452 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
2453 | (((c - MIN_CHAR_94x94) % 94) + 33);
2456 *charset = Vcharset_ucs;
2460 else if (c <= MAX_CHAR_96x96)
2463 = CHARSET_BY_ATTRIBUTES (96, 2,
2464 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
2465 CHARSET_LEFT_TO_RIGHT);
2466 if (!NILP (*charset))
2467 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
2468 | (((c - MIN_CHAR_96x96) % 96) + 32);
2471 *charset = Vcharset_ucs;
2475 else if (c < MIN_CHAR_MOJIKYO)
2477 *charset = Vcharset_ucs;
2480 else if (c <= MAX_CHAR_MOJIKYO)
2482 *charset = Vcharset_mojikyo;
2483 return c - MIN_CHAR_MOJIKYO;
2487 *charset = Vcharset_ucs;
2492 Lisp_Object Vdefault_coded_charset_priority_list;
2496 /************************************************************************/
2497 /* Basic charset Lisp functions */
2498 /************************************************************************/
2500 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
2501 Return non-nil if OBJECT is a charset.
2505 return CHARSETP (object) ? Qt : Qnil;
2508 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
2509 Retrieve the charset of the given name.
2510 If CHARSET-OR-NAME is a charset object, it is simply returned.
2511 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
2512 nil is returned. Otherwise the associated charset object is returned.
2516 if (CHARSETP (charset_or_name))
2517 return charset_or_name;
2519 CHECK_SYMBOL (charset_or_name);
2520 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
2523 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
2524 Retrieve the charset of the given name.
2525 Same as `find-charset' except an error is signalled if there is no such
2526 charset instead of returning nil.
2530 Lisp_Object charset = Ffind_charset (name);
2533 signal_simple_error ("No such charset", name);
2537 /* We store the charsets in hash tables with the names as the key and the
2538 actual charset object as the value. Occasionally we need to use them
2539 in a list format. These routines provide us with that. */
2540 struct charset_list_closure
2542 Lisp_Object *charset_list;
2546 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
2547 void *charset_list_closure)
2549 /* This function can GC */
2550 struct charset_list_closure *chcl =
2551 (struct charset_list_closure*) charset_list_closure;
2552 Lisp_Object *charset_list = chcl->charset_list;
2554 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
2558 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
2559 Return a list of the names of all defined charsets.
2563 Lisp_Object charset_list = Qnil;
2564 struct gcpro gcpro1;
2565 struct charset_list_closure charset_list_closure;
2567 GCPRO1 (charset_list);
2568 charset_list_closure.charset_list = &charset_list;
2569 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
2570 &charset_list_closure);
2573 return charset_list;
2576 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
2577 Return the name of charset CHARSET.
2581 return XCHARSET_NAME (Fget_charset (charset));
2584 /* #### SJT Should generic properties be allowed? */
2585 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
2586 Define a new character set.
2587 This function is for use with Mule support.
2588 NAME is a symbol, the name by which the character set is normally referred.
2589 DOC-STRING is a string describing the character set.
2590 PROPS is a property list, describing the specific nature of the
2591 character set. Recognized properties are:
2593 'short-name Short version of the charset name (ex: Latin-1)
2594 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
2595 'registry A regular expression matching the font registry field for
2597 'dimension Number of octets used to index a character in this charset.
2598 Either 1 or 2. Defaults to 1.
2599 'columns Number of columns used to display a character in this charset.
2600 Only used in TTY mode. (Under X, the actual width of a
2601 character can be derived from the font used to display the
2602 characters.) If unspecified, defaults to the dimension
2603 (this is almost always the correct value).
2604 'chars Number of characters in each dimension (94 or 96).
2605 Defaults to 94. Note that if the dimension is 2, the
2606 character set thus described is 94x94 or 96x96.
2607 'final Final byte of ISO 2022 escape sequence. Must be
2608 supplied. Each combination of (DIMENSION, CHARS) defines a
2609 separate namespace for final bytes. Note that ISO
2610 2022 restricts the final byte to the range
2611 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
2612 dimension == 2. Note also that final bytes in the range
2613 0x30 - 0x3F are reserved for user-defined (not official)
2615 'graphic 0 (use left half of font on output) or 1 (use right half
2616 of font on output). Defaults to 0. For example, for
2617 a font whose registry is ISO8859-1, the left half
2618 (octets 0x20 - 0x7F) is the `ascii' character set, while
2619 the right half (octets 0xA0 - 0xFF) is the `latin-1'
2620 character set. With 'graphic set to 0, the octets
2621 will have their high bit cleared; with it set to 1,
2622 the octets will have their high bit set.
2623 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
2625 'ccl-program A compiled CCL program used to convert a character in
2626 this charset into an index into the font. This is in
2627 addition to the 'graphic property. The CCL program
2628 is passed the octets of the character, with the high
2629 bit cleared and set depending upon whether the value
2630 of the 'graphic property is 0 or 1.
2632 (name, doc_string, props))
2634 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
2635 int direction = CHARSET_LEFT_TO_RIGHT;
2636 Lisp_Object registry = Qnil;
2637 Lisp_Object charset;
2638 Lisp_Object ccl_program = Qnil;
2639 Lisp_Object short_name = Qnil, long_name = Qnil;
2640 int byte_offset = -1;
2642 CHECK_SYMBOL (name);
2643 if (!NILP (doc_string))
2644 CHECK_STRING (doc_string);
2646 charset = Ffind_charset (name);
2647 if (!NILP (charset))
2648 signal_simple_error ("Cannot redefine existing charset", name);
2651 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
2653 if (EQ (keyword, Qshort_name))
2655 CHECK_STRING (value);
2659 if (EQ (keyword, Qlong_name))
2661 CHECK_STRING (value);
2665 else if (EQ (keyword, Qdimension))
2668 dimension = XINT (value);
2669 if (dimension < 1 || dimension > 2)
2670 signal_simple_error ("Invalid value for 'dimension", value);
2673 else if (EQ (keyword, Qchars))
2676 chars = XINT (value);
2677 if (chars != 94 && chars != 96)
2678 signal_simple_error ("Invalid value for 'chars", value);
2681 else if (EQ (keyword, Qcolumns))
2684 columns = XINT (value);
2685 if (columns != 1 && columns != 2)
2686 signal_simple_error ("Invalid value for 'columns", value);
2689 else if (EQ (keyword, Qgraphic))
2692 graphic = XINT (value);
2694 if (graphic < 0 || graphic > 2)
2696 if (graphic < 0 || graphic > 1)
2698 signal_simple_error ("Invalid value for 'graphic", value);
2701 else if (EQ (keyword, Qregistry))
2703 CHECK_STRING (value);
2707 else if (EQ (keyword, Qdirection))
2709 if (EQ (value, Ql2r))
2710 direction = CHARSET_LEFT_TO_RIGHT;
2711 else if (EQ (value, Qr2l))
2712 direction = CHARSET_RIGHT_TO_LEFT;
2714 signal_simple_error ("Invalid value for 'direction", value);
2717 else if (EQ (keyword, Qfinal))
2719 CHECK_CHAR_COERCE_INT (value);
2720 final = XCHAR (value);
2721 if (final < '0' || final > '~')
2722 signal_simple_error ("Invalid value for 'final", value);
2725 else if (EQ (keyword, Qccl_program))
2727 struct ccl_program test_ccl;
2729 if (setup_ccl_program (&test_ccl, value) < 0)
2730 signal_simple_error ("Invalid value for 'ccl-program", value);
2731 ccl_program = value;
2735 signal_simple_error ("Unrecognized property", keyword);
2740 error ("'final must be specified");
2741 if (dimension == 2 && final > 0x5F)
2743 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2746 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2747 CHARSET_LEFT_TO_RIGHT)) ||
2748 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2749 CHARSET_RIGHT_TO_LEFT)))
2751 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2753 id = get_unallocated_leading_byte (dimension);
2755 if (NILP (doc_string))
2756 doc_string = build_string ("");
2758 if (NILP (registry))
2759 registry = build_string ("");
2761 if (NILP (short_name))
2762 XSETSTRING (short_name, XSYMBOL (name)->name);
2764 if (NILP (long_name))
2765 long_name = doc_string;
2768 columns = dimension;
2770 if (byte_offset < 0)
2774 else if (chars == 96)
2780 charset = make_charset (id, name, chars, dimension, columns, graphic,
2781 final, direction, short_name, long_name,
2782 doc_string, registry,
2783 Qnil, 0, 0, 0, byte_offset);
2784 if (!NILP (ccl_program))
2785 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2789 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2791 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2792 NEW-NAME is the name of the new charset. Return the new charset.
2794 (charset, new_name))
2796 Lisp_Object new_charset = Qnil;
2797 int id, chars, dimension, columns, graphic, final;
2799 Lisp_Object registry, doc_string, short_name, long_name;
2802 charset = Fget_charset (charset);
2803 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2804 signal_simple_error ("Charset already has reverse-direction charset",
2807 CHECK_SYMBOL (new_name);
2808 if (!NILP (Ffind_charset (new_name)))
2809 signal_simple_error ("Cannot redefine existing charset", new_name);
2811 cs = XCHARSET (charset);
2813 chars = CHARSET_CHARS (cs);
2814 dimension = CHARSET_DIMENSION (cs);
2815 columns = CHARSET_COLUMNS (cs);
2816 id = get_unallocated_leading_byte (dimension);
2818 graphic = CHARSET_GRAPHIC (cs);
2819 final = CHARSET_FINAL (cs);
2820 direction = CHARSET_RIGHT_TO_LEFT;
2821 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2822 direction = CHARSET_LEFT_TO_RIGHT;
2823 doc_string = CHARSET_DOC_STRING (cs);
2824 short_name = CHARSET_SHORT_NAME (cs);
2825 long_name = CHARSET_LONG_NAME (cs);
2826 registry = CHARSET_REGISTRY (cs);
2828 new_charset = make_charset (id, new_name, chars, dimension, columns,
2829 graphic, final, direction, short_name, long_name,
2830 doc_string, registry,
2832 CHARSET_DECODING_TABLE(cs),
2833 CHARSET_UCS_MIN(cs),
2834 CHARSET_UCS_MAX(cs),
2835 CHARSET_CODE_OFFSET(cs),
2836 CHARSET_BYTE_OFFSET(cs)
2842 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2843 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
2848 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2849 Define symbol ALIAS as an alias for CHARSET.
2853 CHECK_SYMBOL (alias);
2854 charset = Fget_charset (charset);
2855 return Fputhash (alias, charset, Vcharset_hash_table);
2858 /* #### Reverse direction charsets not yet implemented. */
2860 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2862 Return the reverse-direction charset parallel to CHARSET, if any.
2863 This is the charset with the same properties (in particular, the same
2864 dimension, number of characters per dimension, and final byte) as
2865 CHARSET but whose characters are displayed in the opposite direction.
2869 charset = Fget_charset (charset);
2870 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
2874 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2875 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2876 If DIRECTION is omitted, both directions will be checked (left-to-right
2877 will be returned if character sets exist for both directions).
2879 (dimension, chars, final, direction))
2881 int dm, ch, fi, di = -1;
2882 Lisp_Object obj = Qnil;
2884 CHECK_INT (dimension);
2885 dm = XINT (dimension);
2886 if (dm < 1 || dm > 2)
2887 signal_simple_error ("Invalid value for DIMENSION", dimension);
2891 if (ch != 94 && ch != 96)
2892 signal_simple_error ("Invalid value for CHARS", chars);
2894 CHECK_CHAR_COERCE_INT (final);
2896 if (fi < '0' || fi > '~')
2897 signal_simple_error ("Invalid value for FINAL", final);
2899 if (EQ (direction, Ql2r))
2900 di = CHARSET_LEFT_TO_RIGHT;
2901 else if (EQ (direction, Qr2l))
2902 di = CHARSET_RIGHT_TO_LEFT;
2903 else if (!NILP (direction))
2904 signal_simple_error ("Invalid value for DIRECTION", direction);
2906 if (dm == 2 && fi > 0x5F)
2908 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2912 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
2914 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
2917 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
2920 return XCHARSET_NAME (obj);
2924 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2925 Return short name of CHARSET.
2929 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2932 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2933 Return long name of CHARSET.
2937 return XCHARSET_LONG_NAME (Fget_charset (charset));
2940 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2941 Return description of CHARSET.
2945 return XCHARSET_DOC_STRING (Fget_charset (charset));
2948 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2949 Return dimension of CHARSET.
2953 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2956 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2957 Return property PROP of CHARSET, a charset object or symbol naming a charset.
2958 Recognized properties are those listed in `make-charset', as well as
2959 'name and 'doc-string.
2965 charset = Fget_charset (charset);
2966 cs = XCHARSET (charset);
2968 CHECK_SYMBOL (prop);
2969 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2970 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2971 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2972 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2973 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2974 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2975 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2976 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2977 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2978 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2979 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2980 if (EQ (prop, Qdirection))
2981 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2982 if (EQ (prop, Qreverse_direction_charset))
2984 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2985 /* #### Is this translation OK? If so, error checking sufficient? */
2986 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
2988 signal_simple_error ("Unrecognized charset property name", prop);
2989 return Qnil; /* not reached */
2992 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2993 Return charset identification number of CHARSET.
2997 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
3000 /* #### We need to figure out which properties we really want to
3003 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
3004 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
3006 (charset, ccl_program))
3008 struct ccl_program test_ccl;
3010 charset = Fget_charset (charset);
3011 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
3012 signal_simple_error ("Invalid ccl-program", ccl_program);
3013 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
3018 invalidate_charset_font_caches (Lisp_Object charset)
3020 /* Invalidate font cache entries for charset on all devices. */
3021 Lisp_Object devcons, concons, hash_table;
3022 DEVICE_LOOP_NO_BREAK (devcons, concons)
3024 struct device *d = XDEVICE (XCAR (devcons));
3025 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
3026 if (!UNBOUNDP (hash_table))
3027 Fclrhash (hash_table);
3031 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
3032 Set the 'registry property of CHARSET to REGISTRY.
3034 (charset, registry))
3036 charset = Fget_charset (charset);
3037 CHECK_STRING (registry);
3038 XCHARSET_REGISTRY (charset) = registry;
3039 invalidate_charset_font_caches (charset);
3040 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
3045 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
3046 Return mapping-table of CHARSET.
3050 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
3053 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
3054 Set mapping-table of CHARSET to TABLE.
3058 struct Lisp_Charset *cs;
3062 charset = Fget_charset (charset);
3063 cs = XCHARSET (charset);
3067 if (VECTORP (CHARSET_DECODING_TABLE(cs)))
3068 make_vector_newer (CHARSET_DECODING_TABLE(cs));
3069 CHARSET_DECODING_TABLE(cs) = Qnil;
3072 else if (VECTORP (table))
3074 int ccs_len = CHARSET_BYTE_SIZE (cs);
3075 int ret = decoding_table_check_elements (table,
3076 CHARSET_DIMENSION (cs),
3081 signal_simple_error ("Too big table", table);
3083 signal_simple_error ("Invalid element is found", table);
3085 signal_simple_error ("Something wrong", table);
3087 CHARSET_DECODING_TABLE(cs) = Qnil;
3090 signal_error (Qwrong_type_argument,
3091 list2 (build_translated_string ("vector-or-nil-p"),
3094 byte_offset = CHARSET_BYTE_OFFSET (cs);
3095 switch (CHARSET_DIMENSION (cs))
3098 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3100 Lisp_Object c = XVECTOR_DATA(table)[i];
3103 put_char_ccs_code_point (c, charset,
3104 make_int (i + byte_offset));
3108 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3110 Lisp_Object v = XVECTOR_DATA(table)[i];
3116 for (j = 0; j < XVECTOR_LENGTH (v); j++)
3118 Lisp_Object c = XVECTOR_DATA(v)[j];
3121 put_char_ccs_code_point
3123 make_int ( ( (i + byte_offset) << 8 )
3129 put_char_ccs_code_point (v, charset,
3130 make_int (i + byte_offset));
3139 /************************************************************************/
3140 /* Lisp primitives for working with characters */
3141 /************************************************************************/
3144 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
3145 Make a character from CHARSET and code-point CODE.
3151 charset = Fget_charset (charset);
3154 if (XCHARSET_GRAPHIC (charset) == 1)
3156 c = DECODE_CHAR (charset, c);
3157 return c ? make_char (c) : Qnil;
3160 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
3161 Make a builtin character from CHARSET and code-point CODE.
3167 charset = Fget_charset (charset);
3169 if (EQ (charset, Vcharset_latin_viscii))
3171 Lisp_Object chr = Fdecode_char (charset, code);
3177 (ret = Fget_char_attribute (chr,
3178 Vcharset_latin_viscii_lower,
3181 charset = Vcharset_latin_viscii_lower;
3185 (ret = Fget_char_attribute (chr,
3186 Vcharset_latin_viscii_upper,
3189 charset = Vcharset_latin_viscii_upper;
3196 if (XCHARSET_GRAPHIC (charset) == 1)
3199 c = decode_builtin_char (charset, c);
3200 return c >= 0 ? make_char (c) : Fdecode_char (charset, code);
3204 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
3205 Make a character from CHARSET and octets ARG1 and ARG2.
3206 ARG2 is required only for characters from two-dimensional charsets.
3207 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
3208 character s with caron.
3210 (charset, arg1, arg2))
3214 int lowlim, highlim;
3216 charset = Fget_charset (charset);
3217 cs = XCHARSET (charset);
3219 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
3220 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
3222 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
3224 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
3225 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
3228 /* It is useful (and safe, according to Olivier Galibert) to strip
3229 the 8th bit off ARG1 and ARG2 because it allows programmers to
3230 write (make-char 'latin-iso8859-2 CODE) where code is the actual
3231 Latin 2 code of the character. */
3239 if (a1 < lowlim || a1 > highlim)
3240 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
3242 if (CHARSET_DIMENSION (cs) == 1)
3246 ("Charset is of dimension one; second octet must be nil", arg2);
3247 return make_char (MAKE_CHAR (charset, a1, 0));
3256 a2 = XINT (arg2) & 0x7f;
3258 if (a2 < lowlim || a2 > highlim)
3259 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
3261 return make_char (MAKE_CHAR (charset, a1, a2));
3264 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
3265 Return the character set of CHARACTER.
3269 CHECK_CHAR_COERCE_INT (character);
3271 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
3274 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
3275 Return the octet numbered N (should be 0 or 1) of CHARACTER.
3276 N defaults to 0 if omitted.
3280 Lisp_Object charset;
3283 CHECK_CHAR_COERCE_INT (character);
3285 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
3287 if (NILP (n) || EQ (n, Qzero))
3288 return make_int (octet0);
3289 else if (EQ (n, make_int (1)))
3290 return make_int (octet1);
3292 signal_simple_error ("Octet number must be 0 or 1", n);
3295 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
3296 Return list of charset and one or two position-codes of CHARACTER.
3300 /* This function can GC */
3301 struct gcpro gcpro1, gcpro2;
3302 Lisp_Object charset = Qnil;
3303 Lisp_Object rc = Qnil;
3311 GCPRO2 (charset, rc);
3312 CHECK_CHAR_COERCE_INT (character);
3315 code_point = ENCODE_CHAR (XCHAR (character), charset);
3316 dimension = XCHARSET_DIMENSION (charset);
3317 while (dimension > 0)
3319 rc = Fcons (make_int (code_point & 255), rc);
3323 rc = Fcons (XCHARSET_NAME (charset), rc);
3325 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
3327 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
3329 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
3333 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
3342 #ifdef ENABLE_COMPOSITE_CHARS
3343 /************************************************************************/
3344 /* composite character functions */
3345 /************************************************************************/
3348 lookup_composite_char (Bufbyte *str, int len)
3350 Lisp_Object lispstr = make_string (str, len);
3351 Lisp_Object ch = Fgethash (lispstr,
3352 Vcomposite_char_string2char_hash_table,
3358 if (composite_char_row_next >= 128)
3359 signal_simple_error ("No more composite chars available", lispstr);
3360 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
3361 composite_char_col_next);
3362 Fputhash (make_char (emch), lispstr,
3363 Vcomposite_char_char2string_hash_table);
3364 Fputhash (lispstr, make_char (emch),
3365 Vcomposite_char_string2char_hash_table);
3366 composite_char_col_next++;
3367 if (composite_char_col_next >= 128)
3369 composite_char_col_next = 32;
3370 composite_char_row_next++;
3379 composite_char_string (Emchar ch)
3381 Lisp_Object str = Fgethash (make_char (ch),
3382 Vcomposite_char_char2string_hash_table,
3384 assert (!UNBOUNDP (str));
3388 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
3389 Convert a string into a single composite character.
3390 The character is the result of overstriking all the characters in
3395 CHECK_STRING (string);
3396 return make_char (lookup_composite_char (XSTRING_DATA (string),
3397 XSTRING_LENGTH (string)));
3400 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
3401 Return a string of the characters comprising a composite character.
3409 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
3410 signal_simple_error ("Must be composite char", ch);
3411 return composite_char_string (emch);
3413 #endif /* ENABLE_COMPOSITE_CHARS */
3416 /************************************************************************/
3417 /* initialization */
3418 /************************************************************************/
3421 syms_of_mule_charset (void)
3424 INIT_LRECORD_IMPLEMENTATION (uint8_byte_table);
3425 INIT_LRECORD_IMPLEMENTATION (uint16_byte_table);
3426 INIT_LRECORD_IMPLEMENTATION (byte_table);
3427 INIT_LRECORD_IMPLEMENTATION (char_id_table);
3429 INIT_LRECORD_IMPLEMENTATION (charset);
3431 DEFSUBR (Fcharsetp);
3432 DEFSUBR (Ffind_charset);
3433 DEFSUBR (Fget_charset);
3434 DEFSUBR (Fcharset_list);
3435 DEFSUBR (Fcharset_name);
3436 DEFSUBR (Fmake_charset);
3437 DEFSUBR (Fmake_reverse_direction_charset);
3438 /* DEFSUBR (Freverse_direction_charset); */
3439 DEFSUBR (Fdefine_charset_alias);
3440 DEFSUBR (Fcharset_from_attributes);
3441 DEFSUBR (Fcharset_short_name);
3442 DEFSUBR (Fcharset_long_name);
3443 DEFSUBR (Fcharset_description);
3444 DEFSUBR (Fcharset_dimension);
3445 DEFSUBR (Fcharset_property);
3446 DEFSUBR (Fcharset_id);
3447 DEFSUBR (Fset_charset_ccl_program);
3448 DEFSUBR (Fset_charset_registry);
3450 DEFSUBR (Fchar_attribute_list);
3451 DEFSUBR (Ffind_char_attribute_table);
3452 DEFSUBR (Fchar_attribute_alist);
3453 DEFSUBR (Fget_char_attribute);
3454 DEFSUBR (Fput_char_attribute);
3455 DEFSUBR (Fremove_char_attribute);
3456 DEFSUBR (Fdefine_char);
3457 DEFSUBR (Fchar_variants);
3458 DEFSUBR (Fget_composite_char);
3459 DEFSUBR (Fcharset_mapping_table);
3460 DEFSUBR (Fset_charset_mapping_table);
3464 DEFSUBR (Fdecode_char);
3465 DEFSUBR (Fdecode_builtin_char);
3467 DEFSUBR (Fmake_char);
3468 DEFSUBR (Fchar_charset);
3469 DEFSUBR (Fchar_octet);
3470 DEFSUBR (Fsplit_char);
3472 #ifdef ENABLE_COMPOSITE_CHARS
3473 DEFSUBR (Fmake_composite_char);
3474 DEFSUBR (Fcomposite_char_string);
3477 defsymbol (&Qcharsetp, "charsetp");
3478 defsymbol (&Qregistry, "registry");
3479 defsymbol (&Qfinal, "final");
3480 defsymbol (&Qgraphic, "graphic");
3481 defsymbol (&Qdirection, "direction");
3482 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
3483 defsymbol (&Qshort_name, "short-name");
3484 defsymbol (&Qlong_name, "long-name");
3486 defsymbol (&Ql2r, "l2r");
3487 defsymbol (&Qr2l, "r2l");
3489 /* Charsets, compatible with FSF 20.3
3490 Naming convention is Script-Charset[-Edition] */
3491 defsymbol (&Qascii, "ascii");
3492 defsymbol (&Qcontrol_1, "control-1");
3493 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
3494 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
3495 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
3496 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
3497 defsymbol (&Qthai_tis620, "thai-tis620");
3498 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
3499 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
3500 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
3501 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
3502 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
3503 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
3504 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
3505 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
3506 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
3507 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
3508 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
3509 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
3510 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
3511 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
3512 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
3513 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
3515 defsymbol (&Q_ucs, "->ucs");
3516 defsymbol (&Q_decomposition, "->decomposition");
3517 defsymbol (&Qcompat, "compat");
3518 defsymbol (&Qisolated, "isolated");
3519 defsymbol (&Qinitial, "initial");
3520 defsymbol (&Qmedial, "medial");
3521 defsymbol (&Qfinal, "final");
3522 defsymbol (&Qvertical, "vertical");
3523 defsymbol (&QnoBreak, "noBreak");
3524 defsymbol (&Qfraction, "fraction");
3525 defsymbol (&Qsuper, "super");
3526 defsymbol (&Qsub, "sub");
3527 defsymbol (&Qcircle, "circle");
3528 defsymbol (&Qsquare, "square");
3529 defsymbol (&Qwide, "wide");
3530 defsymbol (&Qnarrow, "narrow");
3531 defsymbol (&Qsmall, "small");
3532 defsymbol (&Qfont, "font");
3533 defsymbol (&Qucs, "ucs");
3534 defsymbol (&Qucs_bmp, "ucs-bmp");
3535 defsymbol (&Qucs_cns, "ucs-cns");
3536 defsymbol (&Qlatin_viscii, "latin-viscii");
3537 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
3538 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
3539 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
3540 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
3541 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
3542 defsymbol (&Qideograph_gt, "ideograph-gt");
3543 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
3544 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
3545 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
3546 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
3547 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
3548 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
3549 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
3550 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
3551 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
3552 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
3553 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
3554 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
3555 defsymbol (&Qchinese_big5, "chinese-big5");
3556 defsymbol (&Qmojikyo, "mojikyo");
3557 defsymbol (&Qmojikyo_2022_1, "mojikyo-2022-1");
3558 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
3559 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
3560 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
3561 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
3562 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
3563 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
3564 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
3565 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
3566 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
3567 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
3568 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
3569 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
3570 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
3571 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
3572 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
3573 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
3574 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
3575 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
3576 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
3577 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
3578 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
3579 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
3581 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
3582 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
3584 defsymbol (&Qcomposite, "composite");
3588 vars_of_mule_charset (void)
3595 chlook = xnew (struct charset_lookup);
3596 dumpstruct (&chlook, &charset_lookup_description);
3598 /* Table of charsets indexed by leading byte. */
3599 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
3600 chlook->charset_by_leading_byte[i] = Qnil;
3603 /* Table of charsets indexed by type/final-byte. */
3604 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3605 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3606 chlook->charset_by_attributes[i][j] = Qnil;
3608 /* Table of charsets indexed by type/final-byte/direction. */
3609 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3610 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3611 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
3612 chlook->charset_by_attributes[i][j][k] = Qnil;
3616 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
3618 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
3619 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
3623 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3624 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
3625 Leading-code of private TYPE9N charset of column-width 1.
3627 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3631 Vutf_2000_version = build_string("0.17 (Hōryūji)");
3632 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
3633 Version number of UTF-2000.
3636 staticpro (&Vcharacter_composition_table);
3637 Vcharacter_composition_table = make_char_id_table (Qnil);
3639 staticpro (&Vcharacter_variant_table);
3640 Vcharacter_variant_table = make_char_id_table (Qnil);
3642 Vdefault_coded_charset_priority_list = Qnil;
3643 DEFVAR_LISP ("default-coded-charset-priority-list",
3644 &Vdefault_coded_charset_priority_list /*
3645 Default order of preferred coded-character-sets.
3651 complex_vars_of_mule_charset (void)
3653 staticpro (&Vcharset_hash_table);
3654 Vcharset_hash_table =
3655 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3657 /* Predefined character sets. We store them into variables for
3661 staticpro (&Vchar_attribute_hash_table);
3662 Vchar_attribute_hash_table
3663 = make_lisp_hash_table (16, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3665 staticpro (&Vcharset_ucs);
3667 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
3668 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3669 build_string ("UCS"),
3670 build_string ("UCS"),
3671 build_string ("ISO/IEC 10646"),
3673 Qnil, 0, 0xFFFFFFF, 0, 0);
3674 staticpro (&Vcharset_ucs_bmp);
3676 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
3677 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3678 build_string ("BMP"),
3679 build_string ("BMP"),
3680 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
3681 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
3682 Qnil, 0, 0xFFFF, 0, 0);
3683 staticpro (&Vcharset_ucs_cns);
3685 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
3686 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3687 build_string ("UCS for CNS"),
3688 build_string ("UCS for CNS 11643"),
3689 build_string ("ISO/IEC 10646 for CNS 11643"),
3693 # define MIN_CHAR_THAI 0
3694 # define MAX_CHAR_THAI 0
3695 # define MIN_CHAR_HEBREW 0
3696 # define MAX_CHAR_HEBREW 0
3697 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3698 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
3700 staticpro (&Vcharset_ascii);
3702 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3703 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3704 build_string ("ASCII"),
3705 build_string ("ASCII)"),
3706 build_string ("ASCII (ISO646 IRV)"),
3707 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3708 Qnil, 0, 0x7F, 0, 0);
3709 staticpro (&Vcharset_control_1);
3710 Vcharset_control_1 =
3711 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3712 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3713 build_string ("C1"),
3714 build_string ("Control characters"),
3715 build_string ("Control characters 128-191"),
3717 Qnil, 0x80, 0x9F, 0, 0);
3718 staticpro (&Vcharset_latin_iso8859_1);
3719 Vcharset_latin_iso8859_1 =
3720 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3721 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3722 build_string ("Latin-1"),
3723 build_string ("ISO8859-1 (Latin-1)"),
3724 build_string ("ISO8859-1 (Latin-1)"),
3725 build_string ("iso8859-1"),
3726 Qnil, 0xA0, 0xFF, 0, 32);
3727 staticpro (&Vcharset_latin_iso8859_2);
3728 Vcharset_latin_iso8859_2 =
3729 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3730 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3731 build_string ("Latin-2"),
3732 build_string ("ISO8859-2 (Latin-2)"),
3733 build_string ("ISO8859-2 (Latin-2)"),
3734 build_string ("iso8859-2"),
3736 staticpro (&Vcharset_latin_iso8859_3);
3737 Vcharset_latin_iso8859_3 =
3738 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3739 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3740 build_string ("Latin-3"),
3741 build_string ("ISO8859-3 (Latin-3)"),
3742 build_string ("ISO8859-3 (Latin-3)"),
3743 build_string ("iso8859-3"),
3745 staticpro (&Vcharset_latin_iso8859_4);
3746 Vcharset_latin_iso8859_4 =
3747 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3748 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3749 build_string ("Latin-4"),
3750 build_string ("ISO8859-4 (Latin-4)"),
3751 build_string ("ISO8859-4 (Latin-4)"),
3752 build_string ("iso8859-4"),
3754 staticpro (&Vcharset_thai_tis620);
3755 Vcharset_thai_tis620 =
3756 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3757 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3758 build_string ("TIS620"),
3759 build_string ("TIS620 (Thai)"),
3760 build_string ("TIS620.2529 (Thai)"),
3761 build_string ("tis620"),
3762 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
3763 staticpro (&Vcharset_greek_iso8859_7);
3764 Vcharset_greek_iso8859_7 =
3765 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3766 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3767 build_string ("ISO8859-7"),
3768 build_string ("ISO8859-7 (Greek)"),
3769 build_string ("ISO8859-7 (Greek)"),
3770 build_string ("iso8859-7"),
3772 0 /* MIN_CHAR_GREEK */,
3773 0 /* MAX_CHAR_GREEK */, 0, 32);
3774 staticpro (&Vcharset_arabic_iso8859_6);
3775 Vcharset_arabic_iso8859_6 =
3776 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3777 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3778 build_string ("ISO8859-6"),
3779 build_string ("ISO8859-6 (Arabic)"),
3780 build_string ("ISO8859-6 (Arabic)"),
3781 build_string ("iso8859-6"),
3783 staticpro (&Vcharset_hebrew_iso8859_8);
3784 Vcharset_hebrew_iso8859_8 =
3785 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3786 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3787 build_string ("ISO8859-8"),
3788 build_string ("ISO8859-8 (Hebrew)"),
3789 build_string ("ISO8859-8 (Hebrew)"),
3790 build_string ("iso8859-8"),
3791 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
3792 staticpro (&Vcharset_katakana_jisx0201);
3793 Vcharset_katakana_jisx0201 =
3794 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3795 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3796 build_string ("JISX0201 Kana"),
3797 build_string ("JISX0201.1976 (Japanese Kana)"),
3798 build_string ("JISX0201.1976 Japanese Kana"),
3799 build_string ("jisx0201\\.1976"),
3801 staticpro (&Vcharset_latin_jisx0201);
3802 Vcharset_latin_jisx0201 =
3803 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3804 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3805 build_string ("JISX0201 Roman"),
3806 build_string ("JISX0201.1976 (Japanese Roman)"),
3807 build_string ("JISX0201.1976 Japanese Roman"),
3808 build_string ("jisx0201\\.1976"),
3810 staticpro (&Vcharset_cyrillic_iso8859_5);
3811 Vcharset_cyrillic_iso8859_5 =
3812 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3813 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3814 build_string ("ISO8859-5"),
3815 build_string ("ISO8859-5 (Cyrillic)"),
3816 build_string ("ISO8859-5 (Cyrillic)"),
3817 build_string ("iso8859-5"),
3819 0 /* MIN_CHAR_CYRILLIC */,
3820 0 /* MAX_CHAR_CYRILLIC */, 0, 32);
3821 staticpro (&Vcharset_latin_iso8859_9);
3822 Vcharset_latin_iso8859_9 =
3823 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3824 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3825 build_string ("Latin-5"),
3826 build_string ("ISO8859-9 (Latin-5)"),
3827 build_string ("ISO8859-9 (Latin-5)"),
3828 build_string ("iso8859-9"),
3830 staticpro (&Vcharset_japanese_jisx0208_1978);
3831 Vcharset_japanese_jisx0208_1978 =
3832 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3833 Qjapanese_jisx0208_1978, 94, 2,
3834 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3835 build_string ("JIS X0208:1978"),
3836 build_string ("JIS X0208:1978 (Japanese)"),
3838 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3839 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3841 staticpro (&Vcharset_chinese_gb2312);
3842 Vcharset_chinese_gb2312 =
3843 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3844 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3845 build_string ("GB2312"),
3846 build_string ("GB2312)"),
3847 build_string ("GB2312 Chinese simplified"),
3848 build_string ("gb2312"),
3850 staticpro (&Vcharset_chinese_gb12345);
3851 Vcharset_chinese_gb12345 =
3852 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
3853 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3854 build_string ("G1"),
3855 build_string ("GB 12345)"),
3856 build_string ("GB 12345-1990"),
3857 build_string ("GB12345\\(\\.1990\\)?-0"),
3859 staticpro (&Vcharset_japanese_jisx0208);
3860 Vcharset_japanese_jisx0208 =
3861 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3862 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3863 build_string ("JISX0208"),
3864 build_string ("JIS X0208:1983 (Japanese)"),
3865 build_string ("JIS X0208:1983 Japanese Kanji"),
3866 build_string ("jisx0208\\.1983"),
3869 staticpro (&Vcharset_japanese_jisx0208_1990);
3870 Vcharset_japanese_jisx0208_1990 =
3871 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3872 Qjapanese_jisx0208_1990, 94, 2,
3873 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3874 build_string ("JISX0208-1990"),
3875 build_string ("JIS X0208:1990 (Japanese)"),
3876 build_string ("JIS X0208:1990 Japanese Kanji"),
3877 build_string ("jisx0208\\.1990"),
3879 MIN_CHAR_JIS_X0208_1990,
3880 MAX_CHAR_JIS_X0208_1990, 0, 33);
3882 staticpro (&Vcharset_korean_ksc5601);
3883 Vcharset_korean_ksc5601 =
3884 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3885 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3886 build_string ("KSC5601"),
3887 build_string ("KSC5601 (Korean"),
3888 build_string ("KSC5601 Korean Hangul and Hanja"),
3889 build_string ("ksc5601"),
3891 staticpro (&Vcharset_japanese_jisx0212);
3892 Vcharset_japanese_jisx0212 =
3893 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3894 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3895 build_string ("JISX0212"),
3896 build_string ("JISX0212 (Japanese)"),
3897 build_string ("JISX0212 Japanese Supplement"),
3898 build_string ("jisx0212"),
3901 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3902 staticpro (&Vcharset_chinese_cns11643_1);
3903 Vcharset_chinese_cns11643_1 =
3904 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3905 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3906 build_string ("CNS11643-1"),
3907 build_string ("CNS11643-1 (Chinese traditional)"),
3909 ("CNS 11643 Plane 1 Chinese traditional"),
3910 build_string (CHINESE_CNS_PLANE_RE("1")),
3912 staticpro (&Vcharset_chinese_cns11643_2);
3913 Vcharset_chinese_cns11643_2 =
3914 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3915 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3916 build_string ("CNS11643-2"),
3917 build_string ("CNS11643-2 (Chinese traditional)"),
3919 ("CNS 11643 Plane 2 Chinese traditional"),
3920 build_string (CHINESE_CNS_PLANE_RE("2")),
3923 staticpro (&Vcharset_latin_tcvn5712);
3924 Vcharset_latin_tcvn5712 =
3925 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3926 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3927 build_string ("TCVN 5712"),
3928 build_string ("TCVN 5712 (VSCII-2)"),
3929 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3930 build_string ("tcvn5712-1"),
3932 staticpro (&Vcharset_latin_viscii_lower);
3933 Vcharset_latin_viscii_lower =
3934 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3935 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3936 build_string ("VISCII lower"),
3937 build_string ("VISCII lower (Vietnamese)"),
3938 build_string ("VISCII lower (Vietnamese)"),
3939 build_string ("MULEVISCII-LOWER"),
3941 staticpro (&Vcharset_latin_viscii_upper);
3942 Vcharset_latin_viscii_upper =
3943 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3944 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3945 build_string ("VISCII upper"),
3946 build_string ("VISCII upper (Vietnamese)"),
3947 build_string ("VISCII upper (Vietnamese)"),
3948 build_string ("MULEVISCII-UPPER"),
3950 staticpro (&Vcharset_latin_viscii);
3951 Vcharset_latin_viscii =
3952 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3953 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3954 build_string ("VISCII"),
3955 build_string ("VISCII 1.1 (Vietnamese)"),
3956 build_string ("VISCII 1.1 (Vietnamese)"),
3957 build_string ("VISCII1\\.1"),
3959 staticpro (&Vcharset_chinese_big5);
3960 Vcharset_chinese_big5 =
3961 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
3962 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3963 build_string ("Big5"),
3964 build_string ("Big5"),
3965 build_string ("Big5 Chinese traditional"),
3966 build_string ("big5"),
3968 staticpro (&Vcharset_ideograph_gt);
3969 Vcharset_ideograph_gt =
3970 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
3971 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3972 build_string ("GT"),
3973 build_string ("GT"),
3974 build_string ("GT"),
3976 Qnil, MIN_CHAR_GT, MAX_CHAR_GT, 0, 0);
3977 #define DEF_GT_PJ(n) \
3978 staticpro (&Vcharset_ideograph_gt_pj_##n); \
3979 Vcharset_ideograph_gt_pj_##n = \
3980 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
3981 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
3982 build_string ("GT-PJ-"#n), \
3983 build_string ("GT (pseudo JIS encoding) part "#n), \
3984 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
3986 ("\\(GT2000PJ-"#n "\\|jisx0208\\.GT2000-"#n "\\)$"), \
4000 staticpro (&Vcharset_ideograph_daikanwa);
4001 Vcharset_ideograph_daikanwa =
4002 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
4003 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4004 build_string ("Daikanwa"),
4005 build_string ("Morohashi's Daikanwa"),
4006 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
4007 build_string ("Daikanwa"),
4008 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
4009 staticpro (&Vcharset_mojikyo);
4011 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
4012 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4013 build_string ("Mojikyo"),
4014 build_string ("Mojikyo"),
4015 build_string ("Konjaku-Mojikyo"),
4017 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
4018 staticpro (&Vcharset_mojikyo_2022_1);
4019 Vcharset_mojikyo_2022_1 =
4020 make_charset (LEADING_BYTE_MOJIKYO_2022_1, Qmojikyo_2022_1, 94, 3,
4021 2, 2, ':', CHARSET_LEFT_TO_RIGHT,
4022 build_string ("Mojikyo-2022-1"),
4023 build_string ("Mojikyo ISO-2022 Part 1"),
4024 build_string ("Konjaku-Mojikyo for ISO/IEC 2022 Part 1"),
4028 #define DEF_MOJIKYO_PJ(n) \
4029 staticpro (&Vcharset_mojikyo_pj_##n); \
4030 Vcharset_mojikyo_pj_##n = \
4031 make_charset (LEADING_BYTE_MOJIKYO_PJ_##n, Qmojikyo_pj_##n, 94, 2, \
4032 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
4033 build_string ("Mojikyo-PJ-"#n), \
4034 build_string ("Mojikyo (pseudo JIS encoding) part "#n), \
4036 ("Konjaku-Mojikyo (pseudo JIS encoding) part "#n), \
4038 ("\\(MojikyoPJ-"#n "\\|jisx0208\\.Mojikyo-"#n "\\)$"), \
4050 DEF_MOJIKYO_PJ (10);
4051 DEF_MOJIKYO_PJ (11);
4052 DEF_MOJIKYO_PJ (12);
4053 DEF_MOJIKYO_PJ (13);
4054 DEF_MOJIKYO_PJ (14);
4055 DEF_MOJIKYO_PJ (15);
4056 DEF_MOJIKYO_PJ (16);
4057 DEF_MOJIKYO_PJ (17);
4058 DEF_MOJIKYO_PJ (18);
4059 DEF_MOJIKYO_PJ (19);
4060 DEF_MOJIKYO_PJ (20);
4061 DEF_MOJIKYO_PJ (21);
4063 staticpro (&Vcharset_ethiopic_ucs);
4064 Vcharset_ethiopic_ucs =
4065 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
4066 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4067 build_string ("Ethiopic (UCS)"),
4068 build_string ("Ethiopic (UCS)"),
4069 build_string ("Ethiopic of UCS"),
4070 build_string ("Ethiopic-Unicode"),
4071 Qnil, 0x1200, 0x137F, 0x1200, 0);
4073 staticpro (&Vcharset_chinese_big5_1);
4074 Vcharset_chinese_big5_1 =
4075 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
4076 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
4077 build_string ("Big5"),
4078 build_string ("Big5 (Level-1)"),
4080 ("Big5 Level-1 Chinese traditional"),
4081 build_string ("big5"),
4083 staticpro (&Vcharset_chinese_big5_2);
4084 Vcharset_chinese_big5_2 =
4085 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
4086 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
4087 build_string ("Big5"),
4088 build_string ("Big5 (Level-2)"),
4090 ("Big5 Level-2 Chinese traditional"),
4091 build_string ("big5"),
4094 #ifdef ENABLE_COMPOSITE_CHARS
4095 /* #### For simplicity, we put composite chars into a 96x96 charset.
4096 This is going to lead to problems because you can run out of
4097 room, esp. as we don't yet recycle numbers. */
4098 staticpro (&Vcharset_composite);
4099 Vcharset_composite =
4100 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
4101 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4102 build_string ("Composite"),
4103 build_string ("Composite characters"),
4104 build_string ("Composite characters"),
4107 /* #### not dumped properly */
4108 composite_char_row_next = 32;
4109 composite_char_col_next = 32;
4111 Vcomposite_char_string2char_hash_table =
4112 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
4113 Vcomposite_char_char2string_hash_table =
4114 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
4115 staticpro (&Vcomposite_char_string2char_hash_table);
4116 staticpro (&Vcomposite_char_char2string_hash_table);
4117 #endif /* ENABLE_COMPOSITE_CHARS */