1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 20.3. Not in FSF. */
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_japanese_jisx0208;
60 Lisp_Object Vcharset_japanese_jisx0208_1990;
61 Lisp_Object Vcharset_korean_ksc5601;
62 Lisp_Object Vcharset_japanese_jisx0212;
63 Lisp_Object Vcharset_chinese_cns11643_1;
64 Lisp_Object Vcharset_chinese_cns11643_2;
66 Lisp_Object Vcharset_ucs;
67 Lisp_Object Vcharset_ucs_bmp;
68 Lisp_Object Vcharset_latin_viscii;
69 Lisp_Object Vcharset_latin_tcvn5712;
70 Lisp_Object Vcharset_latin_viscii_lower;
71 Lisp_Object Vcharset_latin_viscii_upper;
72 Lisp_Object Vcharset_chinese_big5;
73 Lisp_Object Vcharset_ideograph_daikanwa;
74 Lisp_Object Vcharset_mojikyo;
75 Lisp_Object Vcharset_mojikyo_2022_1;
76 Lisp_Object Vcharset_mojikyo_pj_1;
77 Lisp_Object Vcharset_mojikyo_pj_2;
78 Lisp_Object Vcharset_mojikyo_pj_3;
79 Lisp_Object Vcharset_mojikyo_pj_4;
80 Lisp_Object Vcharset_mojikyo_pj_5;
81 Lisp_Object Vcharset_mojikyo_pj_6;
82 Lisp_Object Vcharset_mojikyo_pj_7;
83 Lisp_Object Vcharset_mojikyo_pj_8;
84 Lisp_Object Vcharset_mojikyo_pj_9;
85 Lisp_Object Vcharset_mojikyo_pj_10;
86 Lisp_Object Vcharset_mojikyo_pj_11;
87 Lisp_Object Vcharset_mojikyo_pj_12;
88 Lisp_Object Vcharset_mojikyo_pj_13;
89 Lisp_Object Vcharset_mojikyo_pj_14;
90 Lisp_Object Vcharset_mojikyo_pj_15;
91 Lisp_Object Vcharset_mojikyo_pj_16;
92 Lisp_Object Vcharset_mojikyo_pj_17;
93 Lisp_Object Vcharset_mojikyo_pj_18;
94 Lisp_Object Vcharset_mojikyo_pj_19;
95 Lisp_Object Vcharset_mojikyo_pj_20;
96 Lisp_Object Vcharset_mojikyo_pj_21;
97 Lisp_Object Vcharset_ethiopic_ucs;
99 Lisp_Object Vcharset_chinese_big5_1;
100 Lisp_Object Vcharset_chinese_big5_2;
102 #ifdef ENABLE_COMPOSITE_CHARS
103 Lisp_Object Vcharset_composite;
105 /* Hash tables for composite chars. One maps string representing
106 composed chars to their equivalent chars; one goes the
108 Lisp_Object Vcomposite_char_char2string_hash_table;
109 Lisp_Object Vcomposite_char_string2char_hash_table;
111 static int composite_char_row_next;
112 static int composite_char_col_next;
114 #endif /* ENABLE_COMPOSITE_CHARS */
116 struct charset_lookup *chlook;
118 static const struct lrecord_description charset_lookup_description_1[] = {
119 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
128 static const struct struct_description charset_lookup_description = {
129 sizeof (struct charset_lookup),
130 charset_lookup_description_1
134 /* Table of number of bytes in the string representation of a character
135 indexed by the first byte of that representation.
137 rep_bytes_by_first_byte(c) is more efficient than the equivalent
138 canonical computation:
140 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
142 const Bytecount rep_bytes_by_first_byte[0xA0] =
143 { /* 0x00 - 0x7f are for straight ASCII */
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
146 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
147 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
148 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
149 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
150 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
151 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
152 /* 0x80 - 0x8f are for Dimension-1 official charsets */
154 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
156 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
158 /* 0x90 - 0x9d are for Dimension-2 official charsets */
159 /* 0x9e is for Dimension-1 private charsets */
160 /* 0x9f is for Dimension-2 private charsets */
161 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
167 #define BT_UINT8_MIN 0
168 #define BT_UINT8_MAX (UCHAR_MAX - 3)
169 #define BT_UINT8_t (UCHAR_MAX - 2)
170 #define BT_UINT8_nil (UCHAR_MAX - 1)
171 #define BT_UINT8_unbound UCHAR_MAX
173 INLINE_HEADER int INT_UINT8_P (Lisp_Object obj);
174 INLINE_HEADER int UINT8_VALUE_P (Lisp_Object obj);
175 INLINE_HEADER unsigned char UINT8_ENCODE (Lisp_Object obj);
176 INLINE_HEADER Lisp_Object UINT8_DECODE (unsigned char n);
177 INLINE_HEADER unsigned short UINT8_TO_UINT16 (unsigned char n);
180 INT_UINT8_P (Lisp_Object obj)
184 int num = XINT (obj);
186 return (BT_UINT8_MIN <= num) && (num <= BT_UINT8_MAX);
193 UINT8_VALUE_P (Lisp_Object obj)
195 return EQ (obj, Qunbound)
196 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT8_P (obj);
199 INLINE_HEADER unsigned char
200 UINT8_ENCODE (Lisp_Object obj)
202 if (EQ (obj, Qunbound))
203 return BT_UINT8_unbound;
204 else if (EQ (obj, Qnil))
206 else if (EQ (obj, Qt))
212 INLINE_HEADER Lisp_Object
213 UINT8_DECODE (unsigned char n)
215 if (n == BT_UINT8_unbound)
217 else if (n == BT_UINT8_nil)
219 else if (n == BT_UINT8_t)
226 mark_uint8_byte_table (Lisp_Object obj)
232 print_uint8_byte_table (Lisp_Object obj,
233 Lisp_Object printcharfun, int escapeflag)
235 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
237 struct gcpro gcpro1, gcpro2;
238 GCPRO2 (obj, printcharfun);
240 write_c_string ("\n#<uint8-byte-table", printcharfun);
241 for (i = 0; i < 256; i++)
243 unsigned char n = bte->property[i];
245 write_c_string ("\n ", printcharfun);
246 write_c_string (" ", printcharfun);
247 if (n == BT_UINT8_unbound)
248 write_c_string ("void", printcharfun);
249 else if (n == BT_UINT8_nil)
250 write_c_string ("nil", printcharfun);
251 else if (n == BT_UINT8_t)
252 write_c_string ("t", printcharfun);
257 sprintf (buf, "%hd", n);
258 write_c_string (buf, printcharfun);
262 write_c_string (">", printcharfun);
266 uint8_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
268 Lisp_Uint8_Byte_Table *te1 = XUINT8_BYTE_TABLE (obj1);
269 Lisp_Uint8_Byte_Table *te2 = XUINT8_BYTE_TABLE (obj2);
272 for (i = 0; i < 256; i++)
273 if (te1->property[i] != te2->property[i])
279 uint8_byte_table_hash (Lisp_Object obj, int depth)
281 Lisp_Uint8_Byte_Table *te = XUINT8_BYTE_TABLE (obj);
285 for (i = 0; i < 256; i++)
286 hash = HASH2 (hash, te->property[i]);
290 DEFINE_LRECORD_IMPLEMENTATION ("uint8-byte-table", uint8_byte_table,
291 mark_uint8_byte_table,
292 print_uint8_byte_table,
293 0, uint8_byte_table_equal,
294 uint8_byte_table_hash,
295 0 /* uint8_byte_table_description */,
296 Lisp_Uint8_Byte_Table);
299 make_uint8_byte_table (unsigned char initval)
303 Lisp_Uint8_Byte_Table *cte;
305 cte = alloc_lcrecord_type (Lisp_Uint8_Byte_Table,
306 &lrecord_uint8_byte_table);
308 for (i = 0; i < 256; i++)
309 cte->property[i] = initval;
311 XSETUINT8_BYTE_TABLE (obj, cte);
316 uint8_byte_table_same_value_p (Lisp_Object obj)
318 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
319 unsigned char v0 = bte->property[0];
322 for (i = 1; i < 256; i++)
324 if (bte->property[i] != v0)
331 #define BT_UINT16_MIN 0
332 #define BT_UINT16_MAX (USHRT_MAX - 3)
333 #define BT_UINT16_t (USHRT_MAX - 2)
334 #define BT_UINT16_nil (USHRT_MAX - 1)
335 #define BT_UINT16_unbound USHRT_MAX
337 INLINE_HEADER int INT_UINT16_P (Lisp_Object obj);
338 INLINE_HEADER int UINT16_VALUE_P (Lisp_Object obj);
339 INLINE_HEADER unsigned short UINT16_ENCODE (Lisp_Object obj);
340 INLINE_HEADER Lisp_Object UINT16_DECODE (unsigned short us);
343 INT_UINT16_P (Lisp_Object obj)
347 int num = XINT (obj);
349 return (BT_UINT16_MIN <= num) && (num <= BT_UINT16_MAX);
356 UINT16_VALUE_P (Lisp_Object obj)
358 return EQ (obj, Qunbound)
359 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT16_P (obj);
362 INLINE_HEADER unsigned short
363 UINT16_ENCODE (Lisp_Object obj)
365 if (EQ (obj, Qunbound))
366 return BT_UINT16_unbound;
367 else if (EQ (obj, Qnil))
368 return BT_UINT16_nil;
369 else if (EQ (obj, Qt))
375 INLINE_HEADER Lisp_Object
376 UINT16_DECODE (unsigned short n)
378 if (n == BT_UINT16_unbound)
380 else if (n == BT_UINT16_nil)
382 else if (n == BT_UINT16_t)
388 INLINE_HEADER unsigned short
389 UINT8_TO_UINT16 (unsigned char n)
391 if (n == BT_UINT8_unbound)
392 return BT_UINT16_unbound;
393 else if (n == BT_UINT8_nil)
394 return BT_UINT16_nil;
395 else if (n == BT_UINT8_t)
402 mark_uint16_byte_table (Lisp_Object obj)
408 print_uint16_byte_table (Lisp_Object obj,
409 Lisp_Object printcharfun, int escapeflag)
411 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
413 struct gcpro gcpro1, gcpro2;
414 GCPRO2 (obj, printcharfun);
416 write_c_string ("\n#<uint16-byte-table", printcharfun);
417 for (i = 0; i < 256; i++)
419 unsigned short n = bte->property[i];
421 write_c_string ("\n ", printcharfun);
422 write_c_string (" ", printcharfun);
423 if (n == BT_UINT16_unbound)
424 write_c_string ("void", printcharfun);
425 else if (n == BT_UINT16_nil)
426 write_c_string ("nil", printcharfun);
427 else if (n == BT_UINT16_t)
428 write_c_string ("t", printcharfun);
433 sprintf (buf, "%hd", n);
434 write_c_string (buf, printcharfun);
438 write_c_string (">", printcharfun);
442 uint16_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
444 Lisp_Uint16_Byte_Table *te1 = XUINT16_BYTE_TABLE (obj1);
445 Lisp_Uint16_Byte_Table *te2 = XUINT16_BYTE_TABLE (obj2);
448 for (i = 0; i < 256; i++)
449 if (te1->property[i] != te2->property[i])
455 uint16_byte_table_hash (Lisp_Object obj, int depth)
457 Lisp_Uint16_Byte_Table *te = XUINT16_BYTE_TABLE (obj);
461 for (i = 0; i < 256; i++)
462 hash = HASH2 (hash, te->property[i]);
466 DEFINE_LRECORD_IMPLEMENTATION ("uint16-byte-table", uint16_byte_table,
467 mark_uint16_byte_table,
468 print_uint16_byte_table,
469 0, uint16_byte_table_equal,
470 uint16_byte_table_hash,
471 0 /* uint16_byte_table_description */,
472 Lisp_Uint16_Byte_Table);
475 make_uint16_byte_table (unsigned short initval)
479 Lisp_Uint16_Byte_Table *cte;
481 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
482 &lrecord_uint16_byte_table);
484 for (i = 0; i < 256; i++)
485 cte->property[i] = initval;
487 XSETUINT16_BYTE_TABLE (obj, cte);
492 expand_uint8_byte_table_to_uint16 (Lisp_Object table)
496 Lisp_Uint8_Byte_Table* bte = XUINT8_BYTE_TABLE(table);
497 Lisp_Uint16_Byte_Table* cte;
499 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
500 &lrecord_uint16_byte_table);
501 for (i = 0; i < 256; i++)
503 cte->property[i] = UINT8_TO_UINT16 (bte->property[i]);
505 XSETUINT16_BYTE_TABLE (obj, cte);
510 uint16_byte_table_same_value_p (Lisp_Object obj)
512 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
513 unsigned short v0 = bte->property[0];
516 for (i = 1; i < 256; i++)
518 if (bte->property[i] != v0)
526 mark_byte_table (Lisp_Object obj)
528 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
531 for (i = 0; i < 256; i++)
533 mark_object (cte->property[i]);
539 print_byte_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
541 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
543 struct gcpro gcpro1, gcpro2;
544 GCPRO2 (obj, printcharfun);
546 write_c_string ("\n#<byte-table", printcharfun);
547 for (i = 0; i < 256; i++)
549 Lisp_Object elt = bte->property[i];
551 write_c_string ("\n ", printcharfun);
552 write_c_string (" ", printcharfun);
553 if (EQ (elt, Qunbound))
554 write_c_string ("void", printcharfun);
556 print_internal (elt, printcharfun, escapeflag);
559 write_c_string (">", printcharfun);
563 byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
565 Lisp_Byte_Table *cte1 = XBYTE_TABLE (obj1);
566 Lisp_Byte_Table *cte2 = XBYTE_TABLE (obj2);
569 for (i = 0; i < 256; i++)
570 if (BYTE_TABLE_P (cte1->property[i]))
572 if (BYTE_TABLE_P (cte2->property[i]))
574 if (!byte_table_equal (cte1->property[i],
575 cte2->property[i], depth + 1))
582 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
588 byte_table_hash (Lisp_Object obj, int depth)
590 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
592 return internal_array_hash (cte->property, 256, depth);
595 static const struct lrecord_description byte_table_description[] = {
596 { XD_LISP_OBJECT_ARRAY, offsetof(Lisp_Byte_Table, property), 256 },
600 DEFINE_LRECORD_IMPLEMENTATION ("byte-table", byte_table,
605 byte_table_description,
609 make_byte_table (Lisp_Object initval)
613 Lisp_Byte_Table *cte;
615 cte = alloc_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
617 for (i = 0; i < 256; i++)
618 cte->property[i] = initval;
620 XSETBYTE_TABLE (obj, cte);
625 byte_table_same_value_p (Lisp_Object obj)
627 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
628 Lisp_Object v0 = bte->property[0];
631 for (i = 1; i < 256; i++)
633 if (!internal_equal (bte->property[i], v0, 0))
640 Lisp_Object get_byte_table (Lisp_Object table, unsigned char idx);
641 Lisp_Object put_byte_table (Lisp_Object table, unsigned char idx,
645 get_byte_table (Lisp_Object table, unsigned char idx)
647 if (UINT8_BYTE_TABLE_P (table))
648 return UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[idx]);
649 else if (UINT16_BYTE_TABLE_P (table))
650 return UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[idx]);
651 else if (BYTE_TABLE_P (table))
652 return XBYTE_TABLE(table)->property[idx];
658 put_byte_table (Lisp_Object table, unsigned char idx, Lisp_Object value)
660 if (UINT8_BYTE_TABLE_P (table))
662 if (UINT8_VALUE_P (value))
664 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
665 if (!UINT8_BYTE_TABLE_P (value) &&
666 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
667 && uint8_byte_table_same_value_p (table))
672 else if (UINT16_VALUE_P (value))
674 Lisp_Object new = expand_uint8_byte_table_to_uint16 (table);
676 XUINT16_BYTE_TABLE(new)->property[idx] = UINT16_ENCODE (value);
681 Lisp_Object new = make_byte_table (Qnil);
684 for (i = 0; i < 256; i++)
686 XBYTE_TABLE(new)->property[i]
687 = UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[i]);
689 XBYTE_TABLE(new)->property[idx] = value;
693 else if (UINT16_BYTE_TABLE_P (table))
695 if (UINT16_VALUE_P (value))
697 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
698 if (!UINT8_BYTE_TABLE_P (value) &&
699 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
700 && uint16_byte_table_same_value_p (table))
707 Lisp_Object new = make_byte_table (Qnil);
710 for (i = 0; i < 256; i++)
712 XBYTE_TABLE(new)->property[i]
713 = UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[i]);
715 XBYTE_TABLE(new)->property[idx] = value;
719 else if (BYTE_TABLE_P (table))
721 XBYTE_TABLE(table)->property[idx] = value;
722 if (!UINT8_BYTE_TABLE_P (value) &&
723 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
724 && byte_table_same_value_p (table))
729 else if (!internal_equal (table, value, 0))
731 if (UINT8_VALUE_P (table) && UINT8_VALUE_P (value))
733 table = make_uint8_byte_table (UINT8_ENCODE (table));
734 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
736 else if (UINT16_VALUE_P (table) && UINT16_VALUE_P (value))
738 table = make_uint16_byte_table (UINT16_ENCODE (table));
739 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
743 table = make_byte_table (table);
744 XBYTE_TABLE(table)->property[idx] = value;
751 mark_char_id_table (Lisp_Object obj)
753 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
759 print_char_id_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
761 Lisp_Object table = XCHAR_ID_TABLE (obj)->table;
763 struct gcpro gcpro1, gcpro2;
764 GCPRO2 (obj, printcharfun);
766 write_c_string ("#<char-id-table ", printcharfun);
767 for (i = 0; i < 256; i++)
769 Lisp_Object elt = get_byte_table (table, i);
770 if (i != 0) write_c_string ("\n ", printcharfun);
771 if (EQ (elt, Qunbound))
772 write_c_string ("void", printcharfun);
774 print_internal (elt, printcharfun, escapeflag);
777 write_c_string (">", printcharfun);
781 char_id_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
783 Lisp_Object table1 = XCHAR_ID_TABLE (obj1)->table;
784 Lisp_Object table2 = XCHAR_ID_TABLE (obj2)->table;
787 for (i = 0; i < 256; i++)
789 if (!internal_equal (get_byte_table (table1, i),
790 get_byte_table (table2, i), 0))
797 char_id_table_hash (Lisp_Object obj, int depth)
799 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
801 return char_id_table_hash (cte->table, depth + 1);
804 static const struct lrecord_description char_id_table_description[] = {
805 { XD_LISP_OBJECT, offsetof(Lisp_Char_ID_Table, table) },
809 DEFINE_LRECORD_IMPLEMENTATION ("char-id-table", char_id_table,
812 0, char_id_table_equal,
814 char_id_table_description,
818 make_char_id_table (Lisp_Object initval)
821 Lisp_Char_ID_Table *cte;
823 cte = alloc_lcrecord_type (Lisp_Char_ID_Table, &lrecord_char_id_table);
825 cte->table = make_byte_table (initval);
827 XSETCHAR_ID_TABLE (obj, cte);
833 get_char_id_table (Emchar ch, Lisp_Object table)
835 unsigned int code = ch;
842 (XCHAR_ID_TABLE (table)->table,
843 (unsigned char)(code >> 24)),
844 (unsigned char) (code >> 16)),
845 (unsigned char) (code >> 8)),
846 (unsigned char) code);
849 void put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table);
851 put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table)
853 unsigned int code = ch;
854 Lisp_Object table1, table2, table3, table4;
856 table1 = XCHAR_ID_TABLE (table)->table;
857 table2 = get_byte_table (table1, (unsigned char)(code >> 24));
858 table3 = get_byte_table (table2, (unsigned char)(code >> 16));
859 table4 = get_byte_table (table3, (unsigned char)(code >> 8));
861 table4 = put_byte_table (table4, (unsigned char)code, value);
862 table3 = put_byte_table (table3, (unsigned char)(code >> 8), table4);
863 table2 = put_byte_table (table2, (unsigned char)(code >> 16), table3);
864 XCHAR_ID_TABLE (table)->table
865 = put_byte_table (table1, (unsigned char)(code >> 24), table2);
869 Lisp_Object Vchar_attribute_hash_table;
870 Lisp_Object Vcharacter_composition_table;
871 Lisp_Object Vcharacter_variant_table;
873 Lisp_Object Qideograph_daikanwa;
874 Lisp_Object Q_decomposition;
878 Lisp_Object Qisolated;
879 Lisp_Object Qinitial;
882 Lisp_Object Qvertical;
883 Lisp_Object QnoBreak;
884 Lisp_Object Qfraction;
894 Emchar to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg);
896 Lisp_Object put_char_ccs_code_point (Lisp_Object character,
897 Lisp_Object ccs, Lisp_Object value);
898 Lisp_Object remove_char_ccs (Lisp_Object character, Lisp_Object ccs);
901 to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
907 else if (EQ (v, Qcompat))
909 else if (EQ (v, Qisolated))
911 else if (EQ (v, Qinitial))
913 else if (EQ (v, Qmedial))
915 else if (EQ (v, Qfinal))
917 else if (EQ (v, Qvertical))
919 else if (EQ (v, QnoBreak))
921 else if (EQ (v, Qfraction))
923 else if (EQ (v, Qsuper))
925 else if (EQ (v, Qsub))
927 else if (EQ (v, Qcircle))
929 else if (EQ (v, Qsquare))
931 else if (EQ (v, Qwide))
933 else if (EQ (v, Qnarrow))
935 else if (EQ (v, Qsmall))
937 else if (EQ (v, Qfont))
940 signal_simple_error (err_msg, err_arg);
943 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
944 Return character corresponding with list.
948 Lisp_Object table = Vcharacter_composition_table;
949 Lisp_Object rest = list;
953 Lisp_Object v = Fcar (rest);
955 Emchar c = to_char_id (v, "Invalid value for composition", list);
957 ret = get_char_id_table (c, table);
962 if (!CHAR_ID_TABLE_P (ret))
967 else if (!CONSP (rest))
969 else if (CHAR_ID_TABLE_P (ret))
972 signal_simple_error ("Invalid table is found with", list);
974 signal_simple_error ("Invalid value for composition", list);
977 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
978 Return variants of CHARACTER.
982 CHECK_CHAR (character);
983 return Fcopy_list (get_char_id_table (XCHAR (character),
984 Vcharacter_variant_table));
988 /* We store the char-attributes in hash tables with the names as the
989 key and the actual char-id-table object as the value. Occasionally
990 we need to use them in a list format. These routines provide us
992 struct char_attribute_list_closure
994 Lisp_Object *char_attribute_list;
998 add_char_attribute_to_list_mapper (Lisp_Object key, Lisp_Object value,
999 void *char_attribute_list_closure)
1001 /* This function can GC */
1002 struct char_attribute_list_closure *calcl
1003 = (struct char_attribute_list_closure*) char_attribute_list_closure;
1004 Lisp_Object *char_attribute_list = calcl->char_attribute_list;
1006 *char_attribute_list = Fcons (key, *char_attribute_list);
1010 DEFUN ("char-attribute-list", Fchar_attribute_list, 0, 0, 0, /*
1011 Return the list of all existing character attributes except coded-charsets.
1015 Lisp_Object char_attribute_list = Qnil;
1016 struct gcpro gcpro1;
1017 struct char_attribute_list_closure char_attribute_list_closure;
1019 GCPRO1 (char_attribute_list);
1020 char_attribute_list_closure.char_attribute_list = &char_attribute_list;
1021 elisp_maphash (add_char_attribute_to_list_mapper,
1022 Vchar_attribute_hash_table,
1023 &char_attribute_list_closure);
1025 return char_attribute_list;
1028 DEFUN ("find-char-attribute-table", Ffind_char_attribute_table, 1, 1, 0, /*
1029 Return char-id-table corresponding to ATTRIBUTE.
1033 return Fgethash (attribute, Vchar_attribute_hash_table, Qnil);
1037 /* We store the char-id-tables in hash tables with the attributes as
1038 the key and the actual char-id-table object as the value. Each
1039 char-id-table stores values of an attribute corresponding with
1040 characters. Occasionally we need to get attributes of a character
1041 in a association-list format. These routines provide us with
1043 struct char_attribute_alist_closure
1046 Lisp_Object *char_attribute_alist;
1050 add_char_attribute_alist_mapper (Lisp_Object key, Lisp_Object value,
1051 void *char_attribute_alist_closure)
1053 /* This function can GC */
1054 struct char_attribute_alist_closure *caacl =
1055 (struct char_attribute_alist_closure*) char_attribute_alist_closure;
1056 Lisp_Object ret = get_char_id_table (caacl->char_id, value);
1057 if (!UNBOUNDP (ret))
1059 Lisp_Object *char_attribute_alist = caacl->char_attribute_alist;
1060 *char_attribute_alist
1061 = Fcons (Fcons (key, ret), *char_attribute_alist);
1066 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
1067 Return the alist of attributes of CHARACTER.
1071 Lisp_Object alist = Qnil;
1074 CHECK_CHAR (character);
1076 struct gcpro gcpro1;
1077 struct char_attribute_alist_closure char_attribute_alist_closure;
1080 char_attribute_alist_closure.char_id = XCHAR (character);
1081 char_attribute_alist_closure.char_attribute_alist = &alist;
1082 elisp_maphash (add_char_attribute_alist_mapper,
1083 Vchar_attribute_hash_table,
1084 &char_attribute_alist_closure);
1088 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
1090 Lisp_Object ccs = chlook->charset_by_leading_byte[i];
1094 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1097 if ( CHAR_ID_TABLE_P (encoding_table)
1098 && INTP (cpos = get_char_id_table (XCHAR (character),
1101 alist = Fcons (Fcons (ccs, cpos), alist);
1108 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
1109 Return the value of CHARACTER's ATTRIBUTE.
1111 (character, attribute))
1115 CHECK_CHAR (character);
1116 if (!NILP (ccs = Ffind_charset (attribute)))
1118 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1120 if (CHAR_ID_TABLE_P (encoding_table))
1121 return get_char_id_table (XCHAR (character), encoding_table);
1127 Lisp_Object table = Fgethash (attribute,
1128 Vchar_attribute_hash_table,
1130 if (!UNBOUNDP (table))
1132 Lisp_Object ret = get_char_id_table (XCHAR (character), table);
1133 if (!UNBOUNDP (ret))
1140 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
1141 Store CHARACTER's ATTRIBUTE with VALUE.
1143 (character, attribute, value))
1147 CHECK_CHAR (character);
1148 ccs = Ffind_charset (attribute);
1151 return put_char_ccs_code_point (character, ccs, value);
1153 else if (EQ (attribute, Q_decomposition))
1158 signal_simple_error ("Invalid value for ->decomposition",
1161 if (CONSP (Fcdr (value)))
1163 Lisp_Object rest = value;
1164 Lisp_Object table = Vcharacter_composition_table;
1168 GET_EXTERNAL_LIST_LENGTH (rest, len);
1169 seq = make_vector (len, Qnil);
1171 while (CONSP (rest))
1173 Lisp_Object v = Fcar (rest);
1176 = to_char_id (v, "Invalid value for ->decomposition", value);
1179 XVECTOR_DATA(seq)[i++] = v;
1181 XVECTOR_DATA(seq)[i++] = make_char (c);
1185 put_char_id_table (c, character, table);
1190 ntable = get_char_id_table (c, table);
1191 if (!CHAR_ID_TABLE_P (ntable))
1193 ntable = make_char_id_table (Qnil);
1194 put_char_id_table (c, ntable, table);
1202 Lisp_Object v = Fcar (value);
1206 Emchar c = XINT (v);
1208 = get_char_id_table (c, Vcharacter_variant_table);
1210 if (NILP (Fmemq (v, ret)))
1212 put_char_id_table (c, Fcons (character, ret),
1213 Vcharacter_variant_table);
1216 seq = make_vector (1, v);
1220 else if (EQ (attribute, Q_ucs))
1226 signal_simple_error ("Invalid value for ->ucs", value);
1230 ret = get_char_id_table (c, Vcharacter_variant_table);
1231 if (NILP (Fmemq (character, ret)))
1233 put_char_id_table (c, Fcons (character, ret),
1234 Vcharacter_variant_table);
1238 Lisp_Object table = Fgethash (attribute,
1239 Vchar_attribute_hash_table,
1244 table = make_char_id_table (Qunbound);
1245 Fputhash (attribute, table, Vchar_attribute_hash_table);
1247 put_char_id_table (XCHAR (character), value, table);
1252 DEFUN ("remove-char-attribute", Fremove_char_attribute, 2, 2, 0, /*
1253 Remove CHARACTER's ATTRIBUTE.
1255 (character, attribute))
1259 CHECK_CHAR (character);
1260 ccs = Ffind_charset (attribute);
1263 return remove_char_ccs (character, ccs);
1267 Lisp_Object table = Fgethash (attribute,
1268 Vchar_attribute_hash_table,
1270 if (!UNBOUNDP (table))
1272 put_char_id_table (XCHAR (character), Qunbound, table);
1279 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
1281 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
1283 /* ad-hoc method for `ascii' */
1284 if ((CHARSET_CHARS (cs) == 94) &&
1285 (CHARSET_BYTE_OFFSET (cs) != 33))
1286 return 128 - CHARSET_BYTE_OFFSET (cs);
1288 return CHARSET_CHARS (cs);
1291 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
1293 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
1295 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
1299 if (XVECTOR_LENGTH (v) > ccs_len)
1302 for (i = 0; i < XVECTOR_LENGTH (v); i++)
1304 Lisp_Object c = XVECTOR_DATA(v)[i];
1306 if (!NILP (c) && !CHARP (c))
1310 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
1322 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1325 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1335 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1336 nv = XVECTOR_DATA(v)[i];
1342 XVECTOR_DATA(v)[i] = Qnil;
1346 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1347 int code_point, Lisp_Object character);
1349 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1350 int code_point, Lisp_Object character)
1354 int ccs_len = XVECTOR_LENGTH (v);
1359 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1360 nv = XVECTOR_DATA(v)[i];
1364 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
1370 XVECTOR_DATA(v)[i] = character;
1374 put_char_ccs_code_point (Lisp_Object character,
1375 Lisp_Object ccs, Lisp_Object value)
1377 Lisp_Object encoding_table;
1379 if (!EQ (XCHARSET_NAME (ccs), Qucs)
1380 || (XCHAR (character) != XINT (value)))
1382 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
1383 int dim = XCHARSET_DIMENSION (ccs);
1384 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
1385 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
1389 { /* obsolete representation: value must be a list of bytes */
1390 Lisp_Object ret = Fcar (value);
1394 signal_simple_error ("Invalid value for coded-charset", value);
1395 code_point = XINT (ret);
1396 if (XCHARSET_GRAPHIC (ccs) == 1)
1398 rest = Fcdr (value);
1399 while (!NILP (rest))
1404 signal_simple_error ("Invalid value for coded-charset",
1408 signal_simple_error ("Invalid value for coded-charset",
1411 if (XCHARSET_GRAPHIC (ccs) == 1)
1413 code_point = (code_point << 8) | j;
1416 value = make_int (code_point);
1418 else if (INTP (value))
1420 code_point = XINT (value);
1421 if (XCHARSET_GRAPHIC (ccs) == 1)
1423 code_point &= 0x7F7F7F7F;
1424 value = make_int (code_point);
1428 signal_simple_error ("Invalid value for coded-charset", value);
1432 Lisp_Object cpos = Fget_char_attribute (character, ccs);
1435 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
1440 XCHARSET_DECODING_TABLE (ccs)
1441 = v = make_older_vector (ccs_len, Qnil);
1444 decoding_table_put_char (v, dim, byte_offset, code_point, character);
1446 if (NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
1448 XCHARSET_ENCODING_TABLE (ccs)
1449 = encoding_table = make_char_id_table (Qnil);
1451 put_char_id_table (XCHAR (character), value, encoding_table);
1456 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
1458 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
1459 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1461 if (VECTORP (decoding_table))
1463 Lisp_Object cpos = Fget_char_attribute (character, ccs);
1467 decoding_table_remove_char (decoding_table,
1468 XCHARSET_DIMENSION (ccs),
1469 XCHARSET_BYTE_OFFSET (ccs),
1473 if (CHAR_ID_TABLE_P (encoding_table))
1475 put_char_id_table (XCHAR (character), Qnil, encoding_table);
1480 EXFUN (Fmake_char, 3);
1481 EXFUN (Fdecode_char, 2);
1483 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
1484 Store character's ATTRIBUTES.
1488 Lisp_Object rest = attributes;
1489 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
1490 Lisp_Object character;
1492 Lisp_Object daikanwa = Qnil;
1497 while (CONSP (rest))
1499 Lisp_Object cell = Fcar (rest);
1503 signal_simple_error ("Invalid argument", attributes);
1504 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
1505 && ((XCHARSET_FINAL (ccs) != 0) ||
1506 (XCHARSET_UCS_MAX (ccs) > 0)) )
1510 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
1512 character = Fdecode_char (ccs, cell);
1513 goto setup_attributes;
1517 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
1520 signal_simple_error ("Invalid argument", attributes);
1522 character = make_char (XINT (code) + 0x100000);
1523 goto setup_attributes;
1527 else if (!INTP (code))
1528 signal_simple_error ("Invalid argument", attributes);
1530 character = make_char (XINT (code));
1534 while (CONSP (rest))
1536 Lisp_Object cell = Fcar (rest);
1538 Lisp_Object key = Fcar (cell);
1539 Lisp_Object value = Fcdr (cell);
1543 signal_simple_error ("Invalid argument", attributes);
1546 if (EQ (key, Qmorohashi_daikanwa))
1549 GET_EXTERNAL_LIST_LENGTH (value, len);
1553 if (NILP (daikanwa))
1554 daikanwa = Fcdr (Fassq (Qideograph_daikanwa, rest));
1555 if (EQ (Fcar (value), daikanwa))
1559 else if (EQ (key, Qideograph_daikanwa))
1563 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
1572 Lisp_Object Vutf_2000_version;
1576 int leading_code_private_11;
1579 Lisp_Object Qcharsetp;
1581 /* Qdoc_string, Qdimension, Qchars defined in general.c */
1582 Lisp_Object Qregistry, Qfinal, Qgraphic;
1583 Lisp_Object Qdirection;
1584 Lisp_Object Qreverse_direction_charset;
1585 Lisp_Object Qleading_byte;
1586 Lisp_Object Qshort_name, Qlong_name;
1600 Qcyrillic_iso8859_5,
1602 Qjapanese_jisx0208_1978,
1605 Qjapanese_jisx0208_1990,
1608 Qchinese_cns11643_1,
1609 Qchinese_cns11643_2,
1614 Qlatin_viscii_lower,
1615 Qlatin_viscii_upper,
1616 Qvietnamese_viscii_lower,
1617 Qvietnamese_viscii_upper,
1648 Lisp_Object Ql2r, Qr2l;
1650 Lisp_Object Vcharset_hash_table;
1652 /* Composite characters are characters constructed by overstriking two
1653 or more regular characters.
1655 1) The old Mule implementation involves storing composite characters
1656 in a buffer as a tag followed by all of the actual characters
1657 used to make up the composite character. I think this is a bad
1658 idea; it greatly complicates code that wants to handle strings
1659 one character at a time because it has to deal with the possibility
1660 of great big ungainly characters. It's much more reasonable to
1661 simply store an index into a table of composite characters.
1663 2) The current implementation only allows for 16,384 separate
1664 composite characters over the lifetime of the XEmacs process.
1665 This could become a potential problem if the user
1666 edited lots of different files that use composite characters.
1667 Due to FSF bogosity, increasing the number of allowable
1668 composite characters under Mule would decrease the number
1669 of possible faces that can exist. Mule already has shrunk
1670 this to 2048, and further shrinkage would become uncomfortable.
1671 No such problems exist in XEmacs.
1673 Composite characters could be represented as 0x80 C1 C2 C3,
1674 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1675 for slightly under 2^20 (one million) composite characters
1676 over the XEmacs process lifetime, and you only need to
1677 increase the size of a Mule character from 19 to 21 bits.
1678 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1679 85 million (slightly over 2^26) composite characters. */
1682 /************************************************************************/
1683 /* Basic Emchar functions */
1684 /************************************************************************/
1686 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1687 string in STR. Returns the number of bytes stored.
1688 Do not call this directly. Use the macro set_charptr_emchar() instead.
1692 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1698 Lisp_Object charset;
1707 else if ( c <= 0x7ff )
1709 *p++ = (c >> 6) | 0xc0;
1710 *p++ = (c & 0x3f) | 0x80;
1712 else if ( c <= 0xffff )
1714 *p++ = (c >> 12) | 0xe0;
1715 *p++ = ((c >> 6) & 0x3f) | 0x80;
1716 *p++ = (c & 0x3f) | 0x80;
1718 else if ( c <= 0x1fffff )
1720 *p++ = (c >> 18) | 0xf0;
1721 *p++ = ((c >> 12) & 0x3f) | 0x80;
1722 *p++ = ((c >> 6) & 0x3f) | 0x80;
1723 *p++ = (c & 0x3f) | 0x80;
1725 else if ( c <= 0x3ffffff )
1727 *p++ = (c >> 24) | 0xf8;
1728 *p++ = ((c >> 18) & 0x3f) | 0x80;
1729 *p++ = ((c >> 12) & 0x3f) | 0x80;
1730 *p++ = ((c >> 6) & 0x3f) | 0x80;
1731 *p++ = (c & 0x3f) | 0x80;
1735 *p++ = (c >> 30) | 0xfc;
1736 *p++ = ((c >> 24) & 0x3f) | 0x80;
1737 *p++ = ((c >> 18) & 0x3f) | 0x80;
1738 *p++ = ((c >> 12) & 0x3f) | 0x80;
1739 *p++ = ((c >> 6) & 0x3f) | 0x80;
1740 *p++ = (c & 0x3f) | 0x80;
1743 BREAKUP_CHAR (c, charset, c1, c2);
1744 lb = CHAR_LEADING_BYTE (c);
1745 if (LEADING_BYTE_PRIVATE_P (lb))
1746 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1748 if (EQ (charset, Vcharset_control_1))
1757 /* Return the first character from a Mule-encoded string in STR,
1758 assuming it's non-ASCII. Do not call this directly.
1759 Use the macro charptr_emchar() instead. */
1762 non_ascii_charptr_emchar (const Bufbyte *str)
1775 else if ( b >= 0xf8 )
1780 else if ( b >= 0xf0 )
1785 else if ( b >= 0xe0 )
1790 else if ( b >= 0xc0 )
1800 for( ; len > 0; len-- )
1803 ch = ( ch << 6 ) | ( b & 0x3f );
1807 Bufbyte i0 = *str, i1, i2 = 0;
1808 Lisp_Object charset;
1810 if (i0 == LEADING_BYTE_CONTROL_1)
1811 return (Emchar) (*++str - 0x20);
1813 if (LEADING_BYTE_PREFIX_P (i0))
1818 charset = CHARSET_BY_LEADING_BYTE (i0);
1819 if (XCHARSET_DIMENSION (charset) == 2)
1822 return MAKE_CHAR (charset, i1, i2);
1826 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1827 Do not call this directly. Use the macro valid_char_p() instead. */
1831 non_ascii_valid_char_p (Emchar ch)
1835 /* Must have only lowest 19 bits set */
1839 f1 = CHAR_FIELD1 (ch);
1840 f2 = CHAR_FIELD2 (ch);
1841 f3 = CHAR_FIELD3 (ch);
1845 Lisp_Object charset;
1847 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1848 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1849 f2 > MAX_CHAR_FIELD2_PRIVATE)
1854 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1855 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1859 NOTE: This takes advantage of the fact that
1860 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1861 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1863 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1864 if (EQ (charset, Qnil))
1866 return (XCHARSET_CHARS (charset) == 96);
1870 Lisp_Object charset;
1872 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1873 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1874 f1 > MAX_CHAR_FIELD1_PRIVATE)
1876 if (f2 < 0x20 || f3 < 0x20)
1879 #ifdef ENABLE_COMPOSITE_CHARS
1880 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1882 if (UNBOUNDP (Fgethash (make_int (ch),
1883 Vcomposite_char_char2string_hash_table,
1888 #endif /* ENABLE_COMPOSITE_CHARS */
1890 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1891 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1894 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1896 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1899 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1901 if (EQ (charset, Qnil))
1903 return (XCHARSET_CHARS (charset) == 96);
1909 /************************************************************************/
1910 /* Basic string functions */
1911 /************************************************************************/
1913 /* Copy the character pointed to by PTR into STR, assuming it's
1914 non-ASCII. Do not call this directly. Use the macro
1915 charptr_copy_char() instead. */
1918 non_ascii_charptr_copy_char (const Bufbyte *ptr, Bufbyte *str)
1920 Bufbyte *strptr = str;
1922 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1924 /* Notice fallthrough. */
1926 case 6: *++strptr = *ptr++;
1927 case 5: *++strptr = *ptr++;
1929 case 4: *++strptr = *ptr++;
1930 case 3: *++strptr = *ptr++;
1931 case 2: *++strptr = *ptr;
1936 return strptr + 1 - str;
1940 /************************************************************************/
1941 /* streams of Emchars */
1942 /************************************************************************/
1944 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1945 The functions below are not meant to be called directly; use
1946 the macros in insdel.h. */
1949 Lstream_get_emchar_1 (Lstream *stream, int ch)
1951 Bufbyte str[MAX_EMCHAR_LEN];
1952 Bufbyte *strptr = str;
1954 str[0] = (Bufbyte) ch;
1955 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1957 /* Notice fallthrough. */
1960 ch = Lstream_getc (stream);
1962 *++strptr = (Bufbyte) ch;
1964 ch = Lstream_getc (stream);
1966 *++strptr = (Bufbyte) ch;
1969 ch = Lstream_getc (stream);
1971 *++strptr = (Bufbyte) ch;
1973 ch = Lstream_getc (stream);
1975 *++strptr = (Bufbyte) ch;
1977 ch = Lstream_getc (stream);
1979 *++strptr = (Bufbyte) ch;
1984 return charptr_emchar (str);
1988 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1990 Bufbyte str[MAX_EMCHAR_LEN];
1991 Bytecount len = set_charptr_emchar (str, ch);
1992 return Lstream_write (stream, str, len);
1996 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1998 Bufbyte str[MAX_EMCHAR_LEN];
1999 Bytecount len = set_charptr_emchar (str, ch);
2000 Lstream_unread (stream, str, len);
2004 /************************************************************************/
2005 /* charset object */
2006 /************************************************************************/
2009 mark_charset (Lisp_Object obj)
2011 Lisp_Charset *cs = XCHARSET (obj);
2013 mark_object (cs->short_name);
2014 mark_object (cs->long_name);
2015 mark_object (cs->doc_string);
2016 mark_object (cs->registry);
2017 mark_object (cs->ccl_program);
2019 mark_object (cs->encoding_table);
2020 /* mark_object (cs->decoding_table); */
2026 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
2028 Lisp_Charset *cs = XCHARSET (obj);
2032 error ("printing unreadable object #<charset %s 0x%x>",
2033 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
2036 write_c_string ("#<charset ", printcharfun);
2037 print_internal (CHARSET_NAME (cs), printcharfun, 0);
2038 write_c_string (" ", printcharfun);
2039 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
2040 write_c_string (" ", printcharfun);
2041 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
2042 write_c_string (" ", printcharfun);
2043 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
2044 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
2046 CHARSET_DIMENSION (cs),
2047 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
2048 CHARSET_COLUMNS (cs),
2049 CHARSET_GRAPHIC (cs),
2050 CHARSET_FINAL (cs));
2051 write_c_string (buf, printcharfun);
2052 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
2053 sprintf (buf, " 0x%x>", cs->header.uid);
2054 write_c_string (buf, printcharfun);
2057 static const struct lrecord_description charset_description[] = {
2058 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
2059 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
2060 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
2061 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
2062 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
2063 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
2064 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
2066 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
2067 { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
2072 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
2073 mark_charset, print_charset, 0, 0, 0,
2074 charset_description,
2076 /* Make a new charset. */
2079 make_charset (Charset_ID id, Lisp_Object name,
2080 unsigned short chars, unsigned char dimension,
2081 unsigned char columns, unsigned char graphic,
2082 Bufbyte final, unsigned char direction, Lisp_Object short_name,
2083 Lisp_Object long_name, Lisp_Object doc,
2085 Lisp_Object decoding_table,
2086 Emchar ucs_min, Emchar ucs_max,
2087 Emchar code_offset, unsigned char byte_offset)
2090 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
2094 XSETCHARSET (obj, cs);
2096 CHARSET_ID (cs) = id;
2097 CHARSET_NAME (cs) = name;
2098 CHARSET_SHORT_NAME (cs) = short_name;
2099 CHARSET_LONG_NAME (cs) = long_name;
2100 CHARSET_CHARS (cs) = chars;
2101 CHARSET_DIMENSION (cs) = dimension;
2102 CHARSET_DIRECTION (cs) = direction;
2103 CHARSET_COLUMNS (cs) = columns;
2104 CHARSET_GRAPHIC (cs) = graphic;
2105 CHARSET_FINAL (cs) = final;
2106 CHARSET_DOC_STRING (cs) = doc;
2107 CHARSET_REGISTRY (cs) = reg;
2108 CHARSET_CCL_PROGRAM (cs) = Qnil;
2109 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
2111 CHARSET_DECODING_TABLE(cs) = Qnil;
2112 CHARSET_ENCODING_TABLE(cs) = Qnil;
2113 CHARSET_UCS_MIN(cs) = ucs_min;
2114 CHARSET_UCS_MAX(cs) = ucs_max;
2115 CHARSET_CODE_OFFSET(cs) = code_offset;
2116 CHARSET_BYTE_OFFSET(cs) = byte_offset;
2120 if (id == LEADING_BYTE_ASCII)
2121 CHARSET_REP_BYTES (cs) = 1;
2123 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
2125 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
2130 /* some charsets do not have final characters. This includes
2131 ASCII, Control-1, Composite, and the two faux private
2133 unsigned char iso2022_type
2134 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
2136 if (code_offset == 0)
2138 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
2139 chlook->charset_by_attributes[iso2022_type][final] = obj;
2143 (chlook->charset_by_attributes[iso2022_type][final][direction]));
2144 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
2148 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
2149 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
2151 /* Some charsets are "faux" and don't have names or really exist at
2152 all except in the leading-byte table. */
2154 Fputhash (name, obj, Vcharset_hash_table);
2159 get_unallocated_leading_byte (int dimension)
2164 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
2167 lb = chlook->next_allocated_leading_byte++;
2171 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
2174 lb = chlook->next_allocated_1_byte_leading_byte++;
2178 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
2181 lb = chlook->next_allocated_2_byte_leading_byte++;
2187 ("No more character sets free for this dimension",
2188 make_int (dimension));
2194 /* Number of Big5 characters which have the same code in 1st byte. */
2196 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
2199 make_builtin_char (Lisp_Object charset, int c1, int c2)
2201 if (XCHARSET_UCS_MAX (charset))
2204 = (XCHARSET_DIMENSION (charset) == 1
2206 c1 - XCHARSET_BYTE_OFFSET (charset)
2208 (c1 - XCHARSET_BYTE_OFFSET (charset)) * XCHARSET_CHARS (charset)
2209 + c2 - XCHARSET_BYTE_OFFSET (charset))
2210 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
2211 if ((code < XCHARSET_UCS_MIN (charset))
2212 || (XCHARSET_UCS_MAX (charset) < code))
2213 signal_simple_error ("Arguments makes invalid character",
2217 else if (XCHARSET_DIMENSION (charset) == 1)
2219 switch (XCHARSET_CHARS (charset))
2223 + (XCHARSET_FINAL (charset) - '0') * 94 + (c1 - 33);
2226 + (XCHARSET_FINAL (charset) - '0') * 96 + (c1 - 32);
2233 if (EQ (charset, Vcharset_chinese_big5))
2235 int B1 = c1, B2 = c2;
2237 = (B1 - 0xA1) * BIG5_SAME_ROW
2238 + B2 - (B2 < 0x7F ? 0x40 : 0x62);
2242 charset = Vcharset_chinese_big5_1;
2246 charset = Vcharset_chinese_big5_2;
2247 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
2252 switch (XCHARSET_CHARS (charset))
2255 return MIN_CHAR_94x94
2256 + (XCHARSET_FINAL (charset) - '0') * 94 * 94
2257 + (c1 - 33) * 94 + (c2 - 33);
2259 return MIN_CHAR_96x96
2260 + (XCHARSET_FINAL (charset) - '0') * 96 * 96
2261 + (c1 - 32) * 96 + (c2 - 32);
2269 range_charset_code_point (Lisp_Object charset, Emchar ch)
2273 if ((XCHARSET_UCS_MIN (charset) <= ch)
2274 && (ch <= XCHARSET_UCS_MAX (charset)))
2276 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
2278 if (XCHARSET_CHARS (charset) == 256)
2280 else if (XCHARSET_DIMENSION (charset) == 1)
2281 return d + XCHARSET_BYTE_OFFSET (charset);
2282 else if (XCHARSET_DIMENSION (charset) == 2)
2284 ((d / XCHARSET_CHARS (charset)
2285 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2286 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2287 else if (XCHARSET_DIMENSION (charset) == 3)
2289 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2290 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2291 | ((d / XCHARSET_CHARS (charset)
2292 % XCHARSET_CHARS (charset)
2293 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2294 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2295 else /* if (XCHARSET_DIMENSION (charset) == 4) */
2297 ((d / (XCHARSET_CHARS (charset)
2298 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2299 + XCHARSET_BYTE_OFFSET (charset)) << 24)
2300 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2301 % XCHARSET_CHARS (charset)
2302 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2303 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
2304 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2305 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2307 else if (XCHARSET_CODE_OFFSET (charset) == 0)
2309 if (XCHARSET_DIMENSION (charset) == 1)
2311 if (XCHARSET_CHARS (charset) == 94)
2313 if (((d = ch - (MIN_CHAR_94
2314 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
2318 else if (XCHARSET_CHARS (charset) == 96)
2320 if (((d = ch - (MIN_CHAR_96
2321 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
2328 else if (XCHARSET_DIMENSION (charset) == 2)
2330 if (XCHARSET_CHARS (charset) == 94)
2332 if (((d = ch - (MIN_CHAR_94x94
2333 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
2336 return (((d / 94) + 33) << 8) | (d % 94 + 33);
2338 else if (XCHARSET_CHARS (charset) == 96)
2340 if (((d = ch - (MIN_CHAR_96x96
2341 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
2344 return (((d / 96) + 32) << 8) | (d % 96 + 32);
2350 if (EQ (charset, Vcharset_mojikyo_2022_1)
2351 && (MIN_CHAR_MOJIKYO < ch) && (ch < MIN_CHAR_MOJIKYO + 94 * 60 * 94))
2353 int m = ch - MIN_CHAR_MOJIKYO - 1;
2354 int byte1 = m / (94 * 60) + 33;
2355 int byte2 = (m % (94 * 60)) / 94;
2356 int byte3 = m % 94 + 33;
2362 return (byte1 << 16) | (byte2 << 8) | byte3;
2368 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
2370 if (c <= MAX_CHAR_BASIC_LATIN)
2372 *charset = Vcharset_ascii;
2377 *charset = Vcharset_control_1;
2382 *charset = Vcharset_latin_iso8859_1;
2386 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
2388 *charset = Vcharset_greek_iso8859_7;
2389 return c - MIN_CHAR_GREEK + 0x20;
2391 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
2393 *charset = Vcharset_cyrillic_iso8859_5;
2394 return c - MIN_CHAR_CYRILLIC + 0x20;
2397 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
2399 *charset = Vcharset_hebrew_iso8859_8;
2400 return c - MIN_CHAR_HEBREW + 0x20;
2402 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
2404 *charset = Vcharset_thai_tis620;
2405 return c - MIN_CHAR_THAI + 0x20;
2408 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
2409 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
2411 return list2 (Vcharset_katakana_jisx0201,
2412 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
2415 else if (c <= MAX_CHAR_BMP)
2417 *charset = Vcharset_ucs_bmp;
2420 else if (c < MIN_CHAR_DAIKANWA)
2422 *charset = Vcharset_ucs;
2426 else if (c <= MAX_CHAR_DAIKANWA)
2428 *charset = Vcharset_ideograph_daikanwa;
2429 return c - MIN_CHAR_DAIKANWA;
2432 else if (c <= MAX_CHAR_MOJIKYO)
2434 *charset = Vcharset_mojikyo;
2435 return c - MIN_CHAR_MOJIKYO;
2437 else if (c < MIN_CHAR_94)
2439 *charset = Vcharset_ucs;
2442 else if (c <= MAX_CHAR_94)
2444 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
2445 ((c - MIN_CHAR_94) / 94) + '0',
2446 CHARSET_LEFT_TO_RIGHT);
2447 if (!NILP (*charset))
2448 return ((c - MIN_CHAR_94) % 94) + 33;
2451 *charset = Vcharset_ucs;
2455 else if (c <= MAX_CHAR_96)
2457 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
2458 ((c - MIN_CHAR_96) / 96) + '0',
2459 CHARSET_LEFT_TO_RIGHT);
2460 if (!NILP (*charset))
2461 return ((c - MIN_CHAR_96) % 96) + 32;
2464 *charset = Vcharset_ucs;
2468 else if (c <= MAX_CHAR_94x94)
2471 = CHARSET_BY_ATTRIBUTES (94, 2,
2472 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
2473 CHARSET_LEFT_TO_RIGHT);
2474 if (!NILP (*charset))
2475 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
2476 | (((c - MIN_CHAR_94x94) % 94) + 33);
2479 *charset = Vcharset_ucs;
2483 else if (c <= MAX_CHAR_96x96)
2486 = CHARSET_BY_ATTRIBUTES (96, 2,
2487 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
2488 CHARSET_LEFT_TO_RIGHT);
2489 if (!NILP (*charset))
2490 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
2491 | (((c - MIN_CHAR_96x96) % 96) + 32);
2494 *charset = Vcharset_ucs;
2500 *charset = Vcharset_ucs;
2505 Lisp_Object Vdefault_coded_charset_priority_list;
2509 /************************************************************************/
2510 /* Basic charset Lisp functions */
2511 /************************************************************************/
2513 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
2514 Return non-nil if OBJECT is a charset.
2518 return CHARSETP (object) ? Qt : Qnil;
2521 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
2522 Retrieve the charset of the given name.
2523 If CHARSET-OR-NAME is a charset object, it is simply returned.
2524 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
2525 nil is returned. Otherwise the associated charset object is returned.
2529 if (CHARSETP (charset_or_name))
2530 return charset_or_name;
2532 CHECK_SYMBOL (charset_or_name);
2533 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
2536 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
2537 Retrieve the charset of the given name.
2538 Same as `find-charset' except an error is signalled if there is no such
2539 charset instead of returning nil.
2543 Lisp_Object charset = Ffind_charset (name);
2546 signal_simple_error ("No such charset", name);
2550 /* We store the charsets in hash tables with the names as the key and the
2551 actual charset object as the value. Occasionally we need to use them
2552 in a list format. These routines provide us with that. */
2553 struct charset_list_closure
2555 Lisp_Object *charset_list;
2559 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
2560 void *charset_list_closure)
2562 /* This function can GC */
2563 struct charset_list_closure *chcl =
2564 (struct charset_list_closure*) charset_list_closure;
2565 Lisp_Object *charset_list = chcl->charset_list;
2567 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
2571 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
2572 Return a list of the names of all defined charsets.
2576 Lisp_Object charset_list = Qnil;
2577 struct gcpro gcpro1;
2578 struct charset_list_closure charset_list_closure;
2580 GCPRO1 (charset_list);
2581 charset_list_closure.charset_list = &charset_list;
2582 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
2583 &charset_list_closure);
2586 return charset_list;
2589 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
2590 Return the name of the given charset.
2594 return XCHARSET_NAME (Fget_charset (charset));
2597 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
2598 Define a new character set.
2599 This function is for use with Mule support.
2600 NAME is a symbol, the name by which the character set is normally referred.
2601 DOC-STRING is a string describing the character set.
2602 PROPS is a property list, describing the specific nature of the
2603 character set. Recognized properties are:
2605 'short-name Short version of the charset name (ex: Latin-1)
2606 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
2607 'registry A regular expression matching the font registry field for
2609 'dimension Number of octets used to index a character in this charset.
2610 Either 1 or 2. Defaults to 1.
2611 'columns Number of columns used to display a character in this charset.
2612 Only used in TTY mode. (Under X, the actual width of a
2613 character can be derived from the font used to display the
2614 characters.) If unspecified, defaults to the dimension
2615 (this is almost always the correct value).
2616 'chars Number of characters in each dimension (94 or 96).
2617 Defaults to 94. Note that if the dimension is 2, the
2618 character set thus described is 94x94 or 96x96.
2619 'final Final byte of ISO 2022 escape sequence. Must be
2620 supplied. Each combination of (DIMENSION, CHARS) defines a
2621 separate namespace for final bytes. Note that ISO
2622 2022 restricts the final byte to the range
2623 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
2624 dimension == 2. Note also that final bytes in the range
2625 0x30 - 0x3F are reserved for user-defined (not official)
2627 'graphic 0 (use left half of font on output) or 1 (use right half
2628 of font on output). Defaults to 0. For example, for
2629 a font whose registry is ISO8859-1, the left half
2630 (octets 0x20 - 0x7F) is the `ascii' character set, while
2631 the right half (octets 0xA0 - 0xFF) is the `latin-1'
2632 character set. With 'graphic set to 0, the octets
2633 will have their high bit cleared; with it set to 1,
2634 the octets will have their high bit set.
2635 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
2637 'ccl-program A compiled CCL program used to convert a character in
2638 this charset into an index into the font. This is in
2639 addition to the 'graphic property. The CCL program
2640 is passed the octets of the character, with the high
2641 bit cleared and set depending upon whether the value
2642 of the 'graphic property is 0 or 1.
2644 (name, doc_string, props))
2646 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
2647 int direction = CHARSET_LEFT_TO_RIGHT;
2648 Lisp_Object registry = Qnil;
2649 Lisp_Object charset;
2650 Lisp_Object rest, keyword, value;
2651 Lisp_Object ccl_program = Qnil;
2652 Lisp_Object short_name = Qnil, long_name = Qnil;
2653 int byte_offset = -1;
2655 CHECK_SYMBOL (name);
2656 if (!NILP (doc_string))
2657 CHECK_STRING (doc_string);
2659 charset = Ffind_charset (name);
2660 if (!NILP (charset))
2661 signal_simple_error ("Cannot redefine existing charset", name);
2663 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
2665 if (EQ (keyword, Qshort_name))
2667 CHECK_STRING (value);
2671 if (EQ (keyword, Qlong_name))
2673 CHECK_STRING (value);
2677 else if (EQ (keyword, Qdimension))
2680 dimension = XINT (value);
2681 if (dimension < 1 || dimension > 2)
2682 signal_simple_error ("Invalid value for 'dimension", value);
2685 else if (EQ (keyword, Qchars))
2688 chars = XINT (value);
2689 if (chars != 94 && chars != 96)
2690 signal_simple_error ("Invalid value for 'chars", value);
2693 else if (EQ (keyword, Qcolumns))
2696 columns = XINT (value);
2697 if (columns != 1 && columns != 2)
2698 signal_simple_error ("Invalid value for 'columns", value);
2701 else if (EQ (keyword, Qgraphic))
2704 graphic = XINT (value);
2706 if (graphic < 0 || graphic > 2)
2708 if (graphic < 0 || graphic > 1)
2710 signal_simple_error ("Invalid value for 'graphic", value);
2713 else if (EQ (keyword, Qregistry))
2715 CHECK_STRING (value);
2719 else if (EQ (keyword, Qdirection))
2721 if (EQ (value, Ql2r))
2722 direction = CHARSET_LEFT_TO_RIGHT;
2723 else if (EQ (value, Qr2l))
2724 direction = CHARSET_RIGHT_TO_LEFT;
2726 signal_simple_error ("Invalid value for 'direction", value);
2729 else if (EQ (keyword, Qfinal))
2731 CHECK_CHAR_COERCE_INT (value);
2732 final = XCHAR (value);
2733 if (final < '0' || final > '~')
2734 signal_simple_error ("Invalid value for 'final", value);
2737 else if (EQ (keyword, Qccl_program))
2739 CHECK_VECTOR (value);
2740 ccl_program = value;
2744 signal_simple_error ("Unrecognized property", keyword);
2748 error ("'final must be specified");
2749 if (dimension == 2 && final > 0x5F)
2751 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2754 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2755 CHARSET_LEFT_TO_RIGHT)) ||
2756 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2757 CHARSET_RIGHT_TO_LEFT)))
2759 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2761 id = get_unallocated_leading_byte (dimension);
2763 if (NILP (doc_string))
2764 doc_string = build_string ("");
2766 if (NILP (registry))
2767 registry = build_string ("");
2769 if (NILP (short_name))
2770 XSETSTRING (short_name, XSYMBOL (name)->name);
2772 if (NILP (long_name))
2773 long_name = doc_string;
2776 columns = dimension;
2778 if (byte_offset < 0)
2782 else if (chars == 96)
2788 charset = make_charset (id, name, chars, dimension, columns, graphic,
2789 final, direction, short_name, long_name,
2790 doc_string, registry,
2791 Qnil, 0, 0, 0, byte_offset);
2792 if (!NILP (ccl_program))
2793 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2797 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2799 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2800 NEW-NAME is the name of the new charset. Return the new charset.
2802 (charset, new_name))
2804 Lisp_Object new_charset = Qnil;
2805 int id, chars, dimension, columns, graphic, final;
2807 Lisp_Object registry, doc_string, short_name, long_name;
2810 charset = Fget_charset (charset);
2811 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2812 signal_simple_error ("Charset already has reverse-direction charset",
2815 CHECK_SYMBOL (new_name);
2816 if (!NILP (Ffind_charset (new_name)))
2817 signal_simple_error ("Cannot redefine existing charset", new_name);
2819 cs = XCHARSET (charset);
2821 chars = CHARSET_CHARS (cs);
2822 dimension = CHARSET_DIMENSION (cs);
2823 columns = CHARSET_COLUMNS (cs);
2824 id = get_unallocated_leading_byte (dimension);
2826 graphic = CHARSET_GRAPHIC (cs);
2827 final = CHARSET_FINAL (cs);
2828 direction = CHARSET_RIGHT_TO_LEFT;
2829 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2830 direction = CHARSET_LEFT_TO_RIGHT;
2831 doc_string = CHARSET_DOC_STRING (cs);
2832 short_name = CHARSET_SHORT_NAME (cs);
2833 long_name = CHARSET_LONG_NAME (cs);
2834 registry = CHARSET_REGISTRY (cs);
2836 new_charset = make_charset (id, new_name, chars, dimension, columns,
2837 graphic, final, direction, short_name, long_name,
2838 doc_string, registry,
2840 CHARSET_DECODING_TABLE(cs),
2841 CHARSET_UCS_MIN(cs),
2842 CHARSET_UCS_MAX(cs),
2843 CHARSET_CODE_OFFSET(cs),
2844 CHARSET_BYTE_OFFSET(cs)
2850 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2851 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
2856 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2857 Define symbol ALIAS as an alias for CHARSET.
2861 CHECK_SYMBOL (alias);
2862 charset = Fget_charset (charset);
2863 return Fputhash (alias, charset, Vcharset_hash_table);
2866 /* #### Reverse direction charsets not yet implemented. */
2868 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2870 Return the reverse-direction charset parallel to CHARSET, if any.
2871 This is the charset with the same properties (in particular, the same
2872 dimension, number of characters per dimension, and final byte) as
2873 CHARSET but whose characters are displayed in the opposite direction.
2877 charset = Fget_charset (charset);
2878 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
2882 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2883 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2884 If DIRECTION is omitted, both directions will be checked (left-to-right
2885 will be returned if character sets exist for both directions).
2887 (dimension, chars, final, direction))
2889 int dm, ch, fi, di = -1;
2890 Lisp_Object obj = Qnil;
2892 CHECK_INT (dimension);
2893 dm = XINT (dimension);
2894 if (dm < 1 || dm > 2)
2895 signal_simple_error ("Invalid value for DIMENSION", dimension);
2899 if (ch != 94 && ch != 96)
2900 signal_simple_error ("Invalid value for CHARS", chars);
2902 CHECK_CHAR_COERCE_INT (final);
2904 if (fi < '0' || fi > '~')
2905 signal_simple_error ("Invalid value for FINAL", final);
2907 if (EQ (direction, Ql2r))
2908 di = CHARSET_LEFT_TO_RIGHT;
2909 else if (EQ (direction, Qr2l))
2910 di = CHARSET_RIGHT_TO_LEFT;
2911 else if (!NILP (direction))
2912 signal_simple_error ("Invalid value for DIRECTION", direction);
2914 if (dm == 2 && fi > 0x5F)
2916 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2920 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
2922 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
2925 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
2928 return XCHARSET_NAME (obj);
2932 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2933 Return short name of CHARSET.
2937 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2940 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2941 Return long name of CHARSET.
2945 return XCHARSET_LONG_NAME (Fget_charset (charset));
2948 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2949 Return description of CHARSET.
2953 return XCHARSET_DOC_STRING (Fget_charset (charset));
2956 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2957 Return dimension of CHARSET.
2961 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2964 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2965 Return property PROP of CHARSET.
2966 Recognized properties are those listed in `make-charset', as well as
2967 'name and 'doc-string.
2973 charset = Fget_charset (charset);
2974 cs = XCHARSET (charset);
2976 CHECK_SYMBOL (prop);
2977 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2978 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2979 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2980 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2981 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2982 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2983 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2984 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2985 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2986 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2987 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2988 if (EQ (prop, Qdirection))
2989 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2990 if (EQ (prop, Qreverse_direction_charset))
2992 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2996 return XCHARSET_NAME (obj);
2998 signal_simple_error ("Unrecognized charset property name", prop);
2999 return Qnil; /* not reached */
3002 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
3003 Return charset identification number of CHARSET.
3007 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
3010 /* #### We need to figure out which properties we really want to
3013 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
3014 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
3016 (charset, ccl_program))
3018 charset = Fget_charset (charset);
3019 CHECK_VECTOR (ccl_program);
3020 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
3025 invalidate_charset_font_caches (Lisp_Object charset)
3027 /* Invalidate font cache entries for charset on all devices. */
3028 Lisp_Object devcons, concons, hash_table;
3029 DEVICE_LOOP_NO_BREAK (devcons, concons)
3031 struct device *d = XDEVICE (XCAR (devcons));
3032 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
3033 if (!UNBOUNDP (hash_table))
3034 Fclrhash (hash_table);
3038 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
3039 Set the 'registry property of CHARSET to REGISTRY.
3041 (charset, registry))
3043 charset = Fget_charset (charset);
3044 CHECK_STRING (registry);
3045 XCHARSET_REGISTRY (charset) = registry;
3046 invalidate_charset_font_caches (charset);
3047 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
3052 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
3053 Return mapping-table of CHARSET.
3057 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
3060 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
3061 Set mapping-table of CHARSET to TABLE.
3065 struct Lisp_Charset *cs;
3069 charset = Fget_charset (charset);
3070 cs = XCHARSET (charset);
3074 if (VECTORP (CHARSET_DECODING_TABLE(cs)))
3075 make_vector_newer (CHARSET_DECODING_TABLE(cs));
3076 CHARSET_DECODING_TABLE(cs) = Qnil;
3079 else if (VECTORP (table))
3081 int ccs_len = CHARSET_BYTE_SIZE (cs);
3082 int ret = decoding_table_check_elements (table,
3083 CHARSET_DIMENSION (cs),
3088 signal_simple_error ("Too big table", table);
3090 signal_simple_error ("Invalid element is found", table);
3092 signal_simple_error ("Something wrong", table);
3094 CHARSET_DECODING_TABLE(cs) = Qnil;
3097 signal_error (Qwrong_type_argument,
3098 list2 (build_translated_string ("vector-or-nil-p"),
3101 byte_offset = CHARSET_BYTE_OFFSET (cs);
3102 switch (CHARSET_DIMENSION (cs))
3105 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3107 Lisp_Object c = XVECTOR_DATA(table)[i];
3110 put_char_ccs_code_point (c, charset,
3111 make_int (i + byte_offset));
3115 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3117 Lisp_Object v = XVECTOR_DATA(table)[i];
3123 for (j = 0; j < XVECTOR_LENGTH (v); j++)
3125 Lisp_Object c = XVECTOR_DATA(v)[j];
3128 put_char_ccs_code_point
3130 make_int ( ( (i + byte_offset) << 8 )
3136 put_char_ccs_code_point (v, charset,
3137 make_int (i + byte_offset));
3146 /************************************************************************/
3147 /* Lisp primitives for working with characters */
3148 /************************************************************************/
3151 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
3152 Make a character from CHARSET and code-point CODE.
3158 charset = Fget_charset (charset);
3161 if (XCHARSET_GRAPHIC (charset) == 1)
3163 return make_char (DECODE_CHAR (charset, c));
3166 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
3167 Make a builtin character from CHARSET and code-point CODE.
3174 charset = Fget_charset (charset);
3178 if ((final = XCHARSET_FINAL (charset)) >= '0')
3180 if (XCHARSET_DIMENSION (charset) == 1)
3182 switch (XCHARSET_CHARS (charset))
3186 make_char (MIN_CHAR_94 + (final - '0') * 94
3187 + ((c & 0x7F) - 33));
3190 make_char (MIN_CHAR_96 + (final - '0') * 96
3191 + ((c & 0x7F) - 32));
3193 return Fdecode_char (charset, code);
3198 switch (XCHARSET_CHARS (charset))
3202 make_char (MIN_CHAR_94x94
3203 + (final - '0') * 94 * 94
3204 + (((c >> 8) & 0x7F) - 33) * 94
3205 + ((c & 0x7F) - 33));
3208 make_char (MIN_CHAR_96x96
3209 + (final - '0') * 96 * 96
3210 + (((c >> 8) & 0x7F) - 32) * 96
3211 + ((c & 0x7F) - 32));
3213 return Fdecode_char (charset, code);
3217 else if (XCHARSET_UCS_MAX (charset))
3220 = (XCHARSET_DIMENSION (charset) == 1
3222 c - XCHARSET_BYTE_OFFSET (charset)
3224 ((c >> 8) - XCHARSET_BYTE_OFFSET (charset))
3225 * XCHARSET_CHARS (charset)
3226 + (c & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
3227 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
3228 if ((cid < XCHARSET_UCS_MIN (charset))
3229 || (XCHARSET_UCS_MAX (charset) < cid))
3230 return Fdecode_char (charset, code);
3231 return make_char (cid);
3234 return Fdecode_char (charset, code);
3238 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
3239 Make a character from CHARSET and octets ARG1 and ARG2.
3240 ARG2 is required only for characters from two-dimensional charsets.
3241 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
3242 character s with caron.
3244 (charset, arg1, arg2))
3248 int lowlim, highlim;
3250 charset = Fget_charset (charset);
3251 cs = XCHARSET (charset);
3253 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
3254 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
3256 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
3258 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
3259 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
3262 /* It is useful (and safe, according to Olivier Galibert) to strip
3263 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
3264 write (make-char 'latin-iso8859-2 CODE) where code is the actual
3265 Latin 2 code of the character. */
3273 if (a1 < lowlim || a1 > highlim)
3274 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
3276 if (CHARSET_DIMENSION (cs) == 1)
3280 ("Charset is of dimension one; second octet must be nil", arg2);
3281 return make_char (MAKE_CHAR (charset, a1, 0));
3290 a2 = XINT (arg2) & 0x7f;
3292 if (a2 < lowlim || a2 > highlim)
3293 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
3295 return make_char (MAKE_CHAR (charset, a1, a2));
3298 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
3299 Return the character set of char CH.
3303 CHECK_CHAR_COERCE_INT (ch);
3305 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
3308 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
3309 Return the octet numbered N (should be 0 or 1) of char CH.
3310 N defaults to 0 if omitted.
3314 Lisp_Object charset;
3317 CHECK_CHAR_COERCE_INT (ch);
3319 BREAKUP_CHAR (XCHAR (ch), charset, octet0, octet1);
3321 if (NILP (n) || EQ (n, Qzero))
3322 return make_int (octet0);
3323 else if (EQ (n, make_int (1)))
3324 return make_int (octet1);
3326 signal_simple_error ("Octet number must be 0 or 1", n);
3329 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
3330 Return list of charset and one or two position-codes of CHAR.
3334 /* This function can GC */
3335 struct gcpro gcpro1, gcpro2;
3336 Lisp_Object charset = Qnil;
3337 Lisp_Object rc = Qnil;
3345 GCPRO2 (charset, rc);
3346 CHECK_CHAR_COERCE_INT (character);
3349 code_point = ENCODE_CHAR (XCHAR (character), charset);
3350 dimension = XCHARSET_DIMENSION (charset);
3351 while (dimension > 0)
3353 rc = Fcons (make_int (code_point & 255), rc);
3357 rc = Fcons (XCHARSET_NAME (charset), rc);
3359 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
3361 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
3363 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
3367 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
3376 #ifdef ENABLE_COMPOSITE_CHARS
3377 /************************************************************************/
3378 /* composite character functions */
3379 /************************************************************************/
3382 lookup_composite_char (Bufbyte *str, int len)
3384 Lisp_Object lispstr = make_string (str, len);
3385 Lisp_Object ch = Fgethash (lispstr,
3386 Vcomposite_char_string2char_hash_table,
3392 if (composite_char_row_next >= 128)
3393 signal_simple_error ("No more composite chars available", lispstr);
3394 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
3395 composite_char_col_next);
3396 Fputhash (make_char (emch), lispstr,
3397 Vcomposite_char_char2string_hash_table);
3398 Fputhash (lispstr, make_char (emch),
3399 Vcomposite_char_string2char_hash_table);
3400 composite_char_col_next++;
3401 if (composite_char_col_next >= 128)
3403 composite_char_col_next = 32;
3404 composite_char_row_next++;
3413 composite_char_string (Emchar ch)
3415 Lisp_Object str = Fgethash (make_char (ch),
3416 Vcomposite_char_char2string_hash_table,
3418 assert (!UNBOUNDP (str));
3422 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
3423 Convert a string into a single composite character.
3424 The character is the result of overstriking all the characters in
3429 CHECK_STRING (string);
3430 return make_char (lookup_composite_char (XSTRING_DATA (string),
3431 XSTRING_LENGTH (string)));
3434 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
3435 Return a string of the characters comprising a composite character.
3443 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
3444 signal_simple_error ("Must be composite char", ch);
3445 return composite_char_string (emch);
3447 #endif /* ENABLE_COMPOSITE_CHARS */
3450 /************************************************************************/
3451 /* initialization */
3452 /************************************************************************/
3455 syms_of_mule_charset (void)
3458 INIT_LRECORD_IMPLEMENTATION (uint8_byte_table);
3459 INIT_LRECORD_IMPLEMENTATION (uint16_byte_table);
3460 INIT_LRECORD_IMPLEMENTATION (byte_table);
3461 INIT_LRECORD_IMPLEMENTATION (char_id_table);
3463 INIT_LRECORD_IMPLEMENTATION (charset);
3465 DEFSUBR (Fcharsetp);
3466 DEFSUBR (Ffind_charset);
3467 DEFSUBR (Fget_charset);
3468 DEFSUBR (Fcharset_list);
3469 DEFSUBR (Fcharset_name);
3470 DEFSUBR (Fmake_charset);
3471 DEFSUBR (Fmake_reverse_direction_charset);
3472 /* DEFSUBR (Freverse_direction_charset); */
3473 DEFSUBR (Fdefine_charset_alias);
3474 DEFSUBR (Fcharset_from_attributes);
3475 DEFSUBR (Fcharset_short_name);
3476 DEFSUBR (Fcharset_long_name);
3477 DEFSUBR (Fcharset_description);
3478 DEFSUBR (Fcharset_dimension);
3479 DEFSUBR (Fcharset_property);
3480 DEFSUBR (Fcharset_id);
3481 DEFSUBR (Fset_charset_ccl_program);
3482 DEFSUBR (Fset_charset_registry);
3484 DEFSUBR (Fchar_attribute_list);
3485 DEFSUBR (Ffind_char_attribute_table);
3486 DEFSUBR (Fchar_attribute_alist);
3487 DEFSUBR (Fget_char_attribute);
3488 DEFSUBR (Fput_char_attribute);
3489 DEFSUBR (Fremove_char_attribute);
3490 DEFSUBR (Fdefine_char);
3491 DEFSUBR (Fchar_variants);
3492 DEFSUBR (Fget_composite_char);
3493 DEFSUBR (Fcharset_mapping_table);
3494 DEFSUBR (Fset_charset_mapping_table);
3498 DEFSUBR (Fdecode_char);
3499 DEFSUBR (Fdecode_builtin_char);
3501 DEFSUBR (Fmake_char);
3502 DEFSUBR (Fchar_charset);
3503 DEFSUBR (Fchar_octet);
3504 DEFSUBR (Fsplit_char);
3506 #ifdef ENABLE_COMPOSITE_CHARS
3507 DEFSUBR (Fmake_composite_char);
3508 DEFSUBR (Fcomposite_char_string);
3511 defsymbol (&Qcharsetp, "charsetp");
3512 defsymbol (&Qregistry, "registry");
3513 defsymbol (&Qfinal, "final");
3514 defsymbol (&Qgraphic, "graphic");
3515 defsymbol (&Qdirection, "direction");
3516 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
3517 defsymbol (&Qshort_name, "short-name");
3518 defsymbol (&Qlong_name, "long-name");
3520 defsymbol (&Ql2r, "l2r");
3521 defsymbol (&Qr2l, "r2l");
3523 /* Charsets, compatible with FSF 20.3
3524 Naming convention is Script-Charset[-Edition] */
3525 defsymbol (&Qascii, "ascii");
3526 defsymbol (&Qcontrol_1, "control-1");
3527 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
3528 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
3529 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
3530 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
3531 defsymbol (&Qthai_tis620, "thai-tis620");
3532 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
3533 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
3534 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
3535 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
3536 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
3537 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
3538 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
3539 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
3540 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
3541 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
3542 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
3543 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
3544 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
3545 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
3546 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
3548 defsymbol (&Q_ucs, "->ucs");
3549 defsymbol (&Q_decomposition, "->decomposition");
3550 defsymbol (&Qcompat, "compat");
3551 defsymbol (&Qisolated, "isolated");
3552 defsymbol (&Qinitial, "initial");
3553 defsymbol (&Qmedial, "medial");
3554 defsymbol (&Qfinal, "final");
3555 defsymbol (&Qvertical, "vertical");
3556 defsymbol (&QnoBreak, "noBreak");
3557 defsymbol (&Qfraction, "fraction");
3558 defsymbol (&Qsuper, "super");
3559 defsymbol (&Qsub, "sub");
3560 defsymbol (&Qcircle, "circle");
3561 defsymbol (&Qsquare, "square");
3562 defsymbol (&Qwide, "wide");
3563 defsymbol (&Qnarrow, "narrow");
3564 defsymbol (&Qsmall, "small");
3565 defsymbol (&Qfont, "font");
3566 defsymbol (&Qucs, "ucs");
3567 defsymbol (&Qucs_bmp, "ucs-bmp");
3568 defsymbol (&Qlatin_viscii, "latin-viscii");
3569 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
3570 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
3571 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
3572 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
3573 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
3574 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
3575 defsymbol (&Qchinese_big5, "chinese-big5");
3576 defsymbol (&Qmojikyo, "mojikyo");
3577 defsymbol (&Qmojikyo_2022_1, "mojikyo-2022-1");
3578 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
3579 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
3580 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
3581 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
3582 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
3583 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
3584 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
3585 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
3586 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
3587 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
3588 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
3589 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
3590 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
3591 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
3592 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
3593 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
3594 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
3595 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
3596 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
3597 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
3598 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
3599 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
3601 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
3602 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
3604 defsymbol (&Qcomposite, "composite");
3608 vars_of_mule_charset (void)
3615 chlook = xnew (struct charset_lookup);
3616 dumpstruct (&chlook, &charset_lookup_description);
3618 /* Table of charsets indexed by leading byte. */
3619 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
3620 chlook->charset_by_leading_byte[i] = Qnil;
3623 /* Table of charsets indexed by type/final-byte. */
3624 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3625 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3626 chlook->charset_by_attributes[i][j] = Qnil;
3628 /* Table of charsets indexed by type/final-byte/direction. */
3629 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3630 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3631 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
3632 chlook->charset_by_attributes[i][j][k] = Qnil;
3636 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
3638 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
3639 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
3643 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3644 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
3645 Leading-code of private TYPE9N charset of column-width 1.
3647 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3651 Vutf_2000_version = build_string("0.17 (Hōryūji)");
3652 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
3653 Version number of UTF-2000.
3656 staticpro (&Vcharacter_composition_table);
3657 Vcharacter_composition_table = make_char_id_table (Qnil);
3659 staticpro (&Vcharacter_variant_table);
3660 Vcharacter_variant_table = make_char_id_table (Qnil);
3662 Vdefault_coded_charset_priority_list = Qnil;
3663 DEFVAR_LISP ("default-coded-charset-priority-list",
3664 &Vdefault_coded_charset_priority_list /*
3665 Default order of preferred coded-character-sets.
3671 complex_vars_of_mule_charset (void)
3673 staticpro (&Vcharset_hash_table);
3674 Vcharset_hash_table =
3675 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3677 /* Predefined character sets. We store them into variables for
3681 staticpro (&Vchar_attribute_hash_table);
3682 Vchar_attribute_hash_table
3683 = make_lisp_hash_table (16, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3685 staticpro (&Vcharset_ucs);
3687 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
3688 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3689 build_string ("UCS"),
3690 build_string ("UCS"),
3691 build_string ("ISO/IEC 10646"),
3693 Qnil, 0, 0xFFFFFFF, 0, 0);
3694 staticpro (&Vcharset_ucs_bmp);
3696 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
3697 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3698 build_string ("BMP"),
3699 build_string ("BMP"),
3700 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
3701 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
3702 Qnil, 0, 0xFFFF, 0, 0);
3704 # define MIN_CHAR_THAI 0
3705 # define MAX_CHAR_THAI 0
3706 # define MIN_CHAR_HEBREW 0
3707 # define MAX_CHAR_HEBREW 0
3708 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3709 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
3711 staticpro (&Vcharset_ascii);
3713 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3714 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3715 build_string ("ASCII"),
3716 build_string ("ASCII)"),
3717 build_string ("ASCII (ISO646 IRV)"),
3718 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3719 Qnil, 0, 0x7F, 0, 0);
3720 staticpro (&Vcharset_control_1);
3721 Vcharset_control_1 =
3722 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3723 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3724 build_string ("C1"),
3725 build_string ("Control characters"),
3726 build_string ("Control characters 128-191"),
3728 Qnil, 0x80, 0x9F, 0, 0);
3729 staticpro (&Vcharset_latin_iso8859_1);
3730 Vcharset_latin_iso8859_1 =
3731 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3732 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3733 build_string ("Latin-1"),
3734 build_string ("ISO8859-1 (Latin-1)"),
3735 build_string ("ISO8859-1 (Latin-1)"),
3736 build_string ("iso8859-1"),
3737 Qnil, 0xA0, 0xFF, 0, 32);
3738 staticpro (&Vcharset_latin_iso8859_2);
3739 Vcharset_latin_iso8859_2 =
3740 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3741 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3742 build_string ("Latin-2"),
3743 build_string ("ISO8859-2 (Latin-2)"),
3744 build_string ("ISO8859-2 (Latin-2)"),
3745 build_string ("iso8859-2"),
3747 staticpro (&Vcharset_latin_iso8859_3);
3748 Vcharset_latin_iso8859_3 =
3749 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3750 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3751 build_string ("Latin-3"),
3752 build_string ("ISO8859-3 (Latin-3)"),
3753 build_string ("ISO8859-3 (Latin-3)"),
3754 build_string ("iso8859-3"),
3756 staticpro (&Vcharset_latin_iso8859_4);
3757 Vcharset_latin_iso8859_4 =
3758 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3759 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3760 build_string ("Latin-4"),
3761 build_string ("ISO8859-4 (Latin-4)"),
3762 build_string ("ISO8859-4 (Latin-4)"),
3763 build_string ("iso8859-4"),
3765 staticpro (&Vcharset_thai_tis620);
3766 Vcharset_thai_tis620 =
3767 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3768 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3769 build_string ("TIS620"),
3770 build_string ("TIS620 (Thai)"),
3771 build_string ("TIS620.2529 (Thai)"),
3772 build_string ("tis620"),
3773 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
3774 staticpro (&Vcharset_greek_iso8859_7);
3775 Vcharset_greek_iso8859_7 =
3776 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3777 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3778 build_string ("ISO8859-7"),
3779 build_string ("ISO8859-7 (Greek)"),
3780 build_string ("ISO8859-7 (Greek)"),
3781 build_string ("iso8859-7"),
3783 0 /* MIN_CHAR_GREEK */,
3784 0 /* MAX_CHAR_GREEK */, 0, 32);
3785 staticpro (&Vcharset_arabic_iso8859_6);
3786 Vcharset_arabic_iso8859_6 =
3787 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3788 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3789 build_string ("ISO8859-6"),
3790 build_string ("ISO8859-6 (Arabic)"),
3791 build_string ("ISO8859-6 (Arabic)"),
3792 build_string ("iso8859-6"),
3794 staticpro (&Vcharset_hebrew_iso8859_8);
3795 Vcharset_hebrew_iso8859_8 =
3796 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3797 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3798 build_string ("ISO8859-8"),
3799 build_string ("ISO8859-8 (Hebrew)"),
3800 build_string ("ISO8859-8 (Hebrew)"),
3801 build_string ("iso8859-8"),
3802 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
3803 staticpro (&Vcharset_katakana_jisx0201);
3804 Vcharset_katakana_jisx0201 =
3805 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3806 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3807 build_string ("JISX0201 Kana"),
3808 build_string ("JISX0201.1976 (Japanese Kana)"),
3809 build_string ("JISX0201.1976 Japanese Kana"),
3810 build_string ("jisx0201\\.1976"),
3812 staticpro (&Vcharset_latin_jisx0201);
3813 Vcharset_latin_jisx0201 =
3814 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3815 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3816 build_string ("JISX0201 Roman"),
3817 build_string ("JISX0201.1976 (Japanese Roman)"),
3818 build_string ("JISX0201.1976 Japanese Roman"),
3819 build_string ("jisx0201\\.1976"),
3821 staticpro (&Vcharset_cyrillic_iso8859_5);
3822 Vcharset_cyrillic_iso8859_5 =
3823 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3824 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3825 build_string ("ISO8859-5"),
3826 build_string ("ISO8859-5 (Cyrillic)"),
3827 build_string ("ISO8859-5 (Cyrillic)"),
3828 build_string ("iso8859-5"),
3830 0 /* MIN_CHAR_CYRILLIC */,
3831 0 /* MAX_CHAR_CYRILLIC */, 0, 32);
3832 staticpro (&Vcharset_latin_iso8859_9);
3833 Vcharset_latin_iso8859_9 =
3834 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3835 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3836 build_string ("Latin-5"),
3837 build_string ("ISO8859-9 (Latin-5)"),
3838 build_string ("ISO8859-9 (Latin-5)"),
3839 build_string ("iso8859-9"),
3841 staticpro (&Vcharset_japanese_jisx0208_1978);
3842 Vcharset_japanese_jisx0208_1978 =
3843 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3844 Qjapanese_jisx0208_1978, 94, 2,
3845 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3846 build_string ("JIS X0208:1978"),
3847 build_string ("JIS X0208:1978 (Japanese)"),
3849 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3850 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3852 staticpro (&Vcharset_chinese_gb2312);
3853 Vcharset_chinese_gb2312 =
3854 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3855 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3856 build_string ("GB2312"),
3857 build_string ("GB2312)"),
3858 build_string ("GB2312 Chinese simplified"),
3859 build_string ("gb2312"),
3861 staticpro (&Vcharset_japanese_jisx0208);
3862 Vcharset_japanese_jisx0208 =
3863 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3864 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3865 build_string ("JISX0208"),
3866 build_string ("JIS X0208:1983 (Japanese)"),
3867 build_string ("JIS X0208:1983 Japanese Kanji"),
3868 build_string ("jisx0208\\.1983"),
3871 staticpro (&Vcharset_japanese_jisx0208_1990);
3872 Vcharset_japanese_jisx0208_1990 =
3873 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3874 Qjapanese_jisx0208_1990, 94, 2,
3875 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3876 build_string ("JISX0208-1990"),
3877 build_string ("JIS X0208:1990 (Japanese)"),
3878 build_string ("JIS X0208:1990 Japanese Kanji"),
3879 build_string ("jisx0208\\.1990"),
3881 MIN_CHAR_JIS_X0208_1990,
3882 MAX_CHAR_JIS_X0208_1990, 0, 33);
3884 staticpro (&Vcharset_korean_ksc5601);
3885 Vcharset_korean_ksc5601 =
3886 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3887 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3888 build_string ("KSC5601"),
3889 build_string ("KSC5601 (Korean"),
3890 build_string ("KSC5601 Korean Hangul and Hanja"),
3891 build_string ("ksc5601"),
3893 staticpro (&Vcharset_japanese_jisx0212);
3894 Vcharset_japanese_jisx0212 =
3895 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3896 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3897 build_string ("JISX0212"),
3898 build_string ("JISX0212 (Japanese)"),
3899 build_string ("JISX0212 Japanese Supplement"),
3900 build_string ("jisx0212"),
3903 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3904 staticpro (&Vcharset_chinese_cns11643_1);
3905 Vcharset_chinese_cns11643_1 =
3906 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3907 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3908 build_string ("CNS11643-1"),
3909 build_string ("CNS11643-1 (Chinese traditional)"),
3911 ("CNS 11643 Plane 1 Chinese traditional"),
3912 build_string (CHINESE_CNS_PLANE_RE("1")),
3914 staticpro (&Vcharset_chinese_cns11643_2);
3915 Vcharset_chinese_cns11643_2 =
3916 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3917 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3918 build_string ("CNS11643-2"),
3919 build_string ("CNS11643-2 (Chinese traditional)"),
3921 ("CNS 11643 Plane 2 Chinese traditional"),
3922 build_string (CHINESE_CNS_PLANE_RE("2")),
3925 staticpro (&Vcharset_latin_tcvn5712);
3926 Vcharset_latin_tcvn5712 =
3927 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3928 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3929 build_string ("TCVN 5712"),
3930 build_string ("TCVN 5712 (VSCII-2)"),
3931 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3932 build_string ("tcvn5712-1"),
3934 staticpro (&Vcharset_latin_viscii_lower);
3935 Vcharset_latin_viscii_lower =
3936 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3937 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3938 build_string ("VISCII lower"),
3939 build_string ("VISCII lower (Vietnamese)"),
3940 build_string ("VISCII lower (Vietnamese)"),
3941 build_string ("MULEVISCII-LOWER"),
3943 staticpro (&Vcharset_latin_viscii_upper);
3944 Vcharset_latin_viscii_upper =
3945 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3946 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3947 build_string ("VISCII upper"),
3948 build_string ("VISCII upper (Vietnamese)"),
3949 build_string ("VISCII upper (Vietnamese)"),
3950 build_string ("MULEVISCII-UPPER"),
3952 staticpro (&Vcharset_latin_viscii);
3953 Vcharset_latin_viscii =
3954 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3955 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3956 build_string ("VISCII"),
3957 build_string ("VISCII 1.1 (Vietnamese)"),
3958 build_string ("VISCII 1.1 (Vietnamese)"),
3959 build_string ("VISCII1\\.1"),
3961 staticpro (&Vcharset_chinese_big5);
3962 Vcharset_chinese_big5 =
3963 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
3964 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3965 build_string ("Big5"),
3966 build_string ("Big5"),
3967 build_string ("Big5 Chinese traditional"),
3968 build_string ("big5"),
3970 staticpro (&Vcharset_ideograph_daikanwa);
3971 Vcharset_ideograph_daikanwa =
3972 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
3973 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3974 build_string ("Daikanwa"),
3975 build_string ("Morohashi's Daikanwa"),
3976 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
3977 build_string ("Daikanwa"),
3978 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
3979 staticpro (&Vcharset_mojikyo);
3981 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
3982 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3983 build_string ("Mojikyo"),
3984 build_string ("Mojikyo"),
3985 build_string ("Konjaku-Mojikyo"),
3987 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
3988 staticpro (&Vcharset_mojikyo_2022_1);
3989 Vcharset_mojikyo_2022_1 =
3990 make_charset (LEADING_BYTE_MOJIKYO_2022_1, Qmojikyo_2022_1, 94, 3,
3991 2, 2, ':', CHARSET_LEFT_TO_RIGHT,
3992 build_string ("Mojikyo-2022-1"),
3993 build_string ("Mojikyo ISO-2022 Part 1"),
3994 build_string ("Konjaku-Mojikyo for ISO/IEC 2022 Part 1"),
3998 #define DEF_MOJIKYO_PJ(n) \
3999 staticpro (&Vcharset_mojikyo_pj_##n); \
4000 Vcharset_mojikyo_pj_##n = \
4001 make_charset (LEADING_BYTE_MOJIKYO_PJ_##n, Qmojikyo_pj_##n, 94, 2, \
4002 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
4003 build_string ("Mojikyo-PJ-"#n), \
4004 build_string ("Mojikyo (pseudo JIS encoding) part "#n), \
4006 ("Konjaku-Mojikyo (pseudo JIS encoding) part "#n), \
4008 ("\\(MojikyoPJ-"#n "\\|jisx0208\\.Mojikyo-"#n "\\)$"), \
4020 DEF_MOJIKYO_PJ (10);
4021 DEF_MOJIKYO_PJ (11);
4022 DEF_MOJIKYO_PJ (12);
4023 DEF_MOJIKYO_PJ (13);
4024 DEF_MOJIKYO_PJ (14);
4025 DEF_MOJIKYO_PJ (15);
4026 DEF_MOJIKYO_PJ (16);
4027 DEF_MOJIKYO_PJ (17);
4028 DEF_MOJIKYO_PJ (18);
4029 DEF_MOJIKYO_PJ (19);
4030 DEF_MOJIKYO_PJ (20);
4031 DEF_MOJIKYO_PJ (21);
4033 staticpro (&Vcharset_ethiopic_ucs);
4034 Vcharset_ethiopic_ucs =
4035 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
4036 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4037 build_string ("Ethiopic (UCS)"),
4038 build_string ("Ethiopic (UCS)"),
4039 build_string ("Ethiopic of UCS"),
4040 build_string ("Ethiopic-Unicode"),
4041 Qnil, 0x1200, 0x137F, 0x1200, 0);
4043 staticpro (&Vcharset_chinese_big5_1);
4044 Vcharset_chinese_big5_1 =
4045 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
4046 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
4047 build_string ("Big5"),
4048 build_string ("Big5 (Level-1)"),
4050 ("Big5 Level-1 Chinese traditional"),
4051 build_string ("big5"),
4053 staticpro (&Vcharset_chinese_big5_2);
4054 Vcharset_chinese_big5_2 =
4055 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
4056 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
4057 build_string ("Big5"),
4058 build_string ("Big5 (Level-2)"),
4060 ("Big5 Level-2 Chinese traditional"),
4061 build_string ("big5"),
4064 #ifdef ENABLE_COMPOSITE_CHARS
4065 /* #### For simplicity, we put composite chars into a 96x96 charset.
4066 This is going to lead to problems because you can run out of
4067 room, esp. as we don't yet recycle numbers. */
4068 staticpro (&Vcharset_composite);
4069 Vcharset_composite =
4070 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
4071 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4072 build_string ("Composite"),
4073 build_string ("Composite characters"),
4074 build_string ("Composite characters"),
4077 /* #### not dumped properly */
4078 composite_char_row_next = 32;
4079 composite_char_col_next = 32;
4081 Vcomposite_char_string2char_hash_table =
4082 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
4083 Vcomposite_char_char2string_hash_table =
4084 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
4085 staticpro (&Vcomposite_char_string2char_hash_table);
4086 staticpro (&Vcomposite_char_char2string_hash_table);
4087 #endif /* ENABLE_COMPOSITE_CHARS */