1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 20.3. Not in FSF. */
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_japanese_jisx0208;
60 Lisp_Object Vcharset_japanese_jisx0208_1990;
61 Lisp_Object Vcharset_korean_ksc5601;
62 Lisp_Object Vcharset_japanese_jisx0212;
63 Lisp_Object Vcharset_chinese_cns11643_1;
64 Lisp_Object Vcharset_chinese_cns11643_2;
66 Lisp_Object Vcharset_ucs;
67 Lisp_Object Vcharset_ucs_bmp;
68 Lisp_Object Vcharset_latin_viscii;
69 Lisp_Object Vcharset_latin_tcvn5712;
70 Lisp_Object Vcharset_latin_viscii_lower;
71 Lisp_Object Vcharset_latin_viscii_upper;
72 Lisp_Object Vcharset_chinese_big5;
73 Lisp_Object Vcharset_ideograph_daikanwa;
74 Lisp_Object Vcharset_mojikyo;
75 Lisp_Object Vcharset_mojikyo_2022_1;
76 Lisp_Object Vcharset_mojikyo_pj_1;
77 Lisp_Object Vcharset_mojikyo_pj_2;
78 Lisp_Object Vcharset_mojikyo_pj_3;
79 Lisp_Object Vcharset_mojikyo_pj_4;
80 Lisp_Object Vcharset_mojikyo_pj_5;
81 Lisp_Object Vcharset_mojikyo_pj_6;
82 Lisp_Object Vcharset_mojikyo_pj_7;
83 Lisp_Object Vcharset_mojikyo_pj_8;
84 Lisp_Object Vcharset_mojikyo_pj_9;
85 Lisp_Object Vcharset_mojikyo_pj_10;
86 Lisp_Object Vcharset_mojikyo_pj_11;
87 Lisp_Object Vcharset_mojikyo_pj_12;
88 Lisp_Object Vcharset_mojikyo_pj_13;
89 Lisp_Object Vcharset_mojikyo_pj_14;
90 Lisp_Object Vcharset_mojikyo_pj_15;
91 Lisp_Object Vcharset_mojikyo_pj_16;
92 Lisp_Object Vcharset_mojikyo_pj_17;
93 Lisp_Object Vcharset_mojikyo_pj_18;
94 Lisp_Object Vcharset_mojikyo_pj_19;
95 Lisp_Object Vcharset_mojikyo_pj_20;
96 Lisp_Object Vcharset_mojikyo_pj_21;
97 Lisp_Object Vcharset_ethiopic_ucs;
99 Lisp_Object Vcharset_chinese_big5_1;
100 Lisp_Object Vcharset_chinese_big5_2;
102 #ifdef ENABLE_COMPOSITE_CHARS
103 Lisp_Object Vcharset_composite;
105 /* Hash tables for composite chars. One maps string representing
106 composed chars to their equivalent chars; one goes the
108 Lisp_Object Vcomposite_char_char2string_hash_table;
109 Lisp_Object Vcomposite_char_string2char_hash_table;
111 static int composite_char_row_next;
112 static int composite_char_col_next;
114 #endif /* ENABLE_COMPOSITE_CHARS */
116 struct charset_lookup *chlook;
118 static const struct lrecord_description charset_lookup_description_1[] = {
119 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
128 static const struct struct_description charset_lookup_description = {
129 sizeof (struct charset_lookup),
130 charset_lookup_description_1
134 /* Table of number of bytes in the string representation of a character
135 indexed by the first byte of that representation.
137 rep_bytes_by_first_byte(c) is more efficient than the equivalent
138 canonical computation:
140 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
142 const Bytecount rep_bytes_by_first_byte[0xA0] =
143 { /* 0x00 - 0x7f are for straight ASCII */
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
146 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
147 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
148 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
149 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
150 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
151 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
152 /* 0x80 - 0x8f are for Dimension-1 official charsets */
154 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
156 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
158 /* 0x90 - 0x9d are for Dimension-2 official charsets */
159 /* 0x9e is for Dimension-1 private charsets */
160 /* 0x9f is for Dimension-2 private charsets */
161 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
167 #define BT_UINT8_MIN 0
168 #define BT_UINT8_MAX (UCHAR_MAX - 3)
169 #define BT_UINT8_t (UCHAR_MAX - 2)
170 #define BT_UINT8_nil (UCHAR_MAX - 1)
171 #define BT_UINT8_unbound UCHAR_MAX
173 INLINE_HEADER int INT_UINT8_P (Lisp_Object obj);
174 INLINE_HEADER int UINT8_VALUE_P (Lisp_Object obj);
175 INLINE_HEADER unsigned char UINT8_ENCODE (Lisp_Object obj);
176 INLINE_HEADER Lisp_Object UINT8_DECODE (unsigned char n);
177 INLINE_HEADER unsigned short UINT8_TO_UINT16 (unsigned char n);
180 INT_UINT8_P (Lisp_Object obj)
184 int num = XINT (obj);
186 return (BT_UINT8_MIN <= num) && (num <= BT_UINT8_MAX);
193 UINT8_VALUE_P (Lisp_Object obj)
195 return EQ (obj, Qunbound)
196 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT8_P (obj);
199 INLINE_HEADER unsigned char
200 UINT8_ENCODE (Lisp_Object obj)
202 if (EQ (obj, Qunbound))
203 return BT_UINT8_unbound;
204 else if (EQ (obj, Qnil))
206 else if (EQ (obj, Qt))
212 INLINE_HEADER Lisp_Object
213 UINT8_DECODE (unsigned char n)
215 if (n == BT_UINT8_unbound)
217 else if (n == BT_UINT8_nil)
219 else if (n == BT_UINT8_t)
226 mark_uint8_byte_table (Lisp_Object obj)
232 print_uint8_byte_table (Lisp_Object obj,
233 Lisp_Object printcharfun, int escapeflag)
235 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
237 struct gcpro gcpro1, gcpro2;
238 GCPRO2 (obj, printcharfun);
240 write_c_string ("\n#<uint8-byte-table", printcharfun);
241 for (i = 0; i < 256; i++)
243 unsigned char n = bte->property[i];
245 write_c_string ("\n ", printcharfun);
246 write_c_string (" ", printcharfun);
247 if (n == BT_UINT8_unbound)
248 write_c_string ("void", printcharfun);
249 else if (n == BT_UINT8_nil)
250 write_c_string ("nil", printcharfun);
251 else if (n == BT_UINT8_t)
252 write_c_string ("t", printcharfun);
257 sprintf (buf, "%hd", n);
258 write_c_string (buf, printcharfun);
262 write_c_string (">", printcharfun);
266 uint8_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
268 Lisp_Uint8_Byte_Table *te1 = XUINT8_BYTE_TABLE (obj1);
269 Lisp_Uint8_Byte_Table *te2 = XUINT8_BYTE_TABLE (obj2);
272 for (i = 0; i < 256; i++)
273 if (te1->property[i] != te2->property[i])
279 uint8_byte_table_hash (Lisp_Object obj, int depth)
281 Lisp_Uint8_Byte_Table *te = XUINT8_BYTE_TABLE (obj);
285 for (i = 0; i < 256; i++)
286 hash = HASH2 (hash, te->property[i]);
290 DEFINE_LRECORD_IMPLEMENTATION ("uint8-byte-table", uint8_byte_table,
291 mark_uint8_byte_table,
292 print_uint8_byte_table,
293 0, uint8_byte_table_equal,
294 uint8_byte_table_hash,
295 0 /* uint8_byte_table_description */,
296 Lisp_Uint8_Byte_Table);
299 make_uint8_byte_table (unsigned char initval)
303 Lisp_Uint8_Byte_Table *cte;
305 cte = alloc_lcrecord_type (Lisp_Uint8_Byte_Table,
306 &lrecord_uint8_byte_table);
308 for (i = 0; i < 256; i++)
309 cte->property[i] = initval;
311 XSETUINT8_BYTE_TABLE (obj, cte);
316 uint8_byte_table_same_value_p (Lisp_Object obj)
318 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
319 unsigned char v0 = bte->property[0];
322 for (i = 1; i < 256; i++)
324 if (bte->property[i] != v0)
331 #define BT_UINT16_MIN 0
332 #define BT_UINT16_MAX (USHRT_MAX - 3)
333 #define BT_UINT16_t (USHRT_MAX - 2)
334 #define BT_UINT16_nil (USHRT_MAX - 1)
335 #define BT_UINT16_unbound USHRT_MAX
337 INLINE_HEADER int INT_UINT16_P (Lisp_Object obj);
338 INLINE_HEADER int UINT16_VALUE_P (Lisp_Object obj);
339 INLINE_HEADER unsigned short UINT16_ENCODE (Lisp_Object obj);
340 INLINE_HEADER Lisp_Object UINT16_DECODE (unsigned short us);
343 INT_UINT16_P (Lisp_Object obj)
347 int num = XINT (obj);
349 return (BT_UINT16_MIN <= num) && (num <= BT_UINT16_MAX);
356 UINT16_VALUE_P (Lisp_Object obj)
358 return EQ (obj, Qunbound)
359 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT16_P (obj);
362 INLINE_HEADER unsigned short
363 UINT16_ENCODE (Lisp_Object obj)
365 if (EQ (obj, Qunbound))
366 return BT_UINT16_unbound;
367 else if (EQ (obj, Qnil))
368 return BT_UINT16_nil;
369 else if (EQ (obj, Qt))
375 INLINE_HEADER Lisp_Object
376 UINT16_DECODE (unsigned short n)
378 if (n == BT_UINT16_unbound)
380 else if (n == BT_UINT16_nil)
382 else if (n == BT_UINT16_t)
388 INLINE_HEADER unsigned short
389 UINT8_TO_UINT16 (unsigned char n)
391 if (n == BT_UINT8_unbound)
392 return BT_UINT16_unbound;
393 else if (n == BT_UINT8_nil)
394 return BT_UINT16_nil;
395 else if (n == BT_UINT8_t)
402 mark_uint16_byte_table (Lisp_Object obj)
408 print_uint16_byte_table (Lisp_Object obj,
409 Lisp_Object printcharfun, int escapeflag)
411 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
413 struct gcpro gcpro1, gcpro2;
414 GCPRO2 (obj, printcharfun);
416 write_c_string ("\n#<uint16-byte-table", printcharfun);
417 for (i = 0; i < 256; i++)
419 unsigned short n = bte->property[i];
421 write_c_string ("\n ", printcharfun);
422 write_c_string (" ", printcharfun);
423 if (n == BT_UINT16_unbound)
424 write_c_string ("void", printcharfun);
425 else if (n == BT_UINT16_nil)
426 write_c_string ("nil", printcharfun);
427 else if (n == BT_UINT16_t)
428 write_c_string ("t", printcharfun);
433 sprintf (buf, "%hd", n);
434 write_c_string (buf, printcharfun);
438 write_c_string (">", printcharfun);
442 uint16_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
444 Lisp_Uint16_Byte_Table *te1 = XUINT16_BYTE_TABLE (obj1);
445 Lisp_Uint16_Byte_Table *te2 = XUINT16_BYTE_TABLE (obj2);
448 for (i = 0; i < 256; i++)
449 if (te1->property[i] != te2->property[i])
455 uint16_byte_table_hash (Lisp_Object obj, int depth)
457 Lisp_Uint16_Byte_Table *te = XUINT16_BYTE_TABLE (obj);
461 for (i = 0; i < 256; i++)
462 hash = HASH2 (hash, te->property[i]);
466 DEFINE_LRECORD_IMPLEMENTATION ("uint16-byte-table", uint16_byte_table,
467 mark_uint16_byte_table,
468 print_uint16_byte_table,
469 0, uint16_byte_table_equal,
470 uint16_byte_table_hash,
471 0 /* uint16_byte_table_description */,
472 Lisp_Uint16_Byte_Table);
475 make_uint16_byte_table (unsigned short initval)
479 Lisp_Uint16_Byte_Table *cte;
481 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
482 &lrecord_uint16_byte_table);
484 for (i = 0; i < 256; i++)
485 cte->property[i] = initval;
487 XSETUINT16_BYTE_TABLE (obj, cte);
492 expand_uint8_byte_table_to_uint16 (Lisp_Object table)
496 Lisp_Uint8_Byte_Table* bte = XUINT8_BYTE_TABLE(table);
497 Lisp_Uint16_Byte_Table* cte;
499 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
500 &lrecord_uint16_byte_table);
501 for (i = 0; i < 256; i++)
503 cte->property[i] = UINT8_TO_UINT16 (bte->property[i]);
505 XSETUINT16_BYTE_TABLE (obj, cte);
510 uint16_byte_table_same_value_p (Lisp_Object obj)
512 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
513 unsigned short v0 = bte->property[0];
516 for (i = 1; i < 256; i++)
518 if (bte->property[i] != v0)
526 mark_byte_table (Lisp_Object obj)
528 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
531 for (i = 0; i < 256; i++)
533 mark_object (cte->property[i]);
539 print_byte_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
541 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
543 struct gcpro gcpro1, gcpro2;
544 GCPRO2 (obj, printcharfun);
546 write_c_string ("\n#<byte-table", printcharfun);
547 for (i = 0; i < 256; i++)
549 Lisp_Object elt = bte->property[i];
551 write_c_string ("\n ", printcharfun);
552 write_c_string (" ", printcharfun);
553 if (EQ (elt, Qunbound))
554 write_c_string ("void", printcharfun);
556 print_internal (elt, printcharfun, escapeflag);
559 write_c_string (">", printcharfun);
563 byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
565 Lisp_Byte_Table *cte1 = XBYTE_TABLE (obj1);
566 Lisp_Byte_Table *cte2 = XBYTE_TABLE (obj2);
569 for (i = 0; i < 256; i++)
570 if (BYTE_TABLE_P (cte1->property[i]))
572 if (BYTE_TABLE_P (cte2->property[i]))
574 if (!byte_table_equal (cte1->property[i],
575 cte2->property[i], depth + 1))
582 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
588 byte_table_hash (Lisp_Object obj, int depth)
590 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
592 return internal_array_hash (cte->property, 256, depth);
595 static const struct lrecord_description byte_table_description[] = {
596 { XD_LISP_OBJECT_ARRAY, offsetof(Lisp_Byte_Table, property), 256 },
600 DEFINE_LRECORD_IMPLEMENTATION ("byte-table", byte_table,
605 byte_table_description,
609 make_byte_table (Lisp_Object initval)
613 Lisp_Byte_Table *cte;
615 cte = alloc_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
617 for (i = 0; i < 256; i++)
618 cte->property[i] = initval;
620 XSETBYTE_TABLE (obj, cte);
625 byte_table_same_value_p (Lisp_Object obj)
627 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
628 Lisp_Object v0 = bte->property[0];
631 for (i = 1; i < 256; i++)
633 if (!internal_equal (bte->property[i], v0, 0))
640 Lisp_Object get_byte_table (Lisp_Object table, unsigned char idx);
641 Lisp_Object put_byte_table (Lisp_Object table, unsigned char idx,
645 get_byte_table (Lisp_Object table, unsigned char idx)
647 if (UINT8_BYTE_TABLE_P (table))
648 return UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[idx]);
649 else if (UINT16_BYTE_TABLE_P (table))
650 return UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[idx]);
651 else if (BYTE_TABLE_P (table))
652 return XBYTE_TABLE(table)->property[idx];
658 put_byte_table (Lisp_Object table, unsigned char idx, Lisp_Object value)
660 if (UINT8_BYTE_TABLE_P (table))
662 if (UINT8_VALUE_P (value))
664 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
665 if (!UINT8_BYTE_TABLE_P (value) &&
666 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
667 && uint8_byte_table_same_value_p (table))
672 else if (UINT16_VALUE_P (value))
674 Lisp_Object new = expand_uint8_byte_table_to_uint16 (table);
676 XUINT16_BYTE_TABLE(new)->property[idx] = UINT16_ENCODE (value);
681 Lisp_Object new = make_byte_table (Qnil);
684 for (i = 0; i < 256; i++)
686 XBYTE_TABLE(new)->property[i]
687 = UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[i]);
689 XBYTE_TABLE(new)->property[idx] = value;
693 else if (UINT16_BYTE_TABLE_P (table))
695 if (UINT16_VALUE_P (value))
697 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
698 if (!UINT8_BYTE_TABLE_P (value) &&
699 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
700 && uint16_byte_table_same_value_p (table))
707 Lisp_Object new = make_byte_table (Qnil);
710 for (i = 0; i < 256; i++)
712 XBYTE_TABLE(new)->property[i]
713 = UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[i]);
715 XBYTE_TABLE(new)->property[idx] = value;
719 else if (BYTE_TABLE_P (table))
721 XBYTE_TABLE(table)->property[idx] = value;
722 if (!UINT8_BYTE_TABLE_P (value) &&
723 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
724 && byte_table_same_value_p (table))
729 else if (!internal_equal (table, value, 0))
731 if (UINT8_VALUE_P (table) && UINT8_VALUE_P (value))
733 table = make_uint8_byte_table (UINT8_ENCODE (table));
734 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
736 else if (UINT16_VALUE_P (table) && UINT16_VALUE_P (value))
738 table = make_uint16_byte_table (UINT16_ENCODE (table));
739 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
743 table = make_byte_table (table);
744 XBYTE_TABLE(table)->property[idx] = value;
751 mark_char_id_table (Lisp_Object obj)
753 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
759 print_char_id_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
761 Lisp_Object table = XCHAR_ID_TABLE (obj)->table;
763 struct gcpro gcpro1, gcpro2;
764 GCPRO2 (obj, printcharfun);
766 write_c_string ("#<char-id-table ", printcharfun);
767 for (i = 0; i < 256; i++)
769 Lisp_Object elt = get_byte_table (table, i);
770 if (i != 0) write_c_string ("\n ", printcharfun);
771 if (EQ (elt, Qunbound))
772 write_c_string ("void", printcharfun);
774 print_internal (elt, printcharfun, escapeflag);
777 write_c_string (">", printcharfun);
781 char_id_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
783 Lisp_Object table1 = XCHAR_ID_TABLE (obj1)->table;
784 Lisp_Object table2 = XCHAR_ID_TABLE (obj2)->table;
787 for (i = 0; i < 256; i++)
789 if (!internal_equal (get_byte_table (table1, i),
790 get_byte_table (table2, i), 0))
797 char_id_table_hash (Lisp_Object obj, int depth)
799 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
801 return char_id_table_hash (cte->table, depth + 1);
804 static const struct lrecord_description char_id_table_description[] = {
805 { XD_LISP_OBJECT, offsetof(Lisp_Char_ID_Table, table) },
809 DEFINE_LRECORD_IMPLEMENTATION ("char-id-table", char_id_table,
812 0, char_id_table_equal,
814 char_id_table_description,
818 make_char_id_table (Lisp_Object initval)
821 Lisp_Char_ID_Table *cte;
823 cte = alloc_lcrecord_type (Lisp_Char_ID_Table, &lrecord_char_id_table);
825 cte->table = make_byte_table (initval);
827 XSETCHAR_ID_TABLE (obj, cte);
833 get_char_id_table (Emchar ch, Lisp_Object table)
835 unsigned int code = ch;
842 (XCHAR_ID_TABLE (table)->table,
843 (unsigned char)(code >> 24)),
844 (unsigned char) (code >> 16)),
845 (unsigned char) (code >> 8)),
846 (unsigned char) code);
849 void put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table);
851 put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table)
853 unsigned int code = ch;
854 Lisp_Object table1, table2, table3, table4;
856 table1 = XCHAR_ID_TABLE (table)->table;
857 table2 = get_byte_table (table1, (unsigned char)(code >> 24));
858 table3 = get_byte_table (table2, (unsigned char)(code >> 16));
859 table4 = get_byte_table (table3, (unsigned char)(code >> 8));
861 table4 = put_byte_table (table4, (unsigned char)code, value);
862 table3 = put_byte_table (table3, (unsigned char)(code >> 8), table4);
863 table2 = put_byte_table (table2, (unsigned char)(code >> 16), table3);
864 XCHAR_ID_TABLE (table)->table
865 = put_byte_table (table1, (unsigned char)(code >> 24), table2);
869 Lisp_Object Vchar_attribute_hash_table;
870 Lisp_Object Vcharacter_composition_table;
871 Lisp_Object Vcharacter_variant_table;
873 Lisp_Object Qideograph_daikanwa;
874 Lisp_Object Q_decomposition;
878 Lisp_Object Qisolated;
879 Lisp_Object Qinitial;
882 Lisp_Object Qvertical;
883 Lisp_Object QnoBreak;
884 Lisp_Object Qfraction;
894 Emchar to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg);
896 Lisp_Object put_char_ccs_code_point (Lisp_Object character,
897 Lisp_Object ccs, Lisp_Object value);
898 Lisp_Object remove_char_ccs (Lisp_Object character, Lisp_Object ccs);
901 to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
907 else if (EQ (v, Qcompat))
909 else if (EQ (v, Qisolated))
911 else if (EQ (v, Qinitial))
913 else if (EQ (v, Qmedial))
915 else if (EQ (v, Qfinal))
917 else if (EQ (v, Qvertical))
919 else if (EQ (v, QnoBreak))
921 else if (EQ (v, Qfraction))
923 else if (EQ (v, Qsuper))
925 else if (EQ (v, Qsub))
927 else if (EQ (v, Qcircle))
929 else if (EQ (v, Qsquare))
931 else if (EQ (v, Qwide))
933 else if (EQ (v, Qnarrow))
935 else if (EQ (v, Qsmall))
937 else if (EQ (v, Qfont))
940 signal_simple_error (err_msg, err_arg);
943 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
944 Return character corresponding with list.
948 Lisp_Object table = Vcharacter_composition_table;
949 Lisp_Object rest = list;
953 Lisp_Object v = Fcar (rest);
955 Emchar c = to_char_id (v, "Invalid value for composition", list);
957 ret = get_char_id_table (c, table);
962 if (!CHAR_ID_TABLE_P (ret))
967 else if (!CONSP (rest))
969 else if (CHAR_ID_TABLE_P (ret))
972 signal_simple_error ("Invalid table is found with", list);
974 signal_simple_error ("Invalid value for composition", list);
977 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
978 Return variants of CHARACTER.
982 CHECK_CHAR (character);
983 return Fcopy_list (get_char_id_table (XCHAR (character),
984 Vcharacter_variant_table));
988 /* We store the char-attributes in hash tables with the names as the
989 key and the actual char-id-table object as the value. Occasionally
990 we need to use them in a list format. These routines provide us
992 struct char_attribute_list_closure
994 Lisp_Object *char_attribute_list;
998 add_char_attribute_to_list_mapper (Lisp_Object key, Lisp_Object value,
999 void *char_attribute_list_closure)
1001 /* This function can GC */
1002 struct char_attribute_list_closure *calcl
1003 = (struct char_attribute_list_closure*) char_attribute_list_closure;
1004 Lisp_Object *char_attribute_list = calcl->char_attribute_list;
1006 *char_attribute_list = Fcons (key, *char_attribute_list);
1010 DEFUN ("char-attribute-list", Fchar_attribute_list, 0, 0, 0, /*
1011 Return the list of all existing character attributes except coded-charsets.
1015 Lisp_Object char_attribute_list = Qnil;
1016 struct gcpro gcpro1;
1017 struct char_attribute_list_closure char_attribute_list_closure;
1019 GCPRO1 (char_attribute_list);
1020 char_attribute_list_closure.char_attribute_list = &char_attribute_list;
1021 elisp_maphash (add_char_attribute_to_list_mapper,
1022 Vchar_attribute_hash_table,
1023 &char_attribute_list_closure);
1025 return char_attribute_list;
1028 DEFUN ("find-char-attribute-table", Ffind_char_attribute_table, 1, 1, 0, /*
1029 Return char-id-table corresponding to ATTRIBUTE.
1033 return Fgethash (attribute, Vchar_attribute_hash_table, Qnil);
1037 /* We store the char-id-tables in hash tables with the attributes as
1038 the key and the actual char-id-table object as the value. Each
1039 char-id-table stores values of an attribute corresponding with
1040 characters. Occasionally we need to get attributes of a character
1041 in a association-list format. These routines provide us with
1043 struct char_attribute_alist_closure
1046 Lisp_Object *char_attribute_alist;
1050 add_char_attribute_alist_mapper (Lisp_Object key, Lisp_Object value,
1051 void *char_attribute_alist_closure)
1053 /* This function can GC */
1054 struct char_attribute_alist_closure *caacl =
1055 (struct char_attribute_alist_closure*) char_attribute_alist_closure;
1056 Lisp_Object ret = get_char_id_table (caacl->char_id, value);
1057 if (!UNBOUNDP (ret))
1059 Lisp_Object *char_attribute_alist = caacl->char_attribute_alist;
1060 *char_attribute_alist
1061 = Fcons (Fcons (key, ret), *char_attribute_alist);
1066 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
1067 Return the alist of attributes of CHARACTER.
1071 Lisp_Object alist = Qnil;
1074 CHECK_CHAR (character);
1076 struct gcpro gcpro1;
1077 struct char_attribute_alist_closure char_attribute_alist_closure;
1080 char_attribute_alist_closure.char_id = XCHAR (character);
1081 char_attribute_alist_closure.char_attribute_alist = &alist;
1082 elisp_maphash (add_char_attribute_alist_mapper,
1083 Vchar_attribute_hash_table,
1084 &char_attribute_alist_closure);
1088 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
1090 Lisp_Object ccs = chlook->charset_by_leading_byte[i];
1094 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1097 if ( CHAR_ID_TABLE_P (encoding_table)
1098 && INTP (cpos = get_char_id_table (XCHAR (character),
1101 alist = Fcons (Fcons (ccs, cpos), alist);
1108 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 3, 0, /*
1109 Return the value of CHARACTER's ATTRIBUTE.
1110 Return DEFAULT-VALUE if the value is not exist.
1112 (character, attribute, default_value))
1116 CHECK_CHAR (character);
1117 if (!NILP (ccs = Ffind_charset (attribute)))
1119 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1121 if (CHAR_ID_TABLE_P (encoding_table))
1122 return get_char_id_table (XCHAR (character), encoding_table);
1126 Lisp_Object table = Fgethash (attribute,
1127 Vchar_attribute_hash_table,
1129 if (!UNBOUNDP (table))
1131 Lisp_Object ret = get_char_id_table (XCHAR (character), table);
1132 if (!UNBOUNDP (ret))
1136 return default_value;
1139 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
1140 Store CHARACTER's ATTRIBUTE with VALUE.
1142 (character, attribute, value))
1146 CHECK_CHAR (character);
1147 ccs = Ffind_charset (attribute);
1150 return put_char_ccs_code_point (character, ccs, value);
1152 else if (EQ (attribute, Q_decomposition))
1157 signal_simple_error ("Invalid value for ->decomposition",
1160 if (CONSP (Fcdr (value)))
1162 Lisp_Object rest = value;
1163 Lisp_Object table = Vcharacter_composition_table;
1167 GET_EXTERNAL_LIST_LENGTH (rest, len);
1168 seq = make_vector (len, Qnil);
1170 while (CONSP (rest))
1172 Lisp_Object v = Fcar (rest);
1175 = to_char_id (v, "Invalid value for ->decomposition", value);
1178 XVECTOR_DATA(seq)[i++] = v;
1180 XVECTOR_DATA(seq)[i++] = make_char (c);
1184 put_char_id_table (c, character, table);
1189 ntable = get_char_id_table (c, table);
1190 if (!CHAR_ID_TABLE_P (ntable))
1192 ntable = make_char_id_table (Qnil);
1193 put_char_id_table (c, ntable, table);
1201 Lisp_Object v = Fcar (value);
1205 Emchar c = XINT (v);
1207 = get_char_id_table (c, Vcharacter_variant_table);
1209 if (NILP (Fmemq (v, ret)))
1211 put_char_id_table (c, Fcons (character, ret),
1212 Vcharacter_variant_table);
1215 seq = make_vector (1, v);
1219 else if (EQ (attribute, Q_ucs))
1225 signal_simple_error ("Invalid value for ->ucs", value);
1229 ret = get_char_id_table (c, Vcharacter_variant_table);
1230 if (NILP (Fmemq (character, ret)))
1232 put_char_id_table (c, Fcons (character, ret),
1233 Vcharacter_variant_table);
1237 Lisp_Object table = Fgethash (attribute,
1238 Vchar_attribute_hash_table,
1243 table = make_char_id_table (Qunbound);
1244 Fputhash (attribute, table, Vchar_attribute_hash_table);
1246 put_char_id_table (XCHAR (character), value, table);
1251 DEFUN ("remove-char-attribute", Fremove_char_attribute, 2, 2, 0, /*
1252 Remove CHARACTER's ATTRIBUTE.
1254 (character, attribute))
1258 CHECK_CHAR (character);
1259 ccs = Ffind_charset (attribute);
1262 return remove_char_ccs (character, ccs);
1266 Lisp_Object table = Fgethash (attribute,
1267 Vchar_attribute_hash_table,
1269 if (!UNBOUNDP (table))
1271 put_char_id_table (XCHAR (character), Qunbound, table);
1278 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
1280 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
1282 /* ad-hoc method for `ascii' */
1283 if ((CHARSET_CHARS (cs) == 94) &&
1284 (CHARSET_BYTE_OFFSET (cs) != 33))
1285 return 128 - CHARSET_BYTE_OFFSET (cs);
1287 return CHARSET_CHARS (cs);
1290 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
1292 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
1294 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
1298 if (XVECTOR_LENGTH (v) > ccs_len)
1301 for (i = 0; i < XVECTOR_LENGTH (v); i++)
1303 Lisp_Object c = XVECTOR_DATA(v)[i];
1305 if (!NILP (c) && !CHARP (c))
1309 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
1321 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1324 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1334 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1335 nv = XVECTOR_DATA(v)[i];
1341 XVECTOR_DATA(v)[i] = Qnil;
1345 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1346 int code_point, Lisp_Object character);
1348 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1349 int code_point, Lisp_Object character)
1353 int ccs_len = XVECTOR_LENGTH (v);
1358 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1359 nv = XVECTOR_DATA(v)[i];
1363 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
1369 XVECTOR_DATA(v)[i] = character;
1373 put_char_ccs_code_point (Lisp_Object character,
1374 Lisp_Object ccs, Lisp_Object value)
1376 Lisp_Object encoding_table;
1378 if (!EQ (XCHARSET_NAME (ccs), Qucs)
1379 || (XCHAR (character) != XINT (value)))
1381 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
1382 int dim = XCHARSET_DIMENSION (ccs);
1383 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
1384 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
1388 { /* obsolete representation: value must be a list of bytes */
1389 Lisp_Object ret = Fcar (value);
1393 signal_simple_error ("Invalid value for coded-charset", value);
1394 code_point = XINT (ret);
1395 if (XCHARSET_GRAPHIC (ccs) == 1)
1397 rest = Fcdr (value);
1398 while (!NILP (rest))
1403 signal_simple_error ("Invalid value for coded-charset",
1407 signal_simple_error ("Invalid value for coded-charset",
1410 if (XCHARSET_GRAPHIC (ccs) == 1)
1412 code_point = (code_point << 8) | j;
1415 value = make_int (code_point);
1417 else if (INTP (value))
1419 code_point = XINT (value);
1420 if (XCHARSET_GRAPHIC (ccs) == 1)
1422 code_point &= 0x7F7F7F7F;
1423 value = make_int (code_point);
1427 signal_simple_error ("Invalid value for coded-charset", value);
1431 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
1434 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
1439 XCHARSET_DECODING_TABLE (ccs)
1440 = v = make_older_vector (ccs_len, Qnil);
1443 decoding_table_put_char (v, dim, byte_offset, code_point, character);
1445 if (NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
1447 XCHARSET_ENCODING_TABLE (ccs)
1448 = encoding_table = make_char_id_table (Qnil);
1450 put_char_id_table (XCHAR (character), value, encoding_table);
1455 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
1457 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
1458 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1460 if (VECTORP (decoding_table))
1462 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
1466 decoding_table_remove_char (decoding_table,
1467 XCHARSET_DIMENSION (ccs),
1468 XCHARSET_BYTE_OFFSET (ccs),
1472 if (CHAR_ID_TABLE_P (encoding_table))
1474 put_char_id_table (XCHAR (character), Qnil, encoding_table);
1479 EXFUN (Fmake_char, 3);
1480 EXFUN (Fdecode_char, 2);
1482 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
1483 Store character's ATTRIBUTES.
1487 Lisp_Object rest = attributes;
1488 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
1489 Lisp_Object character;
1491 Lisp_Object daikanwa = Qnil;
1496 while (CONSP (rest))
1498 Lisp_Object cell = Fcar (rest);
1502 signal_simple_error ("Invalid argument", attributes);
1503 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
1504 && ((XCHARSET_FINAL (ccs) != 0) ||
1505 (XCHARSET_UCS_MAX (ccs) > 0)) )
1509 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
1511 character = Fdecode_char (ccs, cell);
1512 goto setup_attributes;
1516 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
1519 signal_simple_error ("Invalid argument", attributes);
1521 character = make_char (XINT (code) + 0x100000);
1522 goto setup_attributes;
1526 else if (!INTP (code))
1527 signal_simple_error ("Invalid argument", attributes);
1529 character = make_char (XINT (code));
1533 while (CONSP (rest))
1535 Lisp_Object cell = Fcar (rest);
1537 Lisp_Object key = Fcar (cell);
1538 Lisp_Object value = Fcdr (cell);
1542 signal_simple_error ("Invalid argument", attributes);
1545 if (EQ (key, Qmorohashi_daikanwa))
1548 GET_EXTERNAL_LIST_LENGTH (value, len);
1552 if (NILP (daikanwa))
1553 daikanwa = Fcdr (Fassq (Qideograph_daikanwa, rest));
1554 if (EQ (Fcar (value), daikanwa))
1558 else if (EQ (key, Qideograph_daikanwa))
1562 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
1571 Lisp_Object Vutf_2000_version;
1575 int leading_code_private_11;
1578 Lisp_Object Qcharsetp;
1580 /* Qdoc_string, Qdimension, Qchars defined in general.c */
1581 Lisp_Object Qregistry, Qfinal, Qgraphic;
1582 Lisp_Object Qdirection;
1583 Lisp_Object Qreverse_direction_charset;
1584 Lisp_Object Qleading_byte;
1585 Lisp_Object Qshort_name, Qlong_name;
1599 Qcyrillic_iso8859_5,
1601 Qjapanese_jisx0208_1978,
1604 Qjapanese_jisx0208_1990,
1607 Qchinese_cns11643_1,
1608 Qchinese_cns11643_2,
1613 Qlatin_viscii_lower,
1614 Qlatin_viscii_upper,
1615 Qvietnamese_viscii_lower,
1616 Qvietnamese_viscii_upper,
1647 Lisp_Object Ql2r, Qr2l;
1649 Lisp_Object Vcharset_hash_table;
1651 /* Composite characters are characters constructed by overstriking two
1652 or more regular characters.
1654 1) The old Mule implementation involves storing composite characters
1655 in a buffer as a tag followed by all of the actual characters
1656 used to make up the composite character. I think this is a bad
1657 idea; it greatly complicates code that wants to handle strings
1658 one character at a time because it has to deal with the possibility
1659 of great big ungainly characters. It's much more reasonable to
1660 simply store an index into a table of composite characters.
1662 2) The current implementation only allows for 16,384 separate
1663 composite characters over the lifetime of the XEmacs process.
1664 This could become a potential problem if the user
1665 edited lots of different files that use composite characters.
1666 Due to FSF bogosity, increasing the number of allowable
1667 composite characters under Mule would decrease the number
1668 of possible faces that can exist. Mule already has shrunk
1669 this to 2048, and further shrinkage would become uncomfortable.
1670 No such problems exist in XEmacs.
1672 Composite characters could be represented as 0x80 C1 C2 C3,
1673 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1674 for slightly under 2^20 (one million) composite characters
1675 over the XEmacs process lifetime, and you only need to
1676 increase the size of a Mule character from 19 to 21 bits.
1677 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1678 85 million (slightly over 2^26) composite characters. */
1681 /************************************************************************/
1682 /* Basic Emchar functions */
1683 /************************************************************************/
1685 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1686 string in STR. Returns the number of bytes stored.
1687 Do not call this directly. Use the macro set_charptr_emchar() instead.
1691 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1697 Lisp_Object charset;
1706 else if ( c <= 0x7ff )
1708 *p++ = (c >> 6) | 0xc0;
1709 *p++ = (c & 0x3f) | 0x80;
1711 else if ( c <= 0xffff )
1713 *p++ = (c >> 12) | 0xe0;
1714 *p++ = ((c >> 6) & 0x3f) | 0x80;
1715 *p++ = (c & 0x3f) | 0x80;
1717 else if ( c <= 0x1fffff )
1719 *p++ = (c >> 18) | 0xf0;
1720 *p++ = ((c >> 12) & 0x3f) | 0x80;
1721 *p++ = ((c >> 6) & 0x3f) | 0x80;
1722 *p++ = (c & 0x3f) | 0x80;
1724 else if ( c <= 0x3ffffff )
1726 *p++ = (c >> 24) | 0xf8;
1727 *p++ = ((c >> 18) & 0x3f) | 0x80;
1728 *p++ = ((c >> 12) & 0x3f) | 0x80;
1729 *p++ = ((c >> 6) & 0x3f) | 0x80;
1730 *p++ = (c & 0x3f) | 0x80;
1734 *p++ = (c >> 30) | 0xfc;
1735 *p++ = ((c >> 24) & 0x3f) | 0x80;
1736 *p++ = ((c >> 18) & 0x3f) | 0x80;
1737 *p++ = ((c >> 12) & 0x3f) | 0x80;
1738 *p++ = ((c >> 6) & 0x3f) | 0x80;
1739 *p++ = (c & 0x3f) | 0x80;
1742 BREAKUP_CHAR (c, charset, c1, c2);
1743 lb = CHAR_LEADING_BYTE (c);
1744 if (LEADING_BYTE_PRIVATE_P (lb))
1745 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1747 if (EQ (charset, Vcharset_control_1))
1756 /* Return the first character from a Mule-encoded string in STR,
1757 assuming it's non-ASCII. Do not call this directly.
1758 Use the macro charptr_emchar() instead. */
1761 non_ascii_charptr_emchar (const Bufbyte *str)
1774 else if ( b >= 0xf8 )
1779 else if ( b >= 0xf0 )
1784 else if ( b >= 0xe0 )
1789 else if ( b >= 0xc0 )
1799 for( ; len > 0; len-- )
1802 ch = ( ch << 6 ) | ( b & 0x3f );
1806 Bufbyte i0 = *str, i1, i2 = 0;
1807 Lisp_Object charset;
1809 if (i0 == LEADING_BYTE_CONTROL_1)
1810 return (Emchar) (*++str - 0x20);
1812 if (LEADING_BYTE_PREFIX_P (i0))
1817 charset = CHARSET_BY_LEADING_BYTE (i0);
1818 if (XCHARSET_DIMENSION (charset) == 2)
1821 return MAKE_CHAR (charset, i1, i2);
1825 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1826 Do not call this directly. Use the macro valid_char_p() instead. */
1830 non_ascii_valid_char_p (Emchar ch)
1834 /* Must have only lowest 19 bits set */
1838 f1 = CHAR_FIELD1 (ch);
1839 f2 = CHAR_FIELD2 (ch);
1840 f3 = CHAR_FIELD3 (ch);
1844 Lisp_Object charset;
1846 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1847 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1848 f2 > MAX_CHAR_FIELD2_PRIVATE)
1853 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1854 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1858 NOTE: This takes advantage of the fact that
1859 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1860 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1862 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1863 if (EQ (charset, Qnil))
1865 return (XCHARSET_CHARS (charset) == 96);
1869 Lisp_Object charset;
1871 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1872 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1873 f1 > MAX_CHAR_FIELD1_PRIVATE)
1875 if (f2 < 0x20 || f3 < 0x20)
1878 #ifdef ENABLE_COMPOSITE_CHARS
1879 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1881 if (UNBOUNDP (Fgethash (make_int (ch),
1882 Vcomposite_char_char2string_hash_table,
1887 #endif /* ENABLE_COMPOSITE_CHARS */
1889 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1890 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1893 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1895 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1898 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1900 if (EQ (charset, Qnil))
1902 return (XCHARSET_CHARS (charset) == 96);
1908 /************************************************************************/
1909 /* Basic string functions */
1910 /************************************************************************/
1912 /* Copy the character pointed to by SRC into DST. Do not call this
1913 directly. Use the macro charptr_copy_char() instead.
1914 Return the number of bytes copied. */
1917 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
1919 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
1921 for (i = bytes; i; i--, dst++, src++)
1927 /************************************************************************/
1928 /* streams of Emchars */
1929 /************************************************************************/
1931 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1932 The functions below are not meant to be called directly; use
1933 the macros in insdel.h. */
1936 Lstream_get_emchar_1 (Lstream *stream, int ch)
1938 Bufbyte str[MAX_EMCHAR_LEN];
1939 Bufbyte *strptr = str;
1942 str[0] = (Bufbyte) ch;
1944 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
1946 int c = Lstream_getc (stream);
1947 bufpos_checking_assert (c >= 0);
1948 *++strptr = (Bufbyte) c;
1950 return charptr_emchar (str);
1954 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1956 Bufbyte str[MAX_EMCHAR_LEN];
1957 Bytecount len = set_charptr_emchar (str, ch);
1958 return Lstream_write (stream, str, len);
1962 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1964 Bufbyte str[MAX_EMCHAR_LEN];
1965 Bytecount len = set_charptr_emchar (str, ch);
1966 Lstream_unread (stream, str, len);
1970 /************************************************************************/
1971 /* charset object */
1972 /************************************************************************/
1975 mark_charset (Lisp_Object obj)
1977 Lisp_Charset *cs = XCHARSET (obj);
1979 mark_object (cs->short_name);
1980 mark_object (cs->long_name);
1981 mark_object (cs->doc_string);
1982 mark_object (cs->registry);
1983 mark_object (cs->ccl_program);
1985 mark_object (cs->encoding_table);
1986 /* mark_object (cs->decoding_table); */
1992 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1994 Lisp_Charset *cs = XCHARSET (obj);
1998 error ("printing unreadable object #<charset %s 0x%x>",
1999 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
2002 write_c_string ("#<charset ", printcharfun);
2003 print_internal (CHARSET_NAME (cs), printcharfun, 0);
2004 write_c_string (" ", printcharfun);
2005 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
2006 write_c_string (" ", printcharfun);
2007 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
2008 write_c_string (" ", printcharfun);
2009 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
2010 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
2012 CHARSET_DIMENSION (cs),
2013 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
2014 CHARSET_COLUMNS (cs),
2015 CHARSET_GRAPHIC (cs),
2016 CHARSET_FINAL (cs));
2017 write_c_string (buf, printcharfun);
2018 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
2019 sprintf (buf, " 0x%x>", cs->header.uid);
2020 write_c_string (buf, printcharfun);
2023 static const struct lrecord_description charset_description[] = {
2024 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
2025 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
2026 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
2027 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
2028 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
2029 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
2030 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
2032 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
2033 { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
2038 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
2039 mark_charset, print_charset, 0, 0, 0,
2040 charset_description,
2043 /* Make a new charset. */
2044 /* #### SJT Should generic properties be allowed? */
2046 make_charset (Charset_ID id, Lisp_Object name,
2047 unsigned short chars, unsigned char dimension,
2048 unsigned char columns, unsigned char graphic,
2049 Bufbyte final, unsigned char direction, Lisp_Object short_name,
2050 Lisp_Object long_name, Lisp_Object doc,
2052 Lisp_Object decoding_table,
2053 Emchar ucs_min, Emchar ucs_max,
2054 Emchar code_offset, unsigned char byte_offset)
2057 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
2061 XSETCHARSET (obj, cs);
2063 CHARSET_ID (cs) = id;
2064 CHARSET_NAME (cs) = name;
2065 CHARSET_SHORT_NAME (cs) = short_name;
2066 CHARSET_LONG_NAME (cs) = long_name;
2067 CHARSET_CHARS (cs) = chars;
2068 CHARSET_DIMENSION (cs) = dimension;
2069 CHARSET_DIRECTION (cs) = direction;
2070 CHARSET_COLUMNS (cs) = columns;
2071 CHARSET_GRAPHIC (cs) = graphic;
2072 CHARSET_FINAL (cs) = final;
2073 CHARSET_DOC_STRING (cs) = doc;
2074 CHARSET_REGISTRY (cs) = reg;
2075 CHARSET_CCL_PROGRAM (cs) = Qnil;
2076 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
2078 CHARSET_DECODING_TABLE(cs) = Qnil;
2079 CHARSET_ENCODING_TABLE(cs) = Qnil;
2080 CHARSET_UCS_MIN(cs) = ucs_min;
2081 CHARSET_UCS_MAX(cs) = ucs_max;
2082 CHARSET_CODE_OFFSET(cs) = code_offset;
2083 CHARSET_BYTE_OFFSET(cs) = byte_offset;
2087 if (id == LEADING_BYTE_ASCII)
2088 CHARSET_REP_BYTES (cs) = 1;
2090 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
2092 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
2097 /* some charsets do not have final characters. This includes
2098 ASCII, Control-1, Composite, and the two faux private
2100 unsigned char iso2022_type
2101 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
2103 if (code_offset == 0)
2105 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
2106 chlook->charset_by_attributes[iso2022_type][final] = obj;
2110 (chlook->charset_by_attributes[iso2022_type][final][direction]));
2111 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
2115 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
2116 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
2118 /* Some charsets are "faux" and don't have names or really exist at
2119 all except in the leading-byte table. */
2121 Fputhash (name, obj, Vcharset_hash_table);
2126 get_unallocated_leading_byte (int dimension)
2131 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
2134 lb = chlook->next_allocated_leading_byte++;
2138 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
2141 lb = chlook->next_allocated_1_byte_leading_byte++;
2145 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
2148 lb = chlook->next_allocated_2_byte_leading_byte++;
2154 ("No more character sets free for this dimension",
2155 make_int (dimension));
2161 /* Number of Big5 characters which have the same code in 1st byte. */
2163 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
2166 make_builtin_char (Lisp_Object charset, int c1, int c2)
2168 if (XCHARSET_UCS_MAX (charset))
2171 = (XCHARSET_DIMENSION (charset) == 1
2173 c1 - XCHARSET_BYTE_OFFSET (charset)
2175 (c1 - XCHARSET_BYTE_OFFSET (charset)) * XCHARSET_CHARS (charset)
2176 + c2 - XCHARSET_BYTE_OFFSET (charset))
2177 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
2178 if ((code < XCHARSET_UCS_MIN (charset))
2179 || (XCHARSET_UCS_MAX (charset) < code))
2180 signal_simple_error ("Arguments makes invalid character",
2184 else if (XCHARSET_DIMENSION (charset) == 1)
2186 switch (XCHARSET_CHARS (charset))
2190 + (XCHARSET_FINAL (charset) - '0') * 94 + (c1 - 33);
2193 + (XCHARSET_FINAL (charset) - '0') * 96 + (c1 - 32);
2200 if (EQ (charset, Vcharset_chinese_big5))
2202 int B1 = c1, B2 = c2;
2204 = (B1 - 0xA1) * BIG5_SAME_ROW
2205 + B2 - (B2 < 0x7F ? 0x40 : 0x62);
2209 charset = Vcharset_chinese_big5_1;
2213 charset = Vcharset_chinese_big5_2;
2214 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
2219 switch (XCHARSET_CHARS (charset))
2222 return MIN_CHAR_94x94
2223 + (XCHARSET_FINAL (charset) - '0') * 94 * 94
2224 + (c1 - 33) * 94 + (c2 - 33);
2226 return MIN_CHAR_96x96
2227 + (XCHARSET_FINAL (charset) - '0') * 96 * 96
2228 + (c1 - 32) * 96 + (c2 - 32);
2236 range_charset_code_point (Lisp_Object charset, Emchar ch)
2240 if ((XCHARSET_UCS_MIN (charset) <= ch)
2241 && (ch <= XCHARSET_UCS_MAX (charset)))
2243 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
2245 if (XCHARSET_CHARS (charset) == 256)
2247 else if (XCHARSET_DIMENSION (charset) == 1)
2248 return d + XCHARSET_BYTE_OFFSET (charset);
2249 else if (XCHARSET_DIMENSION (charset) == 2)
2251 ((d / XCHARSET_CHARS (charset)
2252 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2253 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2254 else if (XCHARSET_DIMENSION (charset) == 3)
2256 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2257 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2258 | ((d / XCHARSET_CHARS (charset)
2259 % XCHARSET_CHARS (charset)
2260 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2261 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2262 else /* if (XCHARSET_DIMENSION (charset) == 4) */
2264 ((d / (XCHARSET_CHARS (charset)
2265 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2266 + XCHARSET_BYTE_OFFSET (charset)) << 24)
2267 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2268 % XCHARSET_CHARS (charset)
2269 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2270 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
2271 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2272 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2274 else if (XCHARSET_CODE_OFFSET (charset) == 0)
2276 if (XCHARSET_DIMENSION (charset) == 1)
2278 if (XCHARSET_CHARS (charset) == 94)
2280 if (((d = ch - (MIN_CHAR_94
2281 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
2285 else if (XCHARSET_CHARS (charset) == 96)
2287 if (((d = ch - (MIN_CHAR_96
2288 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
2295 else if (XCHARSET_DIMENSION (charset) == 2)
2297 if (XCHARSET_CHARS (charset) == 94)
2299 if (((d = ch - (MIN_CHAR_94x94
2300 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
2303 return (((d / 94) + 33) << 8) | (d % 94 + 33);
2305 else if (XCHARSET_CHARS (charset) == 96)
2307 if (((d = ch - (MIN_CHAR_96x96
2308 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
2311 return (((d / 96) + 32) << 8) | (d % 96 + 32);
2317 if (EQ (charset, Vcharset_mojikyo_2022_1)
2318 && (MIN_CHAR_MOJIKYO < ch) && (ch < MIN_CHAR_MOJIKYO + 94 * 60 * 94))
2320 int m = ch - MIN_CHAR_MOJIKYO - 1;
2321 int byte1 = m / (94 * 60) + 33;
2322 int byte2 = (m % (94 * 60)) / 94;
2323 int byte3 = m % 94 + 33;
2329 return (byte1 << 16) | (byte2 << 8) | byte3;
2335 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
2337 if (c <= MAX_CHAR_BASIC_LATIN)
2339 *charset = Vcharset_ascii;
2344 *charset = Vcharset_control_1;
2349 *charset = Vcharset_latin_iso8859_1;
2353 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
2355 *charset = Vcharset_greek_iso8859_7;
2356 return c - MIN_CHAR_GREEK + 0x20;
2358 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
2360 *charset = Vcharset_cyrillic_iso8859_5;
2361 return c - MIN_CHAR_CYRILLIC + 0x20;
2364 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
2366 *charset = Vcharset_hebrew_iso8859_8;
2367 return c - MIN_CHAR_HEBREW + 0x20;
2369 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
2371 *charset = Vcharset_thai_tis620;
2372 return c - MIN_CHAR_THAI + 0x20;
2375 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
2376 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
2378 return list2 (Vcharset_katakana_jisx0201,
2379 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
2382 else if (c <= MAX_CHAR_BMP)
2384 *charset = Vcharset_ucs_bmp;
2387 else if (c < MIN_CHAR_DAIKANWA)
2389 *charset = Vcharset_ucs;
2393 else if (c <= MAX_CHAR_DAIKANWA)
2395 *charset = Vcharset_ideograph_daikanwa;
2396 return c - MIN_CHAR_DAIKANWA;
2399 else if (c <= MAX_CHAR_MOJIKYO)
2401 *charset = Vcharset_mojikyo;
2402 return c - MIN_CHAR_MOJIKYO;
2404 else if (c < MIN_CHAR_94)
2406 *charset = Vcharset_ucs;
2409 else if (c <= MAX_CHAR_94)
2411 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
2412 ((c - MIN_CHAR_94) / 94) + '0',
2413 CHARSET_LEFT_TO_RIGHT);
2414 if (!NILP (*charset))
2415 return ((c - MIN_CHAR_94) % 94) + 33;
2418 *charset = Vcharset_ucs;
2422 else if (c <= MAX_CHAR_96)
2424 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
2425 ((c - MIN_CHAR_96) / 96) + '0',
2426 CHARSET_LEFT_TO_RIGHT);
2427 if (!NILP (*charset))
2428 return ((c - MIN_CHAR_96) % 96) + 32;
2431 *charset = Vcharset_ucs;
2435 else if (c <= MAX_CHAR_94x94)
2438 = CHARSET_BY_ATTRIBUTES (94, 2,
2439 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
2440 CHARSET_LEFT_TO_RIGHT);
2441 if (!NILP (*charset))
2442 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
2443 | (((c - MIN_CHAR_94x94) % 94) + 33);
2446 *charset = Vcharset_ucs;
2450 else if (c <= MAX_CHAR_96x96)
2453 = CHARSET_BY_ATTRIBUTES (96, 2,
2454 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
2455 CHARSET_LEFT_TO_RIGHT);
2456 if (!NILP (*charset))
2457 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
2458 | (((c - MIN_CHAR_96x96) % 96) + 32);
2461 *charset = Vcharset_ucs;
2467 *charset = Vcharset_ucs;
2472 Lisp_Object Vdefault_coded_charset_priority_list;
2476 /************************************************************************/
2477 /* Basic charset Lisp functions */
2478 /************************************************************************/
2480 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
2481 Return non-nil if OBJECT is a charset.
2485 return CHARSETP (object) ? Qt : Qnil;
2488 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
2489 Retrieve the charset of the given name.
2490 If CHARSET-OR-NAME is a charset object, it is simply returned.
2491 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
2492 nil is returned. Otherwise the associated charset object is returned.
2496 if (CHARSETP (charset_or_name))
2497 return charset_or_name;
2499 CHECK_SYMBOL (charset_or_name);
2500 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
2503 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
2504 Retrieve the charset of the given name.
2505 Same as `find-charset' except an error is signalled if there is no such
2506 charset instead of returning nil.
2510 Lisp_Object charset = Ffind_charset (name);
2513 signal_simple_error ("No such charset", name);
2517 /* We store the charsets in hash tables with the names as the key and the
2518 actual charset object as the value. Occasionally we need to use them
2519 in a list format. These routines provide us with that. */
2520 struct charset_list_closure
2522 Lisp_Object *charset_list;
2526 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
2527 void *charset_list_closure)
2529 /* This function can GC */
2530 struct charset_list_closure *chcl =
2531 (struct charset_list_closure*) charset_list_closure;
2532 Lisp_Object *charset_list = chcl->charset_list;
2534 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
2538 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
2539 Return a list of the names of all defined charsets.
2543 Lisp_Object charset_list = Qnil;
2544 struct gcpro gcpro1;
2545 struct charset_list_closure charset_list_closure;
2547 GCPRO1 (charset_list);
2548 charset_list_closure.charset_list = &charset_list;
2549 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
2550 &charset_list_closure);
2553 return charset_list;
2556 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
2557 Return the name of charset CHARSET.
2561 return XCHARSET_NAME (Fget_charset (charset));
2564 /* #### SJT Should generic properties be allowed? */
2565 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
2566 Define a new character set.
2567 This function is for use with Mule support.
2568 NAME is a symbol, the name by which the character set is normally referred.
2569 DOC-STRING is a string describing the character set.
2570 PROPS is a property list, describing the specific nature of the
2571 character set. Recognized properties are:
2573 'short-name Short version of the charset name (ex: Latin-1)
2574 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
2575 'registry A regular expression matching the font registry field for
2577 'dimension Number of octets used to index a character in this charset.
2578 Either 1 or 2. Defaults to 1.
2579 'columns Number of columns used to display a character in this charset.
2580 Only used in TTY mode. (Under X, the actual width of a
2581 character can be derived from the font used to display the
2582 characters.) If unspecified, defaults to the dimension
2583 (this is almost always the correct value).
2584 'chars Number of characters in each dimension (94 or 96).
2585 Defaults to 94. Note that if the dimension is 2, the
2586 character set thus described is 94x94 or 96x96.
2587 'final Final byte of ISO 2022 escape sequence. Must be
2588 supplied. Each combination of (DIMENSION, CHARS) defines a
2589 separate namespace for final bytes. Note that ISO
2590 2022 restricts the final byte to the range
2591 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
2592 dimension == 2. Note also that final bytes in the range
2593 0x30 - 0x3F are reserved for user-defined (not official)
2595 'graphic 0 (use left half of font on output) or 1 (use right half
2596 of font on output). Defaults to 0. For example, for
2597 a font whose registry is ISO8859-1, the left half
2598 (octets 0x20 - 0x7F) is the `ascii' character set, while
2599 the right half (octets 0xA0 - 0xFF) is the `latin-1'
2600 character set. With 'graphic set to 0, the octets
2601 will have their high bit cleared; with it set to 1,
2602 the octets will have their high bit set.
2603 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
2605 'ccl-program A compiled CCL program used to convert a character in
2606 this charset into an index into the font. This is in
2607 addition to the 'graphic property. The CCL program
2608 is passed the octets of the character, with the high
2609 bit cleared and set depending upon whether the value
2610 of the 'graphic property is 0 or 1.
2612 (name, doc_string, props))
2614 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
2615 int direction = CHARSET_LEFT_TO_RIGHT;
2616 Lisp_Object registry = Qnil;
2617 Lisp_Object charset;
2618 Lisp_Object ccl_program = Qnil;
2619 Lisp_Object short_name = Qnil, long_name = Qnil;
2620 int byte_offset = -1;
2622 CHECK_SYMBOL (name);
2623 if (!NILP (doc_string))
2624 CHECK_STRING (doc_string);
2626 charset = Ffind_charset (name);
2627 if (!NILP (charset))
2628 signal_simple_error ("Cannot redefine existing charset", name);
2631 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
2633 if (EQ (keyword, Qshort_name))
2635 CHECK_STRING (value);
2639 if (EQ (keyword, Qlong_name))
2641 CHECK_STRING (value);
2645 else if (EQ (keyword, Qdimension))
2648 dimension = XINT (value);
2649 if (dimension < 1 || dimension > 2)
2650 signal_simple_error ("Invalid value for 'dimension", value);
2653 else if (EQ (keyword, Qchars))
2656 chars = XINT (value);
2657 if (chars != 94 && chars != 96)
2658 signal_simple_error ("Invalid value for 'chars", value);
2661 else if (EQ (keyword, Qcolumns))
2664 columns = XINT (value);
2665 if (columns != 1 && columns != 2)
2666 signal_simple_error ("Invalid value for 'columns", value);
2669 else if (EQ (keyword, Qgraphic))
2672 graphic = XINT (value);
2674 if (graphic < 0 || graphic > 2)
2676 if (graphic < 0 || graphic > 1)
2678 signal_simple_error ("Invalid value for 'graphic", value);
2681 else if (EQ (keyword, Qregistry))
2683 CHECK_STRING (value);
2687 else if (EQ (keyword, Qdirection))
2689 if (EQ (value, Ql2r))
2690 direction = CHARSET_LEFT_TO_RIGHT;
2691 else if (EQ (value, Qr2l))
2692 direction = CHARSET_RIGHT_TO_LEFT;
2694 signal_simple_error ("Invalid value for 'direction", value);
2697 else if (EQ (keyword, Qfinal))
2699 CHECK_CHAR_COERCE_INT (value);
2700 final = XCHAR (value);
2701 if (final < '0' || final > '~')
2702 signal_simple_error ("Invalid value for 'final", value);
2705 else if (EQ (keyword, Qccl_program))
2707 struct ccl_program test_ccl;
2709 if (setup_ccl_program (&test_ccl, value) < 0)
2710 signal_simple_error ("Invalid value for 'ccl-program", value);
2711 ccl_program = value;
2715 signal_simple_error ("Unrecognized property", keyword);
2720 error ("'final must be specified");
2721 if (dimension == 2 && final > 0x5F)
2723 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2726 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2727 CHARSET_LEFT_TO_RIGHT)) ||
2728 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2729 CHARSET_RIGHT_TO_LEFT)))
2731 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2733 id = get_unallocated_leading_byte (dimension);
2735 if (NILP (doc_string))
2736 doc_string = build_string ("");
2738 if (NILP (registry))
2739 registry = build_string ("");
2741 if (NILP (short_name))
2742 XSETSTRING (short_name, XSYMBOL (name)->name);
2744 if (NILP (long_name))
2745 long_name = doc_string;
2748 columns = dimension;
2750 if (byte_offset < 0)
2754 else if (chars == 96)
2760 charset = make_charset (id, name, chars, dimension, columns, graphic,
2761 final, direction, short_name, long_name,
2762 doc_string, registry,
2763 Qnil, 0, 0, 0, byte_offset);
2764 if (!NILP (ccl_program))
2765 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2769 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2771 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2772 NEW-NAME is the name of the new charset. Return the new charset.
2774 (charset, new_name))
2776 Lisp_Object new_charset = Qnil;
2777 int id, chars, dimension, columns, graphic, final;
2779 Lisp_Object registry, doc_string, short_name, long_name;
2782 charset = Fget_charset (charset);
2783 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2784 signal_simple_error ("Charset already has reverse-direction charset",
2787 CHECK_SYMBOL (new_name);
2788 if (!NILP (Ffind_charset (new_name)))
2789 signal_simple_error ("Cannot redefine existing charset", new_name);
2791 cs = XCHARSET (charset);
2793 chars = CHARSET_CHARS (cs);
2794 dimension = CHARSET_DIMENSION (cs);
2795 columns = CHARSET_COLUMNS (cs);
2796 id = get_unallocated_leading_byte (dimension);
2798 graphic = CHARSET_GRAPHIC (cs);
2799 final = CHARSET_FINAL (cs);
2800 direction = CHARSET_RIGHT_TO_LEFT;
2801 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2802 direction = CHARSET_LEFT_TO_RIGHT;
2803 doc_string = CHARSET_DOC_STRING (cs);
2804 short_name = CHARSET_SHORT_NAME (cs);
2805 long_name = CHARSET_LONG_NAME (cs);
2806 registry = CHARSET_REGISTRY (cs);
2808 new_charset = make_charset (id, new_name, chars, dimension, columns,
2809 graphic, final, direction, short_name, long_name,
2810 doc_string, registry,
2812 CHARSET_DECODING_TABLE(cs),
2813 CHARSET_UCS_MIN(cs),
2814 CHARSET_UCS_MAX(cs),
2815 CHARSET_CODE_OFFSET(cs),
2816 CHARSET_BYTE_OFFSET(cs)
2822 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2823 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
2828 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2829 Define symbol ALIAS as an alias for CHARSET.
2833 CHECK_SYMBOL (alias);
2834 charset = Fget_charset (charset);
2835 return Fputhash (alias, charset, Vcharset_hash_table);
2838 /* #### Reverse direction charsets not yet implemented. */
2840 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2842 Return the reverse-direction charset parallel to CHARSET, if any.
2843 This is the charset with the same properties (in particular, the same
2844 dimension, number of characters per dimension, and final byte) as
2845 CHARSET but whose characters are displayed in the opposite direction.
2849 charset = Fget_charset (charset);
2850 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
2854 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2855 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2856 If DIRECTION is omitted, both directions will be checked (left-to-right
2857 will be returned if character sets exist for both directions).
2859 (dimension, chars, final, direction))
2861 int dm, ch, fi, di = -1;
2862 Lisp_Object obj = Qnil;
2864 CHECK_INT (dimension);
2865 dm = XINT (dimension);
2866 if (dm < 1 || dm > 2)
2867 signal_simple_error ("Invalid value for DIMENSION", dimension);
2871 if (ch != 94 && ch != 96)
2872 signal_simple_error ("Invalid value for CHARS", chars);
2874 CHECK_CHAR_COERCE_INT (final);
2876 if (fi < '0' || fi > '~')
2877 signal_simple_error ("Invalid value for FINAL", final);
2879 if (EQ (direction, Ql2r))
2880 di = CHARSET_LEFT_TO_RIGHT;
2881 else if (EQ (direction, Qr2l))
2882 di = CHARSET_RIGHT_TO_LEFT;
2883 else if (!NILP (direction))
2884 signal_simple_error ("Invalid value for DIRECTION", direction);
2886 if (dm == 2 && fi > 0x5F)
2888 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2892 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
2894 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
2897 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
2900 return XCHARSET_NAME (obj);
2904 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2905 Return short name of CHARSET.
2909 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2912 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2913 Return long name of CHARSET.
2917 return XCHARSET_LONG_NAME (Fget_charset (charset));
2920 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2921 Return description of CHARSET.
2925 return XCHARSET_DOC_STRING (Fget_charset (charset));
2928 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2929 Return dimension of CHARSET.
2933 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2936 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2937 Return property PROP of CHARSET, a charset object or symbol naming a charset.
2938 Recognized properties are those listed in `make-charset', as well as
2939 'name and 'doc-string.
2945 charset = Fget_charset (charset);
2946 cs = XCHARSET (charset);
2948 CHECK_SYMBOL (prop);
2949 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2950 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2951 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2952 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2953 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2954 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2955 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2956 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2957 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2958 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2959 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2960 if (EQ (prop, Qdirection))
2961 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2962 if (EQ (prop, Qreverse_direction_charset))
2964 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2965 /* #### Is this translation OK? If so, error checking sufficient? */
2966 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
2968 signal_simple_error ("Unrecognized charset property name", prop);
2969 return Qnil; /* not reached */
2972 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2973 Return charset identification number of CHARSET.
2977 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2980 /* #### We need to figure out which properties we really want to
2983 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2984 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2986 (charset, ccl_program))
2988 struct ccl_program test_ccl;
2990 charset = Fget_charset (charset);
2991 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
2992 signal_simple_error ("Invalid ccl-program", ccl_program);
2993 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2998 invalidate_charset_font_caches (Lisp_Object charset)
3000 /* Invalidate font cache entries for charset on all devices. */
3001 Lisp_Object devcons, concons, hash_table;
3002 DEVICE_LOOP_NO_BREAK (devcons, concons)
3004 struct device *d = XDEVICE (XCAR (devcons));
3005 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
3006 if (!UNBOUNDP (hash_table))
3007 Fclrhash (hash_table);
3011 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
3012 Set the 'registry property of CHARSET to REGISTRY.
3014 (charset, registry))
3016 charset = Fget_charset (charset);
3017 CHECK_STRING (registry);
3018 XCHARSET_REGISTRY (charset) = registry;
3019 invalidate_charset_font_caches (charset);
3020 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
3025 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
3026 Return mapping-table of CHARSET.
3030 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
3033 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
3034 Set mapping-table of CHARSET to TABLE.
3038 struct Lisp_Charset *cs;
3042 charset = Fget_charset (charset);
3043 cs = XCHARSET (charset);
3047 if (VECTORP (CHARSET_DECODING_TABLE(cs)))
3048 make_vector_newer (CHARSET_DECODING_TABLE(cs));
3049 CHARSET_DECODING_TABLE(cs) = Qnil;
3052 else if (VECTORP (table))
3054 int ccs_len = CHARSET_BYTE_SIZE (cs);
3055 int ret = decoding_table_check_elements (table,
3056 CHARSET_DIMENSION (cs),
3061 signal_simple_error ("Too big table", table);
3063 signal_simple_error ("Invalid element is found", table);
3065 signal_simple_error ("Something wrong", table);
3067 CHARSET_DECODING_TABLE(cs) = Qnil;
3070 signal_error (Qwrong_type_argument,
3071 list2 (build_translated_string ("vector-or-nil-p"),
3074 byte_offset = CHARSET_BYTE_OFFSET (cs);
3075 switch (CHARSET_DIMENSION (cs))
3078 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3080 Lisp_Object c = XVECTOR_DATA(table)[i];
3083 put_char_ccs_code_point (c, charset,
3084 make_int (i + byte_offset));
3088 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3090 Lisp_Object v = XVECTOR_DATA(table)[i];
3096 for (j = 0; j < XVECTOR_LENGTH (v); j++)
3098 Lisp_Object c = XVECTOR_DATA(v)[j];
3101 put_char_ccs_code_point
3103 make_int ( ( (i + byte_offset) << 8 )
3109 put_char_ccs_code_point (v, charset,
3110 make_int (i + byte_offset));
3119 /************************************************************************/
3120 /* Lisp primitives for working with characters */
3121 /************************************************************************/
3124 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
3125 Make a character from CHARSET and code-point CODE.
3131 charset = Fget_charset (charset);
3134 if (XCHARSET_GRAPHIC (charset) == 1)
3136 return make_char (DECODE_CHAR (charset, c));
3139 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
3140 Make a builtin character from CHARSET and code-point CODE.
3147 charset = Fget_charset (charset);
3151 if ((final = XCHARSET_FINAL (charset)) >= '0')
3153 if (XCHARSET_DIMENSION (charset) == 1)
3155 switch (XCHARSET_CHARS (charset))
3159 make_char (MIN_CHAR_94 + (final - '0') * 94
3160 + ((c & 0x7F) - 33));
3163 make_char (MIN_CHAR_96 + (final - '0') * 96
3164 + ((c & 0x7F) - 32));
3166 return Fdecode_char (charset, code);
3171 switch (XCHARSET_CHARS (charset))
3175 make_char (MIN_CHAR_94x94
3176 + (final - '0') * 94 * 94
3177 + (((c >> 8) & 0x7F) - 33) * 94
3178 + ((c & 0x7F) - 33));
3181 make_char (MIN_CHAR_96x96
3182 + (final - '0') * 96 * 96
3183 + (((c >> 8) & 0x7F) - 32) * 96
3184 + ((c & 0x7F) - 32));
3186 return Fdecode_char (charset, code);
3190 else if (XCHARSET_UCS_MAX (charset))
3193 = (XCHARSET_DIMENSION (charset) == 1
3195 c - XCHARSET_BYTE_OFFSET (charset)
3197 ((c >> 8) - XCHARSET_BYTE_OFFSET (charset))
3198 * XCHARSET_CHARS (charset)
3199 + (c & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
3200 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
3201 if ((cid < XCHARSET_UCS_MIN (charset))
3202 || (XCHARSET_UCS_MAX (charset) < cid))
3203 return Fdecode_char (charset, code);
3204 return make_char (cid);
3207 return Fdecode_char (charset, code);
3211 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
3212 Make a character from CHARSET and octets ARG1 and ARG2.
3213 ARG2 is required only for characters from two-dimensional charsets.
3214 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
3215 character s with caron.
3217 (charset, arg1, arg2))
3221 int lowlim, highlim;
3223 charset = Fget_charset (charset);
3224 cs = XCHARSET (charset);
3226 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
3227 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
3229 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
3231 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
3232 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
3235 /* It is useful (and safe, according to Olivier Galibert) to strip
3236 the 8th bit off ARG1 and ARG2 because it allows programmers to
3237 write (make-char 'latin-iso8859-2 CODE) where code is the actual
3238 Latin 2 code of the character. */
3246 if (a1 < lowlim || a1 > highlim)
3247 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
3249 if (CHARSET_DIMENSION (cs) == 1)
3253 ("Charset is of dimension one; second octet must be nil", arg2);
3254 return make_char (MAKE_CHAR (charset, a1, 0));
3263 a2 = XINT (arg2) & 0x7f;
3265 if (a2 < lowlim || a2 > highlim)
3266 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
3268 return make_char (MAKE_CHAR (charset, a1, a2));
3271 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
3272 Return the character set of CHARACTER.
3276 CHECK_CHAR_COERCE_INT (character);
3278 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
3281 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
3282 Return the octet numbered N (should be 0 or 1) of CHARACTER.
3283 N defaults to 0 if omitted.
3287 Lisp_Object charset;
3290 CHECK_CHAR_COERCE_INT (character);
3292 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
3294 if (NILP (n) || EQ (n, Qzero))
3295 return make_int (octet0);
3296 else if (EQ (n, make_int (1)))
3297 return make_int (octet1);
3299 signal_simple_error ("Octet number must be 0 or 1", n);
3302 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
3303 Return list of charset and one or two position-codes of CHARACTER.
3307 /* This function can GC */
3308 struct gcpro gcpro1, gcpro2;
3309 Lisp_Object charset = Qnil;
3310 Lisp_Object rc = Qnil;
3318 GCPRO2 (charset, rc);
3319 CHECK_CHAR_COERCE_INT (character);
3322 code_point = ENCODE_CHAR (XCHAR (character), charset);
3323 dimension = XCHARSET_DIMENSION (charset);
3324 while (dimension > 0)
3326 rc = Fcons (make_int (code_point & 255), rc);
3330 rc = Fcons (XCHARSET_NAME (charset), rc);
3332 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
3334 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
3336 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
3340 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
3349 #ifdef ENABLE_COMPOSITE_CHARS
3350 /************************************************************************/
3351 /* composite character functions */
3352 /************************************************************************/
3355 lookup_composite_char (Bufbyte *str, int len)
3357 Lisp_Object lispstr = make_string (str, len);
3358 Lisp_Object ch = Fgethash (lispstr,
3359 Vcomposite_char_string2char_hash_table,
3365 if (composite_char_row_next >= 128)
3366 signal_simple_error ("No more composite chars available", lispstr);
3367 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
3368 composite_char_col_next);
3369 Fputhash (make_char (emch), lispstr,
3370 Vcomposite_char_char2string_hash_table);
3371 Fputhash (lispstr, make_char (emch),
3372 Vcomposite_char_string2char_hash_table);
3373 composite_char_col_next++;
3374 if (composite_char_col_next >= 128)
3376 composite_char_col_next = 32;
3377 composite_char_row_next++;
3386 composite_char_string (Emchar ch)
3388 Lisp_Object str = Fgethash (make_char (ch),
3389 Vcomposite_char_char2string_hash_table,
3391 assert (!UNBOUNDP (str));
3395 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
3396 Convert a string into a single composite character.
3397 The character is the result of overstriking all the characters in
3402 CHECK_STRING (string);
3403 return make_char (lookup_composite_char (XSTRING_DATA (string),
3404 XSTRING_LENGTH (string)));
3407 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
3408 Return a string of the characters comprising a composite character.
3416 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
3417 signal_simple_error ("Must be composite char", ch);
3418 return composite_char_string (emch);
3420 #endif /* ENABLE_COMPOSITE_CHARS */
3423 /************************************************************************/
3424 /* initialization */
3425 /************************************************************************/
3428 syms_of_mule_charset (void)
3431 INIT_LRECORD_IMPLEMENTATION (uint8_byte_table);
3432 INIT_LRECORD_IMPLEMENTATION (uint16_byte_table);
3433 INIT_LRECORD_IMPLEMENTATION (byte_table);
3434 INIT_LRECORD_IMPLEMENTATION (char_id_table);
3436 INIT_LRECORD_IMPLEMENTATION (charset);
3438 DEFSUBR (Fcharsetp);
3439 DEFSUBR (Ffind_charset);
3440 DEFSUBR (Fget_charset);
3441 DEFSUBR (Fcharset_list);
3442 DEFSUBR (Fcharset_name);
3443 DEFSUBR (Fmake_charset);
3444 DEFSUBR (Fmake_reverse_direction_charset);
3445 /* DEFSUBR (Freverse_direction_charset); */
3446 DEFSUBR (Fdefine_charset_alias);
3447 DEFSUBR (Fcharset_from_attributes);
3448 DEFSUBR (Fcharset_short_name);
3449 DEFSUBR (Fcharset_long_name);
3450 DEFSUBR (Fcharset_description);
3451 DEFSUBR (Fcharset_dimension);
3452 DEFSUBR (Fcharset_property);
3453 DEFSUBR (Fcharset_id);
3454 DEFSUBR (Fset_charset_ccl_program);
3455 DEFSUBR (Fset_charset_registry);
3457 DEFSUBR (Fchar_attribute_list);
3458 DEFSUBR (Ffind_char_attribute_table);
3459 DEFSUBR (Fchar_attribute_alist);
3460 DEFSUBR (Fget_char_attribute);
3461 DEFSUBR (Fput_char_attribute);
3462 DEFSUBR (Fremove_char_attribute);
3463 DEFSUBR (Fdefine_char);
3464 DEFSUBR (Fchar_variants);
3465 DEFSUBR (Fget_composite_char);
3466 DEFSUBR (Fcharset_mapping_table);
3467 DEFSUBR (Fset_charset_mapping_table);
3471 DEFSUBR (Fdecode_char);
3472 DEFSUBR (Fdecode_builtin_char);
3474 DEFSUBR (Fmake_char);
3475 DEFSUBR (Fchar_charset);
3476 DEFSUBR (Fchar_octet);
3477 DEFSUBR (Fsplit_char);
3479 #ifdef ENABLE_COMPOSITE_CHARS
3480 DEFSUBR (Fmake_composite_char);
3481 DEFSUBR (Fcomposite_char_string);
3484 defsymbol (&Qcharsetp, "charsetp");
3485 defsymbol (&Qregistry, "registry");
3486 defsymbol (&Qfinal, "final");
3487 defsymbol (&Qgraphic, "graphic");
3488 defsymbol (&Qdirection, "direction");
3489 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
3490 defsymbol (&Qshort_name, "short-name");
3491 defsymbol (&Qlong_name, "long-name");
3493 defsymbol (&Ql2r, "l2r");
3494 defsymbol (&Qr2l, "r2l");
3496 /* Charsets, compatible with FSF 20.3
3497 Naming convention is Script-Charset[-Edition] */
3498 defsymbol (&Qascii, "ascii");
3499 defsymbol (&Qcontrol_1, "control-1");
3500 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
3501 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
3502 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
3503 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
3504 defsymbol (&Qthai_tis620, "thai-tis620");
3505 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
3506 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
3507 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
3508 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
3509 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
3510 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
3511 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
3512 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
3513 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
3514 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
3515 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
3516 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
3517 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
3518 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
3519 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
3521 defsymbol (&Q_ucs, "->ucs");
3522 defsymbol (&Q_decomposition, "->decomposition");
3523 defsymbol (&Qcompat, "compat");
3524 defsymbol (&Qisolated, "isolated");
3525 defsymbol (&Qinitial, "initial");
3526 defsymbol (&Qmedial, "medial");
3527 defsymbol (&Qfinal, "final");
3528 defsymbol (&Qvertical, "vertical");
3529 defsymbol (&QnoBreak, "noBreak");
3530 defsymbol (&Qfraction, "fraction");
3531 defsymbol (&Qsuper, "super");
3532 defsymbol (&Qsub, "sub");
3533 defsymbol (&Qcircle, "circle");
3534 defsymbol (&Qsquare, "square");
3535 defsymbol (&Qwide, "wide");
3536 defsymbol (&Qnarrow, "narrow");
3537 defsymbol (&Qsmall, "small");
3538 defsymbol (&Qfont, "font");
3539 defsymbol (&Qucs, "ucs");
3540 defsymbol (&Qucs_bmp, "ucs-bmp");
3541 defsymbol (&Qlatin_viscii, "latin-viscii");
3542 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
3543 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
3544 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
3545 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
3546 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
3547 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
3548 defsymbol (&Qchinese_big5, "chinese-big5");
3549 defsymbol (&Qmojikyo, "mojikyo");
3550 defsymbol (&Qmojikyo_2022_1, "mojikyo-2022-1");
3551 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
3552 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
3553 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
3554 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
3555 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
3556 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
3557 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
3558 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
3559 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
3560 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
3561 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
3562 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
3563 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
3564 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
3565 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
3566 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
3567 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
3568 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
3569 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
3570 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
3571 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
3572 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
3574 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
3575 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
3577 defsymbol (&Qcomposite, "composite");
3581 vars_of_mule_charset (void)
3588 chlook = xnew (struct charset_lookup);
3589 dumpstruct (&chlook, &charset_lookup_description);
3591 /* Table of charsets indexed by leading byte. */
3592 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
3593 chlook->charset_by_leading_byte[i] = Qnil;
3596 /* Table of charsets indexed by type/final-byte. */
3597 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3598 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3599 chlook->charset_by_attributes[i][j] = Qnil;
3601 /* Table of charsets indexed by type/final-byte/direction. */
3602 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3603 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3604 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
3605 chlook->charset_by_attributes[i][j][k] = Qnil;
3609 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
3611 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
3612 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
3616 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3617 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
3618 Leading-code of private TYPE9N charset of column-width 1.
3620 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3624 Vutf_2000_version = build_string("0.17 (Hōryūji)");
3625 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
3626 Version number of UTF-2000.
3629 staticpro (&Vcharacter_composition_table);
3630 Vcharacter_composition_table = make_char_id_table (Qnil);
3632 staticpro (&Vcharacter_variant_table);
3633 Vcharacter_variant_table = make_char_id_table (Qnil);
3635 Vdefault_coded_charset_priority_list = Qnil;
3636 DEFVAR_LISP ("default-coded-charset-priority-list",
3637 &Vdefault_coded_charset_priority_list /*
3638 Default order of preferred coded-character-sets.
3644 complex_vars_of_mule_charset (void)
3646 staticpro (&Vcharset_hash_table);
3647 Vcharset_hash_table =
3648 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3650 /* Predefined character sets. We store them into variables for
3654 staticpro (&Vchar_attribute_hash_table);
3655 Vchar_attribute_hash_table
3656 = make_lisp_hash_table (16, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3658 staticpro (&Vcharset_ucs);
3660 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
3661 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3662 build_string ("UCS"),
3663 build_string ("UCS"),
3664 build_string ("ISO/IEC 10646"),
3666 Qnil, 0, 0xFFFFFFF, 0, 0);
3667 staticpro (&Vcharset_ucs_bmp);
3669 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
3670 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3671 build_string ("BMP"),
3672 build_string ("BMP"),
3673 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
3674 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
3675 Qnil, 0, 0xFFFF, 0, 0);
3677 # define MIN_CHAR_THAI 0
3678 # define MAX_CHAR_THAI 0
3679 # define MIN_CHAR_HEBREW 0
3680 # define MAX_CHAR_HEBREW 0
3681 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3682 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
3684 staticpro (&Vcharset_ascii);
3686 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3687 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3688 build_string ("ASCII"),
3689 build_string ("ASCII)"),
3690 build_string ("ASCII (ISO646 IRV)"),
3691 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3692 Qnil, 0, 0x7F, 0, 0);
3693 staticpro (&Vcharset_control_1);
3694 Vcharset_control_1 =
3695 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3696 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3697 build_string ("C1"),
3698 build_string ("Control characters"),
3699 build_string ("Control characters 128-191"),
3701 Qnil, 0x80, 0x9F, 0, 0);
3702 staticpro (&Vcharset_latin_iso8859_1);
3703 Vcharset_latin_iso8859_1 =
3704 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3705 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3706 build_string ("Latin-1"),
3707 build_string ("ISO8859-1 (Latin-1)"),
3708 build_string ("ISO8859-1 (Latin-1)"),
3709 build_string ("iso8859-1"),
3710 Qnil, 0xA0, 0xFF, 0, 32);
3711 staticpro (&Vcharset_latin_iso8859_2);
3712 Vcharset_latin_iso8859_2 =
3713 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3714 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3715 build_string ("Latin-2"),
3716 build_string ("ISO8859-2 (Latin-2)"),
3717 build_string ("ISO8859-2 (Latin-2)"),
3718 build_string ("iso8859-2"),
3720 staticpro (&Vcharset_latin_iso8859_3);
3721 Vcharset_latin_iso8859_3 =
3722 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3723 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3724 build_string ("Latin-3"),
3725 build_string ("ISO8859-3 (Latin-3)"),
3726 build_string ("ISO8859-3 (Latin-3)"),
3727 build_string ("iso8859-3"),
3729 staticpro (&Vcharset_latin_iso8859_4);
3730 Vcharset_latin_iso8859_4 =
3731 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3732 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3733 build_string ("Latin-4"),
3734 build_string ("ISO8859-4 (Latin-4)"),
3735 build_string ("ISO8859-4 (Latin-4)"),
3736 build_string ("iso8859-4"),
3738 staticpro (&Vcharset_thai_tis620);
3739 Vcharset_thai_tis620 =
3740 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3741 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3742 build_string ("TIS620"),
3743 build_string ("TIS620 (Thai)"),
3744 build_string ("TIS620.2529 (Thai)"),
3745 build_string ("tis620"),
3746 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
3747 staticpro (&Vcharset_greek_iso8859_7);
3748 Vcharset_greek_iso8859_7 =
3749 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3750 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3751 build_string ("ISO8859-7"),
3752 build_string ("ISO8859-7 (Greek)"),
3753 build_string ("ISO8859-7 (Greek)"),
3754 build_string ("iso8859-7"),
3756 0 /* MIN_CHAR_GREEK */,
3757 0 /* MAX_CHAR_GREEK */, 0, 32);
3758 staticpro (&Vcharset_arabic_iso8859_6);
3759 Vcharset_arabic_iso8859_6 =
3760 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3761 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3762 build_string ("ISO8859-6"),
3763 build_string ("ISO8859-6 (Arabic)"),
3764 build_string ("ISO8859-6 (Arabic)"),
3765 build_string ("iso8859-6"),
3767 staticpro (&Vcharset_hebrew_iso8859_8);
3768 Vcharset_hebrew_iso8859_8 =
3769 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3770 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3771 build_string ("ISO8859-8"),
3772 build_string ("ISO8859-8 (Hebrew)"),
3773 build_string ("ISO8859-8 (Hebrew)"),
3774 build_string ("iso8859-8"),
3775 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
3776 staticpro (&Vcharset_katakana_jisx0201);
3777 Vcharset_katakana_jisx0201 =
3778 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3779 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3780 build_string ("JISX0201 Kana"),
3781 build_string ("JISX0201.1976 (Japanese Kana)"),
3782 build_string ("JISX0201.1976 Japanese Kana"),
3783 build_string ("jisx0201\\.1976"),
3785 staticpro (&Vcharset_latin_jisx0201);
3786 Vcharset_latin_jisx0201 =
3787 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3788 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3789 build_string ("JISX0201 Roman"),
3790 build_string ("JISX0201.1976 (Japanese Roman)"),
3791 build_string ("JISX0201.1976 Japanese Roman"),
3792 build_string ("jisx0201\\.1976"),
3794 staticpro (&Vcharset_cyrillic_iso8859_5);
3795 Vcharset_cyrillic_iso8859_5 =
3796 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3797 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3798 build_string ("ISO8859-5"),
3799 build_string ("ISO8859-5 (Cyrillic)"),
3800 build_string ("ISO8859-5 (Cyrillic)"),
3801 build_string ("iso8859-5"),
3803 0 /* MIN_CHAR_CYRILLIC */,
3804 0 /* MAX_CHAR_CYRILLIC */, 0, 32);
3805 staticpro (&Vcharset_latin_iso8859_9);
3806 Vcharset_latin_iso8859_9 =
3807 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3808 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3809 build_string ("Latin-5"),
3810 build_string ("ISO8859-9 (Latin-5)"),
3811 build_string ("ISO8859-9 (Latin-5)"),
3812 build_string ("iso8859-9"),
3814 staticpro (&Vcharset_japanese_jisx0208_1978);
3815 Vcharset_japanese_jisx0208_1978 =
3816 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3817 Qjapanese_jisx0208_1978, 94, 2,
3818 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3819 build_string ("JIS X0208:1978"),
3820 build_string ("JIS X0208:1978 (Japanese)"),
3822 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3823 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3825 staticpro (&Vcharset_chinese_gb2312);
3826 Vcharset_chinese_gb2312 =
3827 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3828 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3829 build_string ("GB2312"),
3830 build_string ("GB2312)"),
3831 build_string ("GB2312 Chinese simplified"),
3832 build_string ("gb2312"),
3834 staticpro (&Vcharset_japanese_jisx0208);
3835 Vcharset_japanese_jisx0208 =
3836 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3837 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3838 build_string ("JISX0208"),
3839 build_string ("JIS X0208:1983 (Japanese)"),
3840 build_string ("JIS X0208:1983 Japanese Kanji"),
3841 build_string ("jisx0208\\.1983"),
3844 staticpro (&Vcharset_japanese_jisx0208_1990);
3845 Vcharset_japanese_jisx0208_1990 =
3846 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3847 Qjapanese_jisx0208_1990, 94, 2,
3848 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3849 build_string ("JISX0208-1990"),
3850 build_string ("JIS X0208:1990 (Japanese)"),
3851 build_string ("JIS X0208:1990 Japanese Kanji"),
3852 build_string ("jisx0208\\.1990"),
3854 MIN_CHAR_JIS_X0208_1990,
3855 MAX_CHAR_JIS_X0208_1990, 0, 33);
3857 staticpro (&Vcharset_korean_ksc5601);
3858 Vcharset_korean_ksc5601 =
3859 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3860 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3861 build_string ("KSC5601"),
3862 build_string ("KSC5601 (Korean"),
3863 build_string ("KSC5601 Korean Hangul and Hanja"),
3864 build_string ("ksc5601"),
3866 staticpro (&Vcharset_japanese_jisx0212);
3867 Vcharset_japanese_jisx0212 =
3868 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3869 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3870 build_string ("JISX0212"),
3871 build_string ("JISX0212 (Japanese)"),
3872 build_string ("JISX0212 Japanese Supplement"),
3873 build_string ("jisx0212"),
3876 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3877 staticpro (&Vcharset_chinese_cns11643_1);
3878 Vcharset_chinese_cns11643_1 =
3879 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3880 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3881 build_string ("CNS11643-1"),
3882 build_string ("CNS11643-1 (Chinese traditional)"),
3884 ("CNS 11643 Plane 1 Chinese traditional"),
3885 build_string (CHINESE_CNS_PLANE_RE("1")),
3887 staticpro (&Vcharset_chinese_cns11643_2);
3888 Vcharset_chinese_cns11643_2 =
3889 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3890 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3891 build_string ("CNS11643-2"),
3892 build_string ("CNS11643-2 (Chinese traditional)"),
3894 ("CNS 11643 Plane 2 Chinese traditional"),
3895 build_string (CHINESE_CNS_PLANE_RE("2")),
3898 staticpro (&Vcharset_latin_tcvn5712);
3899 Vcharset_latin_tcvn5712 =
3900 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3901 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3902 build_string ("TCVN 5712"),
3903 build_string ("TCVN 5712 (VSCII-2)"),
3904 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3905 build_string ("tcvn5712-1"),
3907 staticpro (&Vcharset_latin_viscii_lower);
3908 Vcharset_latin_viscii_lower =
3909 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3910 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3911 build_string ("VISCII lower"),
3912 build_string ("VISCII lower (Vietnamese)"),
3913 build_string ("VISCII lower (Vietnamese)"),
3914 build_string ("MULEVISCII-LOWER"),
3916 staticpro (&Vcharset_latin_viscii_upper);
3917 Vcharset_latin_viscii_upper =
3918 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3919 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3920 build_string ("VISCII upper"),
3921 build_string ("VISCII upper (Vietnamese)"),
3922 build_string ("VISCII upper (Vietnamese)"),
3923 build_string ("MULEVISCII-UPPER"),
3925 staticpro (&Vcharset_latin_viscii);
3926 Vcharset_latin_viscii =
3927 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3928 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3929 build_string ("VISCII"),
3930 build_string ("VISCII 1.1 (Vietnamese)"),
3931 build_string ("VISCII 1.1 (Vietnamese)"),
3932 build_string ("VISCII1\\.1"),
3934 staticpro (&Vcharset_chinese_big5);
3935 Vcharset_chinese_big5 =
3936 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
3937 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3938 build_string ("Big5"),
3939 build_string ("Big5"),
3940 build_string ("Big5 Chinese traditional"),
3941 build_string ("big5"),
3943 staticpro (&Vcharset_ideograph_daikanwa);
3944 Vcharset_ideograph_daikanwa =
3945 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
3946 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3947 build_string ("Daikanwa"),
3948 build_string ("Morohashi's Daikanwa"),
3949 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
3950 build_string ("Daikanwa"),
3951 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
3952 staticpro (&Vcharset_mojikyo);
3954 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
3955 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3956 build_string ("Mojikyo"),
3957 build_string ("Mojikyo"),
3958 build_string ("Konjaku-Mojikyo"),
3960 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
3961 staticpro (&Vcharset_mojikyo_2022_1);
3962 Vcharset_mojikyo_2022_1 =
3963 make_charset (LEADING_BYTE_MOJIKYO_2022_1, Qmojikyo_2022_1, 94, 3,
3964 2, 2, ':', CHARSET_LEFT_TO_RIGHT,
3965 build_string ("Mojikyo-2022-1"),
3966 build_string ("Mojikyo ISO-2022 Part 1"),
3967 build_string ("Konjaku-Mojikyo for ISO/IEC 2022 Part 1"),
3971 #define DEF_MOJIKYO_PJ(n) \
3972 staticpro (&Vcharset_mojikyo_pj_##n); \
3973 Vcharset_mojikyo_pj_##n = \
3974 make_charset (LEADING_BYTE_MOJIKYO_PJ_##n, Qmojikyo_pj_##n, 94, 2, \
3975 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
3976 build_string ("Mojikyo-PJ-"#n), \
3977 build_string ("Mojikyo (pseudo JIS encoding) part "#n), \
3979 ("Konjaku-Mojikyo (pseudo JIS encoding) part "#n), \
3981 ("\\(MojikyoPJ-"#n "\\|jisx0208\\.Mojikyo-"#n "\\)$"), \
3993 DEF_MOJIKYO_PJ (10);
3994 DEF_MOJIKYO_PJ (11);
3995 DEF_MOJIKYO_PJ (12);
3996 DEF_MOJIKYO_PJ (13);
3997 DEF_MOJIKYO_PJ (14);
3998 DEF_MOJIKYO_PJ (15);
3999 DEF_MOJIKYO_PJ (16);
4000 DEF_MOJIKYO_PJ (17);
4001 DEF_MOJIKYO_PJ (18);
4002 DEF_MOJIKYO_PJ (19);
4003 DEF_MOJIKYO_PJ (20);
4004 DEF_MOJIKYO_PJ (21);
4006 staticpro (&Vcharset_ethiopic_ucs);
4007 Vcharset_ethiopic_ucs =
4008 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
4009 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4010 build_string ("Ethiopic (UCS)"),
4011 build_string ("Ethiopic (UCS)"),
4012 build_string ("Ethiopic of UCS"),
4013 build_string ("Ethiopic-Unicode"),
4014 Qnil, 0x1200, 0x137F, 0x1200, 0);
4016 staticpro (&Vcharset_chinese_big5_1);
4017 Vcharset_chinese_big5_1 =
4018 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
4019 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
4020 build_string ("Big5"),
4021 build_string ("Big5 (Level-1)"),
4023 ("Big5 Level-1 Chinese traditional"),
4024 build_string ("big5"),
4026 staticpro (&Vcharset_chinese_big5_2);
4027 Vcharset_chinese_big5_2 =
4028 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
4029 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
4030 build_string ("Big5"),
4031 build_string ("Big5 (Level-2)"),
4033 ("Big5 Level-2 Chinese traditional"),
4034 build_string ("big5"),
4037 #ifdef ENABLE_COMPOSITE_CHARS
4038 /* #### For simplicity, we put composite chars into a 96x96 charset.
4039 This is going to lead to problems because you can run out of
4040 room, esp. as we don't yet recycle numbers. */
4041 staticpro (&Vcharset_composite);
4042 Vcharset_composite =
4043 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
4044 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4045 build_string ("Composite"),
4046 build_string ("Composite characters"),
4047 build_string ("Composite characters"),
4050 /* #### not dumped properly */
4051 composite_char_row_next = 32;
4052 composite_char_col_next = 32;
4054 Vcomposite_char_string2char_hash_table =
4055 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
4056 Vcomposite_char_char2string_hash_table =
4057 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
4058 staticpro (&Vcomposite_char_string2char_hash_table);
4059 staticpro (&Vcomposite_char_char2string_hash_table);
4060 #endif /* ENABLE_COMPOSITE_CHARS */