1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 20.3. Not in FSF. */
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_japanese_jisx0208;
60 Lisp_Object Vcharset_japanese_jisx0208_1990;
61 Lisp_Object Vcharset_korean_ksc5601;
62 Lisp_Object Vcharset_japanese_jisx0212;
63 Lisp_Object Vcharset_chinese_cns11643_1;
64 Lisp_Object Vcharset_chinese_cns11643_2;
66 Lisp_Object Vcharset_ucs;
67 Lisp_Object Vcharset_ucs_bmp;
68 Lisp_Object Vcharset_ucs_cns;
69 Lisp_Object Vcharset_latin_viscii;
70 Lisp_Object Vcharset_latin_tcvn5712;
71 Lisp_Object Vcharset_latin_viscii_lower;
72 Lisp_Object Vcharset_latin_viscii_upper;
73 Lisp_Object Vcharset_chinese_big5;
74 Lisp_Object Vcharset_ideograph_gt;
75 Lisp_Object Vcharset_ideograph_gt_pj_1;
76 Lisp_Object Vcharset_ideograph_gt_pj_2;
77 Lisp_Object Vcharset_ideograph_gt_pj_3;
78 Lisp_Object Vcharset_ideograph_gt_pj_4;
79 Lisp_Object Vcharset_ideograph_gt_pj_5;
80 Lisp_Object Vcharset_ideograph_gt_pj_6;
81 Lisp_Object Vcharset_ideograph_gt_pj_7;
82 Lisp_Object Vcharset_ideograph_gt_pj_8;
83 Lisp_Object Vcharset_ideograph_gt_pj_9;
84 Lisp_Object Vcharset_ideograph_gt_pj_10;
85 Lisp_Object Vcharset_ideograph_gt_pj_11;
86 Lisp_Object Vcharset_ideograph_daikanwa;
87 Lisp_Object Vcharset_mojikyo;
88 Lisp_Object Vcharset_mojikyo_2022_1;
89 Lisp_Object Vcharset_mojikyo_pj_1;
90 Lisp_Object Vcharset_mojikyo_pj_2;
91 Lisp_Object Vcharset_mojikyo_pj_3;
92 Lisp_Object Vcharset_mojikyo_pj_4;
93 Lisp_Object Vcharset_mojikyo_pj_5;
94 Lisp_Object Vcharset_mojikyo_pj_6;
95 Lisp_Object Vcharset_mojikyo_pj_7;
96 Lisp_Object Vcharset_mojikyo_pj_8;
97 Lisp_Object Vcharset_mojikyo_pj_9;
98 Lisp_Object Vcharset_mojikyo_pj_10;
99 Lisp_Object Vcharset_mojikyo_pj_11;
100 Lisp_Object Vcharset_mojikyo_pj_12;
101 Lisp_Object Vcharset_mojikyo_pj_13;
102 Lisp_Object Vcharset_mojikyo_pj_14;
103 Lisp_Object Vcharset_mojikyo_pj_15;
104 Lisp_Object Vcharset_mojikyo_pj_16;
105 Lisp_Object Vcharset_mojikyo_pj_17;
106 Lisp_Object Vcharset_mojikyo_pj_18;
107 Lisp_Object Vcharset_mojikyo_pj_19;
108 Lisp_Object Vcharset_mojikyo_pj_20;
109 Lisp_Object Vcharset_mojikyo_pj_21;
110 Lisp_Object Vcharset_ethiopic_ucs;
112 Lisp_Object Vcharset_chinese_big5_1;
113 Lisp_Object Vcharset_chinese_big5_2;
115 #ifdef ENABLE_COMPOSITE_CHARS
116 Lisp_Object Vcharset_composite;
118 /* Hash tables for composite chars. One maps string representing
119 composed chars to their equivalent chars; one goes the
121 Lisp_Object Vcomposite_char_char2string_hash_table;
122 Lisp_Object Vcomposite_char_string2char_hash_table;
124 static int composite_char_row_next;
125 static int composite_char_col_next;
127 #endif /* ENABLE_COMPOSITE_CHARS */
129 struct charset_lookup *chlook;
131 static const struct lrecord_description charset_lookup_description_1[] = {
132 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
141 static const struct struct_description charset_lookup_description = {
142 sizeof (struct charset_lookup),
143 charset_lookup_description_1
147 /* Table of number of bytes in the string representation of a character
148 indexed by the first byte of that representation.
150 rep_bytes_by_first_byte(c) is more efficient than the equivalent
151 canonical computation:
153 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
155 const Bytecount rep_bytes_by_first_byte[0xA0] =
156 { /* 0x00 - 0x7f are for straight ASCII */
157 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
165 /* 0x80 - 0x8f are for Dimension-1 official charsets */
167 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
169 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
171 /* 0x90 - 0x9d are for Dimension-2 official charsets */
172 /* 0x9e is for Dimension-1 private charsets */
173 /* 0x9f is for Dimension-2 private charsets */
174 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
180 #define BT_UINT8_MIN 0
181 #define BT_UINT8_MAX (UCHAR_MAX - 3)
182 #define BT_UINT8_t (UCHAR_MAX - 2)
183 #define BT_UINT8_nil (UCHAR_MAX - 1)
184 #define BT_UINT8_unbound UCHAR_MAX
186 INLINE_HEADER int INT_UINT8_P (Lisp_Object obj);
187 INLINE_HEADER int UINT8_VALUE_P (Lisp_Object obj);
188 INLINE_HEADER unsigned char UINT8_ENCODE (Lisp_Object obj);
189 INLINE_HEADER Lisp_Object UINT8_DECODE (unsigned char n);
190 INLINE_HEADER unsigned short UINT8_TO_UINT16 (unsigned char n);
193 INT_UINT8_P (Lisp_Object obj)
197 int num = XINT (obj);
199 return (BT_UINT8_MIN <= num) && (num <= BT_UINT8_MAX);
206 UINT8_VALUE_P (Lisp_Object obj)
208 return EQ (obj, Qunbound)
209 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT8_P (obj);
212 INLINE_HEADER unsigned char
213 UINT8_ENCODE (Lisp_Object obj)
215 if (EQ (obj, Qunbound))
216 return BT_UINT8_unbound;
217 else if (EQ (obj, Qnil))
219 else if (EQ (obj, Qt))
225 INLINE_HEADER Lisp_Object
226 UINT8_DECODE (unsigned char n)
228 if (n == BT_UINT8_unbound)
230 else if (n == BT_UINT8_nil)
232 else if (n == BT_UINT8_t)
239 mark_uint8_byte_table (Lisp_Object obj)
245 print_uint8_byte_table (Lisp_Object obj,
246 Lisp_Object printcharfun, int escapeflag)
248 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
250 struct gcpro gcpro1, gcpro2;
251 GCPRO2 (obj, printcharfun);
253 write_c_string ("\n#<uint8-byte-table", printcharfun);
254 for (i = 0; i < 256; i++)
256 unsigned char n = bte->property[i];
258 write_c_string ("\n ", printcharfun);
259 write_c_string (" ", printcharfun);
260 if (n == BT_UINT8_unbound)
261 write_c_string ("void", printcharfun);
262 else if (n == BT_UINT8_nil)
263 write_c_string ("nil", printcharfun);
264 else if (n == BT_UINT8_t)
265 write_c_string ("t", printcharfun);
270 sprintf (buf, "%hd", n);
271 write_c_string (buf, printcharfun);
275 write_c_string (">", printcharfun);
279 uint8_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
281 Lisp_Uint8_Byte_Table *te1 = XUINT8_BYTE_TABLE (obj1);
282 Lisp_Uint8_Byte_Table *te2 = XUINT8_BYTE_TABLE (obj2);
285 for (i = 0; i < 256; i++)
286 if (te1->property[i] != te2->property[i])
292 uint8_byte_table_hash (Lisp_Object obj, int depth)
294 Lisp_Uint8_Byte_Table *te = XUINT8_BYTE_TABLE (obj);
298 for (i = 0; i < 256; i++)
299 hash = HASH2 (hash, te->property[i]);
303 DEFINE_LRECORD_IMPLEMENTATION ("uint8-byte-table", uint8_byte_table,
304 mark_uint8_byte_table,
305 print_uint8_byte_table,
306 0, uint8_byte_table_equal,
307 uint8_byte_table_hash,
308 0 /* uint8_byte_table_description */,
309 Lisp_Uint8_Byte_Table);
312 make_uint8_byte_table (unsigned char initval)
316 Lisp_Uint8_Byte_Table *cte;
318 cte = alloc_lcrecord_type (Lisp_Uint8_Byte_Table,
319 &lrecord_uint8_byte_table);
321 for (i = 0; i < 256; i++)
322 cte->property[i] = initval;
324 XSETUINT8_BYTE_TABLE (obj, cte);
329 uint8_byte_table_same_value_p (Lisp_Object obj)
331 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
332 unsigned char v0 = bte->property[0];
335 for (i = 1; i < 256; i++)
337 if (bte->property[i] != v0)
344 #define BT_UINT16_MIN 0
345 #define BT_UINT16_MAX (USHRT_MAX - 3)
346 #define BT_UINT16_t (USHRT_MAX - 2)
347 #define BT_UINT16_nil (USHRT_MAX - 1)
348 #define BT_UINT16_unbound USHRT_MAX
350 INLINE_HEADER int INT_UINT16_P (Lisp_Object obj);
351 INLINE_HEADER int UINT16_VALUE_P (Lisp_Object obj);
352 INLINE_HEADER unsigned short UINT16_ENCODE (Lisp_Object obj);
353 INLINE_HEADER Lisp_Object UINT16_DECODE (unsigned short us);
356 INT_UINT16_P (Lisp_Object obj)
360 int num = XINT (obj);
362 return (BT_UINT16_MIN <= num) && (num <= BT_UINT16_MAX);
369 UINT16_VALUE_P (Lisp_Object obj)
371 return EQ (obj, Qunbound)
372 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT16_P (obj);
375 INLINE_HEADER unsigned short
376 UINT16_ENCODE (Lisp_Object obj)
378 if (EQ (obj, Qunbound))
379 return BT_UINT16_unbound;
380 else if (EQ (obj, Qnil))
381 return BT_UINT16_nil;
382 else if (EQ (obj, Qt))
388 INLINE_HEADER Lisp_Object
389 UINT16_DECODE (unsigned short n)
391 if (n == BT_UINT16_unbound)
393 else if (n == BT_UINT16_nil)
395 else if (n == BT_UINT16_t)
401 INLINE_HEADER unsigned short
402 UINT8_TO_UINT16 (unsigned char n)
404 if (n == BT_UINT8_unbound)
405 return BT_UINT16_unbound;
406 else if (n == BT_UINT8_nil)
407 return BT_UINT16_nil;
408 else if (n == BT_UINT8_t)
415 mark_uint16_byte_table (Lisp_Object obj)
421 print_uint16_byte_table (Lisp_Object obj,
422 Lisp_Object printcharfun, int escapeflag)
424 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
426 struct gcpro gcpro1, gcpro2;
427 GCPRO2 (obj, printcharfun);
429 write_c_string ("\n#<uint16-byte-table", printcharfun);
430 for (i = 0; i < 256; i++)
432 unsigned short n = bte->property[i];
434 write_c_string ("\n ", printcharfun);
435 write_c_string (" ", printcharfun);
436 if (n == BT_UINT16_unbound)
437 write_c_string ("void", printcharfun);
438 else if (n == BT_UINT16_nil)
439 write_c_string ("nil", printcharfun);
440 else if (n == BT_UINT16_t)
441 write_c_string ("t", printcharfun);
446 sprintf (buf, "%hd", n);
447 write_c_string (buf, printcharfun);
451 write_c_string (">", printcharfun);
455 uint16_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
457 Lisp_Uint16_Byte_Table *te1 = XUINT16_BYTE_TABLE (obj1);
458 Lisp_Uint16_Byte_Table *te2 = XUINT16_BYTE_TABLE (obj2);
461 for (i = 0; i < 256; i++)
462 if (te1->property[i] != te2->property[i])
468 uint16_byte_table_hash (Lisp_Object obj, int depth)
470 Lisp_Uint16_Byte_Table *te = XUINT16_BYTE_TABLE (obj);
474 for (i = 0; i < 256; i++)
475 hash = HASH2 (hash, te->property[i]);
479 DEFINE_LRECORD_IMPLEMENTATION ("uint16-byte-table", uint16_byte_table,
480 mark_uint16_byte_table,
481 print_uint16_byte_table,
482 0, uint16_byte_table_equal,
483 uint16_byte_table_hash,
484 0 /* uint16_byte_table_description */,
485 Lisp_Uint16_Byte_Table);
488 make_uint16_byte_table (unsigned short initval)
492 Lisp_Uint16_Byte_Table *cte;
494 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
495 &lrecord_uint16_byte_table);
497 for (i = 0; i < 256; i++)
498 cte->property[i] = initval;
500 XSETUINT16_BYTE_TABLE (obj, cte);
505 expand_uint8_byte_table_to_uint16 (Lisp_Object table)
509 Lisp_Uint8_Byte_Table* bte = XUINT8_BYTE_TABLE(table);
510 Lisp_Uint16_Byte_Table* cte;
512 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
513 &lrecord_uint16_byte_table);
514 for (i = 0; i < 256; i++)
516 cte->property[i] = UINT8_TO_UINT16 (bte->property[i]);
518 XSETUINT16_BYTE_TABLE (obj, cte);
523 uint16_byte_table_same_value_p (Lisp_Object obj)
525 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
526 unsigned short v0 = bte->property[0];
529 for (i = 1; i < 256; i++)
531 if (bte->property[i] != v0)
539 mark_byte_table (Lisp_Object obj)
541 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
544 for (i = 0; i < 256; i++)
546 mark_object (cte->property[i]);
552 print_byte_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
554 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
556 struct gcpro gcpro1, gcpro2;
557 GCPRO2 (obj, printcharfun);
559 write_c_string ("\n#<byte-table", printcharfun);
560 for (i = 0; i < 256; i++)
562 Lisp_Object elt = bte->property[i];
564 write_c_string ("\n ", printcharfun);
565 write_c_string (" ", printcharfun);
566 if (EQ (elt, Qunbound))
567 write_c_string ("void", printcharfun);
569 print_internal (elt, printcharfun, escapeflag);
572 write_c_string (">", printcharfun);
576 byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
578 Lisp_Byte_Table *cte1 = XBYTE_TABLE (obj1);
579 Lisp_Byte_Table *cte2 = XBYTE_TABLE (obj2);
582 for (i = 0; i < 256; i++)
583 if (BYTE_TABLE_P (cte1->property[i]))
585 if (BYTE_TABLE_P (cte2->property[i]))
587 if (!byte_table_equal (cte1->property[i],
588 cte2->property[i], depth + 1))
595 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
601 byte_table_hash (Lisp_Object obj, int depth)
603 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
605 return internal_array_hash (cte->property, 256, depth);
608 static const struct lrecord_description byte_table_description[] = {
609 { XD_LISP_OBJECT_ARRAY, offsetof(Lisp_Byte_Table, property), 256 },
613 DEFINE_LRECORD_IMPLEMENTATION ("byte-table", byte_table,
618 byte_table_description,
622 make_byte_table (Lisp_Object initval)
626 Lisp_Byte_Table *cte;
628 cte = alloc_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
630 for (i = 0; i < 256; i++)
631 cte->property[i] = initval;
633 XSETBYTE_TABLE (obj, cte);
638 byte_table_same_value_p (Lisp_Object obj)
640 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
641 Lisp_Object v0 = bte->property[0];
644 for (i = 1; i < 256; i++)
646 if (!internal_equal (bte->property[i], v0, 0))
653 Lisp_Object get_byte_table (Lisp_Object table, unsigned char idx);
654 Lisp_Object put_byte_table (Lisp_Object table, unsigned char idx,
658 get_byte_table (Lisp_Object table, unsigned char idx)
660 if (UINT8_BYTE_TABLE_P (table))
661 return UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[idx]);
662 else if (UINT16_BYTE_TABLE_P (table))
663 return UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[idx]);
664 else if (BYTE_TABLE_P (table))
665 return XBYTE_TABLE(table)->property[idx];
671 put_byte_table (Lisp_Object table, unsigned char idx, Lisp_Object value)
673 if (UINT8_BYTE_TABLE_P (table))
675 if (UINT8_VALUE_P (value))
677 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
678 if (!UINT8_BYTE_TABLE_P (value) &&
679 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
680 && uint8_byte_table_same_value_p (table))
685 else if (UINT16_VALUE_P (value))
687 Lisp_Object new = expand_uint8_byte_table_to_uint16 (table);
689 XUINT16_BYTE_TABLE(new)->property[idx] = UINT16_ENCODE (value);
694 Lisp_Object new = make_byte_table (Qnil);
697 for (i = 0; i < 256; i++)
699 XBYTE_TABLE(new)->property[i]
700 = UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[i]);
702 XBYTE_TABLE(new)->property[idx] = value;
706 else if (UINT16_BYTE_TABLE_P (table))
708 if (UINT16_VALUE_P (value))
710 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
711 if (!UINT8_BYTE_TABLE_P (value) &&
712 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
713 && uint16_byte_table_same_value_p (table))
720 Lisp_Object new = make_byte_table (Qnil);
723 for (i = 0; i < 256; i++)
725 XBYTE_TABLE(new)->property[i]
726 = UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[i]);
728 XBYTE_TABLE(new)->property[idx] = value;
732 else if (BYTE_TABLE_P (table))
734 XBYTE_TABLE(table)->property[idx] = value;
735 if (!UINT8_BYTE_TABLE_P (value) &&
736 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
737 && byte_table_same_value_p (table))
742 else if (!internal_equal (table, value, 0))
744 if (UINT8_VALUE_P (table) && UINT8_VALUE_P (value))
746 table = make_uint8_byte_table (UINT8_ENCODE (table));
747 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
749 else if (UINT16_VALUE_P (table) && UINT16_VALUE_P (value))
751 table = make_uint16_byte_table (UINT16_ENCODE (table));
752 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
756 table = make_byte_table (table);
757 XBYTE_TABLE(table)->property[idx] = value;
764 mark_char_id_table (Lisp_Object obj)
766 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
772 print_char_id_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
774 Lisp_Object table = XCHAR_ID_TABLE (obj)->table;
776 struct gcpro gcpro1, gcpro2;
777 GCPRO2 (obj, printcharfun);
779 write_c_string ("#<char-id-table ", printcharfun);
780 for (i = 0; i < 256; i++)
782 Lisp_Object elt = get_byte_table (table, i);
783 if (i != 0) write_c_string ("\n ", printcharfun);
784 if (EQ (elt, Qunbound))
785 write_c_string ("void", printcharfun);
787 print_internal (elt, printcharfun, escapeflag);
790 write_c_string (">", printcharfun);
794 char_id_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
796 Lisp_Object table1 = XCHAR_ID_TABLE (obj1)->table;
797 Lisp_Object table2 = XCHAR_ID_TABLE (obj2)->table;
800 for (i = 0; i < 256; i++)
802 if (!internal_equal (get_byte_table (table1, i),
803 get_byte_table (table2, i), 0))
810 char_id_table_hash (Lisp_Object obj, int depth)
812 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
814 return char_id_table_hash (cte->table, depth + 1);
817 static const struct lrecord_description char_id_table_description[] = {
818 { XD_LISP_OBJECT, offsetof(Lisp_Char_ID_Table, table) },
822 DEFINE_LRECORD_IMPLEMENTATION ("char-id-table", char_id_table,
825 0, char_id_table_equal,
827 char_id_table_description,
831 make_char_id_table (Lisp_Object initval)
834 Lisp_Char_ID_Table *cte;
836 cte = alloc_lcrecord_type (Lisp_Char_ID_Table, &lrecord_char_id_table);
838 cte->table = make_byte_table (initval);
840 XSETCHAR_ID_TABLE (obj, cte);
846 get_char_id_table (Emchar ch, Lisp_Object table)
848 unsigned int code = ch;
855 (XCHAR_ID_TABLE (table)->table,
856 (unsigned char)(code >> 24)),
857 (unsigned char) (code >> 16)),
858 (unsigned char) (code >> 8)),
859 (unsigned char) code);
862 void put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table);
864 put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table)
866 unsigned int code = ch;
867 Lisp_Object table1, table2, table3, table4;
869 table1 = XCHAR_ID_TABLE (table)->table;
870 table2 = get_byte_table (table1, (unsigned char)(code >> 24));
871 table3 = get_byte_table (table2, (unsigned char)(code >> 16));
872 table4 = get_byte_table (table3, (unsigned char)(code >> 8));
874 table4 = put_byte_table (table4, (unsigned char)code, value);
875 table3 = put_byte_table (table3, (unsigned char)(code >> 8), table4);
876 table2 = put_byte_table (table2, (unsigned char)(code >> 16), table3);
877 XCHAR_ID_TABLE (table)->table
878 = put_byte_table (table1, (unsigned char)(code >> 24), table2);
882 Lisp_Object Vchar_attribute_hash_table;
883 Lisp_Object Vcharacter_composition_table;
884 Lisp_Object Vcharacter_variant_table;
886 Lisp_Object Qideograph_daikanwa;
887 Lisp_Object Q_decomposition;
891 Lisp_Object Qisolated;
892 Lisp_Object Qinitial;
895 Lisp_Object Qvertical;
896 Lisp_Object QnoBreak;
897 Lisp_Object Qfraction;
907 Emchar to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg);
909 Lisp_Object put_char_ccs_code_point (Lisp_Object character,
910 Lisp_Object ccs, Lisp_Object value);
911 Lisp_Object remove_char_ccs (Lisp_Object character, Lisp_Object ccs);
914 to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
920 else if (EQ (v, Qcompat))
922 else if (EQ (v, Qisolated))
924 else if (EQ (v, Qinitial))
926 else if (EQ (v, Qmedial))
928 else if (EQ (v, Qfinal))
930 else if (EQ (v, Qvertical))
932 else if (EQ (v, QnoBreak))
934 else if (EQ (v, Qfraction))
936 else if (EQ (v, Qsuper))
938 else if (EQ (v, Qsub))
940 else if (EQ (v, Qcircle))
942 else if (EQ (v, Qsquare))
944 else if (EQ (v, Qwide))
946 else if (EQ (v, Qnarrow))
948 else if (EQ (v, Qsmall))
950 else if (EQ (v, Qfont))
953 signal_simple_error (err_msg, err_arg);
956 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
957 Return character corresponding with list.
961 Lisp_Object table = Vcharacter_composition_table;
962 Lisp_Object rest = list;
966 Lisp_Object v = Fcar (rest);
968 Emchar c = to_char_id (v, "Invalid value for composition", list);
970 ret = get_char_id_table (c, table);
975 if (!CHAR_ID_TABLE_P (ret))
980 else if (!CONSP (rest))
982 else if (CHAR_ID_TABLE_P (ret))
985 signal_simple_error ("Invalid table is found with", list);
987 signal_simple_error ("Invalid value for composition", list);
990 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
991 Return variants of CHARACTER.
995 CHECK_CHAR (character);
996 return Fcopy_list (get_char_id_table (XCHAR (character),
997 Vcharacter_variant_table));
1001 /* We store the char-attributes in hash tables with the names as the
1002 key and the actual char-id-table object as the value. Occasionally
1003 we need to use them in a list format. These routines provide us
1005 struct char_attribute_list_closure
1007 Lisp_Object *char_attribute_list;
1011 add_char_attribute_to_list_mapper (Lisp_Object key, Lisp_Object value,
1012 void *char_attribute_list_closure)
1014 /* This function can GC */
1015 struct char_attribute_list_closure *calcl
1016 = (struct char_attribute_list_closure*) char_attribute_list_closure;
1017 Lisp_Object *char_attribute_list = calcl->char_attribute_list;
1019 *char_attribute_list = Fcons (key, *char_attribute_list);
1023 DEFUN ("char-attribute-list", Fchar_attribute_list, 0, 0, 0, /*
1024 Return the list of all existing character attributes except coded-charsets.
1028 Lisp_Object char_attribute_list = Qnil;
1029 struct gcpro gcpro1;
1030 struct char_attribute_list_closure char_attribute_list_closure;
1032 GCPRO1 (char_attribute_list);
1033 char_attribute_list_closure.char_attribute_list = &char_attribute_list;
1034 elisp_maphash (add_char_attribute_to_list_mapper,
1035 Vchar_attribute_hash_table,
1036 &char_attribute_list_closure);
1038 return char_attribute_list;
1041 DEFUN ("find-char-attribute-table", Ffind_char_attribute_table, 1, 1, 0, /*
1042 Return char-id-table corresponding to ATTRIBUTE.
1046 return Fgethash (attribute, Vchar_attribute_hash_table, Qnil);
1050 /* We store the char-id-tables in hash tables with the attributes as
1051 the key and the actual char-id-table object as the value. Each
1052 char-id-table stores values of an attribute corresponding with
1053 characters. Occasionally we need to get attributes of a character
1054 in a association-list format. These routines provide us with
1056 struct char_attribute_alist_closure
1059 Lisp_Object *char_attribute_alist;
1063 add_char_attribute_alist_mapper (Lisp_Object key, Lisp_Object value,
1064 void *char_attribute_alist_closure)
1066 /* This function can GC */
1067 struct char_attribute_alist_closure *caacl =
1068 (struct char_attribute_alist_closure*) char_attribute_alist_closure;
1069 Lisp_Object ret = get_char_id_table (caacl->char_id, value);
1070 if (!UNBOUNDP (ret))
1072 Lisp_Object *char_attribute_alist = caacl->char_attribute_alist;
1073 *char_attribute_alist
1074 = Fcons (Fcons (key, ret), *char_attribute_alist);
1079 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
1080 Return the alist of attributes of CHARACTER.
1084 Lisp_Object alist = Qnil;
1087 CHECK_CHAR (character);
1089 struct gcpro gcpro1;
1090 struct char_attribute_alist_closure char_attribute_alist_closure;
1093 char_attribute_alist_closure.char_id = XCHAR (character);
1094 char_attribute_alist_closure.char_attribute_alist = &alist;
1095 elisp_maphash (add_char_attribute_alist_mapper,
1096 Vchar_attribute_hash_table,
1097 &char_attribute_alist_closure);
1101 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
1103 Lisp_Object ccs = chlook->charset_by_leading_byte[i];
1107 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1110 if ( CHAR_ID_TABLE_P (encoding_table)
1111 && INTP (cpos = get_char_id_table (XCHAR (character),
1114 alist = Fcons (Fcons (ccs, cpos), alist);
1121 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 3, 0, /*
1122 Return the value of CHARACTER's ATTRIBUTE.
1123 Return DEFAULT-VALUE if the value is not exist.
1125 (character, attribute, default_value))
1129 CHECK_CHAR (character);
1130 if (!NILP (ccs = Ffind_charset (attribute)))
1132 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1134 if (CHAR_ID_TABLE_P (encoding_table))
1135 return get_char_id_table (XCHAR (character), encoding_table);
1139 Lisp_Object table = Fgethash (attribute,
1140 Vchar_attribute_hash_table,
1142 if (!UNBOUNDP (table))
1144 Lisp_Object ret = get_char_id_table (XCHAR (character), table);
1145 if (!UNBOUNDP (ret))
1149 return default_value;
1152 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
1153 Store CHARACTER's ATTRIBUTE with VALUE.
1155 (character, attribute, value))
1159 CHECK_CHAR (character);
1160 ccs = Ffind_charset (attribute);
1163 return put_char_ccs_code_point (character, ccs, value);
1165 else if (EQ (attribute, Q_decomposition))
1170 signal_simple_error ("Invalid value for ->decomposition",
1173 if (CONSP (Fcdr (value)))
1175 Lisp_Object rest = value;
1176 Lisp_Object table = Vcharacter_composition_table;
1180 GET_EXTERNAL_LIST_LENGTH (rest, len);
1181 seq = make_vector (len, Qnil);
1183 while (CONSP (rest))
1185 Lisp_Object v = Fcar (rest);
1188 = to_char_id (v, "Invalid value for ->decomposition", value);
1191 XVECTOR_DATA(seq)[i++] = v;
1193 XVECTOR_DATA(seq)[i++] = make_char (c);
1197 put_char_id_table (c, character, table);
1202 ntable = get_char_id_table (c, table);
1203 if (!CHAR_ID_TABLE_P (ntable))
1205 ntable = make_char_id_table (Qnil);
1206 put_char_id_table (c, ntable, table);
1214 Lisp_Object v = Fcar (value);
1218 Emchar c = XINT (v);
1220 = get_char_id_table (c, Vcharacter_variant_table);
1222 if (NILP (Fmemq (v, ret)))
1224 put_char_id_table (c, Fcons (character, ret),
1225 Vcharacter_variant_table);
1228 seq = make_vector (1, v);
1232 else if (EQ (attribute, Q_ucs))
1238 signal_simple_error ("Invalid value for ->ucs", value);
1242 ret = get_char_id_table (c, Vcharacter_variant_table);
1243 if (NILP (Fmemq (character, ret)))
1245 put_char_id_table (c, Fcons (character, ret),
1246 Vcharacter_variant_table);
1250 Lisp_Object table = Fgethash (attribute,
1251 Vchar_attribute_hash_table,
1256 table = make_char_id_table (Qunbound);
1257 Fputhash (attribute, table, Vchar_attribute_hash_table);
1259 put_char_id_table (XCHAR (character), value, table);
1264 DEFUN ("remove-char-attribute", Fremove_char_attribute, 2, 2, 0, /*
1265 Remove CHARACTER's ATTRIBUTE.
1267 (character, attribute))
1271 CHECK_CHAR (character);
1272 ccs = Ffind_charset (attribute);
1275 return remove_char_ccs (character, ccs);
1279 Lisp_Object table = Fgethash (attribute,
1280 Vchar_attribute_hash_table,
1282 if (!UNBOUNDP (table))
1284 put_char_id_table (XCHAR (character), Qunbound, table);
1291 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
1293 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
1295 /* ad-hoc method for `ascii' */
1296 if ((CHARSET_CHARS (cs) == 94) &&
1297 (CHARSET_BYTE_OFFSET (cs) != 33))
1298 return 128 - CHARSET_BYTE_OFFSET (cs);
1300 return CHARSET_CHARS (cs);
1303 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
1305 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
1307 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
1311 if (XVECTOR_LENGTH (v) > ccs_len)
1314 for (i = 0; i < XVECTOR_LENGTH (v); i++)
1316 Lisp_Object c = XVECTOR_DATA(v)[i];
1318 if (!NILP (c) && !CHARP (c))
1322 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
1334 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1337 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1347 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1348 nv = XVECTOR_DATA(v)[i];
1354 XVECTOR_DATA(v)[i] = Qnil;
1358 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1359 int code_point, Lisp_Object character);
1361 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1362 int code_point, Lisp_Object character)
1366 int ccs_len = XVECTOR_LENGTH (v);
1371 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1372 nv = XVECTOR_DATA(v)[i];
1376 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
1382 XVECTOR_DATA(v)[i] = character;
1386 put_char_ccs_code_point (Lisp_Object character,
1387 Lisp_Object ccs, Lisp_Object value)
1389 Lisp_Object encoding_table;
1391 if (!EQ (XCHARSET_NAME (ccs), Qucs)
1392 || (XCHAR (character) != XINT (value)))
1394 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
1395 int dim = XCHARSET_DIMENSION (ccs);
1396 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
1397 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
1401 { /* obsolete representation: value must be a list of bytes */
1402 Lisp_Object ret = Fcar (value);
1406 signal_simple_error ("Invalid value for coded-charset", value);
1407 code_point = XINT (ret);
1408 if (XCHARSET_GRAPHIC (ccs) == 1)
1410 rest = Fcdr (value);
1411 while (!NILP (rest))
1416 signal_simple_error ("Invalid value for coded-charset",
1420 signal_simple_error ("Invalid value for coded-charset",
1423 if (XCHARSET_GRAPHIC (ccs) == 1)
1425 code_point = (code_point << 8) | j;
1428 value = make_int (code_point);
1430 else if (INTP (value))
1432 code_point = XINT (value);
1433 if (XCHARSET_GRAPHIC (ccs) == 1)
1435 code_point &= 0x7F7F7F7F;
1436 value = make_int (code_point);
1440 signal_simple_error ("Invalid value for coded-charset", value);
1444 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
1447 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
1452 XCHARSET_DECODING_TABLE (ccs)
1453 = v = make_older_vector (ccs_len, Qnil);
1456 decoding_table_put_char (v, dim, byte_offset, code_point, character);
1458 if (NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
1460 XCHARSET_ENCODING_TABLE (ccs)
1461 = encoding_table = make_char_id_table (Qnil);
1463 put_char_id_table (XCHAR (character), value, encoding_table);
1468 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
1470 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
1471 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1473 if (VECTORP (decoding_table))
1475 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
1479 decoding_table_remove_char (decoding_table,
1480 XCHARSET_DIMENSION (ccs),
1481 XCHARSET_BYTE_OFFSET (ccs),
1485 if (CHAR_ID_TABLE_P (encoding_table))
1487 put_char_id_table (XCHAR (character), Qnil, encoding_table);
1492 EXFUN (Fmake_char, 3);
1493 EXFUN (Fdecode_char, 2);
1495 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
1496 Store character's ATTRIBUTES.
1500 Lisp_Object rest = attributes;
1501 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
1502 Lisp_Object character;
1506 while (CONSP (rest))
1508 Lisp_Object cell = Fcar (rest);
1512 signal_simple_error ("Invalid argument", attributes);
1513 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
1514 && ((XCHARSET_FINAL (ccs) != 0) ||
1515 (XCHARSET_UCS_MAX (ccs) > 0)) )
1519 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
1521 character = Fdecode_char (ccs, cell);
1522 if (!NILP (character))
1523 goto setup_attributes;
1527 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
1530 signal_simple_error ("Invalid argument", attributes);
1532 character = make_char (XINT (code) + 0x100000);
1533 goto setup_attributes;
1537 else if (!INTP (code))
1538 signal_simple_error ("Invalid argument", attributes);
1540 character = make_char (XINT (code));
1544 while (CONSP (rest))
1546 Lisp_Object cell = Fcar (rest);
1549 signal_simple_error ("Invalid argument", attributes);
1551 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
1557 Lisp_Object Vutf_2000_version;
1561 int leading_code_private_11;
1564 Lisp_Object Qcharsetp;
1566 /* Qdoc_string, Qdimension, Qchars defined in general.c */
1567 Lisp_Object Qregistry, Qfinal, Qgraphic;
1568 Lisp_Object Qdirection;
1569 Lisp_Object Qreverse_direction_charset;
1570 Lisp_Object Qleading_byte;
1571 Lisp_Object Qshort_name, Qlong_name;
1585 Qcyrillic_iso8859_5,
1587 Qjapanese_jisx0208_1978,
1590 Qjapanese_jisx0208_1990,
1593 Qchinese_cns11643_1,
1594 Qchinese_cns11643_2,
1600 Qlatin_viscii_lower,
1601 Qlatin_viscii_upper,
1602 Qvietnamese_viscii_lower,
1603 Qvietnamese_viscii_upper,
1615 Qideograph_gt_pj_10,
1616 Qideograph_gt_pj_11,
1646 Lisp_Object Ql2r, Qr2l;
1648 Lisp_Object Vcharset_hash_table;
1650 /* Composite characters are characters constructed by overstriking two
1651 or more regular characters.
1653 1) The old Mule implementation involves storing composite characters
1654 in a buffer as a tag followed by all of the actual characters
1655 used to make up the composite character. I think this is a bad
1656 idea; it greatly complicates code that wants to handle strings
1657 one character at a time because it has to deal with the possibility
1658 of great big ungainly characters. It's much more reasonable to
1659 simply store an index into a table of composite characters.
1661 2) The current implementation only allows for 16,384 separate
1662 composite characters over the lifetime of the XEmacs process.
1663 This could become a potential problem if the user
1664 edited lots of different files that use composite characters.
1665 Due to FSF bogosity, increasing the number of allowable
1666 composite characters under Mule would decrease the number
1667 of possible faces that can exist. Mule already has shrunk
1668 this to 2048, and further shrinkage would become uncomfortable.
1669 No such problems exist in XEmacs.
1671 Composite characters could be represented as 0x80 C1 C2 C3,
1672 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1673 for slightly under 2^20 (one million) composite characters
1674 over the XEmacs process lifetime, and you only need to
1675 increase the size of a Mule character from 19 to 21 bits.
1676 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1677 85 million (slightly over 2^26) composite characters. */
1680 /************************************************************************/
1681 /* Basic Emchar functions */
1682 /************************************************************************/
1684 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1685 string in STR. Returns the number of bytes stored.
1686 Do not call this directly. Use the macro set_charptr_emchar() instead.
1690 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1696 Lisp_Object charset;
1705 else if ( c <= 0x7ff )
1707 *p++ = (c >> 6) | 0xc0;
1708 *p++ = (c & 0x3f) | 0x80;
1710 else if ( c <= 0xffff )
1712 *p++ = (c >> 12) | 0xe0;
1713 *p++ = ((c >> 6) & 0x3f) | 0x80;
1714 *p++ = (c & 0x3f) | 0x80;
1716 else if ( c <= 0x1fffff )
1718 *p++ = (c >> 18) | 0xf0;
1719 *p++ = ((c >> 12) & 0x3f) | 0x80;
1720 *p++ = ((c >> 6) & 0x3f) | 0x80;
1721 *p++ = (c & 0x3f) | 0x80;
1723 else if ( c <= 0x3ffffff )
1725 *p++ = (c >> 24) | 0xf8;
1726 *p++ = ((c >> 18) & 0x3f) | 0x80;
1727 *p++ = ((c >> 12) & 0x3f) | 0x80;
1728 *p++ = ((c >> 6) & 0x3f) | 0x80;
1729 *p++ = (c & 0x3f) | 0x80;
1733 *p++ = (c >> 30) | 0xfc;
1734 *p++ = ((c >> 24) & 0x3f) | 0x80;
1735 *p++ = ((c >> 18) & 0x3f) | 0x80;
1736 *p++ = ((c >> 12) & 0x3f) | 0x80;
1737 *p++ = ((c >> 6) & 0x3f) | 0x80;
1738 *p++ = (c & 0x3f) | 0x80;
1741 BREAKUP_CHAR (c, charset, c1, c2);
1742 lb = CHAR_LEADING_BYTE (c);
1743 if (LEADING_BYTE_PRIVATE_P (lb))
1744 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1746 if (EQ (charset, Vcharset_control_1))
1755 /* Return the first character from a Mule-encoded string in STR,
1756 assuming it's non-ASCII. Do not call this directly.
1757 Use the macro charptr_emchar() instead. */
1760 non_ascii_charptr_emchar (const Bufbyte *str)
1773 else if ( b >= 0xf8 )
1778 else if ( b >= 0xf0 )
1783 else if ( b >= 0xe0 )
1788 else if ( b >= 0xc0 )
1798 for( ; len > 0; len-- )
1801 ch = ( ch << 6 ) | ( b & 0x3f );
1805 Bufbyte i0 = *str, i1, i2 = 0;
1806 Lisp_Object charset;
1808 if (i0 == LEADING_BYTE_CONTROL_1)
1809 return (Emchar) (*++str - 0x20);
1811 if (LEADING_BYTE_PREFIX_P (i0))
1816 charset = CHARSET_BY_LEADING_BYTE (i0);
1817 if (XCHARSET_DIMENSION (charset) == 2)
1820 return MAKE_CHAR (charset, i1, i2);
1824 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1825 Do not call this directly. Use the macro valid_char_p() instead. */
1829 non_ascii_valid_char_p (Emchar ch)
1833 /* Must have only lowest 19 bits set */
1837 f1 = CHAR_FIELD1 (ch);
1838 f2 = CHAR_FIELD2 (ch);
1839 f3 = CHAR_FIELD3 (ch);
1843 Lisp_Object charset;
1845 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1846 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1847 f2 > MAX_CHAR_FIELD2_PRIVATE)
1852 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1853 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1857 NOTE: This takes advantage of the fact that
1858 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1859 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1861 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1862 if (EQ (charset, Qnil))
1864 return (XCHARSET_CHARS (charset) == 96);
1868 Lisp_Object charset;
1870 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1871 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1872 f1 > MAX_CHAR_FIELD1_PRIVATE)
1874 if (f2 < 0x20 || f3 < 0x20)
1877 #ifdef ENABLE_COMPOSITE_CHARS
1878 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1880 if (UNBOUNDP (Fgethash (make_int (ch),
1881 Vcomposite_char_char2string_hash_table,
1886 #endif /* ENABLE_COMPOSITE_CHARS */
1888 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1889 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1892 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1894 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1897 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1899 if (EQ (charset, Qnil))
1901 return (XCHARSET_CHARS (charset) == 96);
1907 /************************************************************************/
1908 /* Basic string functions */
1909 /************************************************************************/
1911 /* Copy the character pointed to by SRC into DST. Do not call this
1912 directly. Use the macro charptr_copy_char() instead.
1913 Return the number of bytes copied. */
1916 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
1918 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
1920 for (i = bytes; i; i--, dst++, src++)
1926 /************************************************************************/
1927 /* streams of Emchars */
1928 /************************************************************************/
1930 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1931 The functions below are not meant to be called directly; use
1932 the macros in insdel.h. */
1935 Lstream_get_emchar_1 (Lstream *stream, int ch)
1937 Bufbyte str[MAX_EMCHAR_LEN];
1938 Bufbyte *strptr = str;
1941 str[0] = (Bufbyte) ch;
1943 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
1945 int c = Lstream_getc (stream);
1946 bufpos_checking_assert (c >= 0);
1947 *++strptr = (Bufbyte) c;
1949 return charptr_emchar (str);
1953 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1955 Bufbyte str[MAX_EMCHAR_LEN];
1956 Bytecount len = set_charptr_emchar (str, ch);
1957 return Lstream_write (stream, str, len);
1961 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1963 Bufbyte str[MAX_EMCHAR_LEN];
1964 Bytecount len = set_charptr_emchar (str, ch);
1965 Lstream_unread (stream, str, len);
1969 /************************************************************************/
1970 /* charset object */
1971 /************************************************************************/
1974 mark_charset (Lisp_Object obj)
1976 Lisp_Charset *cs = XCHARSET (obj);
1978 mark_object (cs->short_name);
1979 mark_object (cs->long_name);
1980 mark_object (cs->doc_string);
1981 mark_object (cs->registry);
1982 mark_object (cs->ccl_program);
1984 mark_object (cs->encoding_table);
1985 /* mark_object (cs->decoding_table); */
1991 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1993 Lisp_Charset *cs = XCHARSET (obj);
1997 error ("printing unreadable object #<charset %s 0x%x>",
1998 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
2001 write_c_string ("#<charset ", printcharfun);
2002 print_internal (CHARSET_NAME (cs), printcharfun, 0);
2003 write_c_string (" ", printcharfun);
2004 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
2005 write_c_string (" ", printcharfun);
2006 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
2007 write_c_string (" ", printcharfun);
2008 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
2009 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
2011 CHARSET_DIMENSION (cs),
2012 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
2013 CHARSET_COLUMNS (cs),
2014 CHARSET_GRAPHIC (cs),
2015 CHARSET_FINAL (cs));
2016 write_c_string (buf, printcharfun);
2017 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
2018 sprintf (buf, " 0x%x>", cs->header.uid);
2019 write_c_string (buf, printcharfun);
2022 static const struct lrecord_description charset_description[] = {
2023 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
2024 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
2025 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
2026 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
2027 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
2028 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
2029 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
2031 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
2032 { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
2037 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
2038 mark_charset, print_charset, 0, 0, 0,
2039 charset_description,
2042 /* Make a new charset. */
2043 /* #### SJT Should generic properties be allowed? */
2045 make_charset (Charset_ID id, Lisp_Object name,
2046 unsigned short chars, unsigned char dimension,
2047 unsigned char columns, unsigned char graphic,
2048 Bufbyte final, unsigned char direction, Lisp_Object short_name,
2049 Lisp_Object long_name, Lisp_Object doc,
2051 Lisp_Object decoding_table,
2052 Emchar ucs_min, Emchar ucs_max,
2053 Emchar code_offset, unsigned char byte_offset)
2056 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
2060 XSETCHARSET (obj, cs);
2062 CHARSET_ID (cs) = id;
2063 CHARSET_NAME (cs) = name;
2064 CHARSET_SHORT_NAME (cs) = short_name;
2065 CHARSET_LONG_NAME (cs) = long_name;
2066 CHARSET_CHARS (cs) = chars;
2067 CHARSET_DIMENSION (cs) = dimension;
2068 CHARSET_DIRECTION (cs) = direction;
2069 CHARSET_COLUMNS (cs) = columns;
2070 CHARSET_GRAPHIC (cs) = graphic;
2071 CHARSET_FINAL (cs) = final;
2072 CHARSET_DOC_STRING (cs) = doc;
2073 CHARSET_REGISTRY (cs) = reg;
2074 CHARSET_CCL_PROGRAM (cs) = Qnil;
2075 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
2077 CHARSET_DECODING_TABLE(cs) = Qnil;
2078 CHARSET_ENCODING_TABLE(cs) = Qnil;
2079 CHARSET_UCS_MIN(cs) = ucs_min;
2080 CHARSET_UCS_MAX(cs) = ucs_max;
2081 CHARSET_CODE_OFFSET(cs) = code_offset;
2082 CHARSET_BYTE_OFFSET(cs) = byte_offset;
2086 if (id == LEADING_BYTE_ASCII)
2087 CHARSET_REP_BYTES (cs) = 1;
2089 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
2091 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
2096 /* some charsets do not have final characters. This includes
2097 ASCII, Control-1, Composite, and the two faux private
2099 unsigned char iso2022_type
2100 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
2102 if (code_offset == 0)
2104 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
2105 chlook->charset_by_attributes[iso2022_type][final] = obj;
2109 (chlook->charset_by_attributes[iso2022_type][final][direction]));
2110 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
2114 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
2115 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
2117 /* Some charsets are "faux" and don't have names or really exist at
2118 all except in the leading-byte table. */
2120 Fputhash (name, obj, Vcharset_hash_table);
2125 get_unallocated_leading_byte (int dimension)
2130 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
2133 lb = chlook->next_allocated_leading_byte++;
2137 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
2140 lb = chlook->next_allocated_1_byte_leading_byte++;
2144 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
2147 lb = chlook->next_allocated_2_byte_leading_byte++;
2153 ("No more character sets free for this dimension",
2154 make_int (dimension));
2160 /* Number of Big5 characters which have the same code in 1st byte. */
2162 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
2165 make_builtin_char (Lisp_Object charset, int c1, int c2)
2167 if (XCHARSET_UCS_MAX (charset))
2170 = (XCHARSET_DIMENSION (charset) == 1
2172 c1 - XCHARSET_BYTE_OFFSET (charset)
2174 (c1 - XCHARSET_BYTE_OFFSET (charset)) * XCHARSET_CHARS (charset)
2175 + c2 - XCHARSET_BYTE_OFFSET (charset))
2176 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
2177 if ((code < XCHARSET_UCS_MIN (charset))
2178 || (XCHARSET_UCS_MAX (charset) < code))
2179 signal_simple_error ("Arguments makes invalid character",
2183 else if (XCHARSET_DIMENSION (charset) == 1)
2185 if (XCHARSET_FINAL (charset) == 0)
2187 switch (XCHARSET_CHARS (charset))
2191 + (XCHARSET_FINAL (charset) - '0') * 94 + (c1 - 33);
2194 + (XCHARSET_FINAL (charset) - '0') * 96 + (c1 - 32);
2201 if (EQ (charset, Vcharset_chinese_big5))
2203 int B1 = c1, B2 = c2;
2205 = (B1 - 0xA1) * BIG5_SAME_ROW
2206 + B2 - (B2 < 0x7F ? 0x40 : 0x62);
2210 charset = Vcharset_chinese_big5_1;
2214 charset = Vcharset_chinese_big5_2;
2215 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
2220 if (XCHARSET_FINAL (charset) == 0)
2222 switch (XCHARSET_CHARS (charset))
2225 return MIN_CHAR_94x94
2226 + (XCHARSET_FINAL (charset) - '0') * 94 * 94
2227 + (c1 - 33) * 94 + (c2 - 33);
2229 return MIN_CHAR_96x96
2230 + (XCHARSET_FINAL (charset) - '0') * 96 * 96
2231 + (c1 - 32) * 96 + (c2 - 32);
2239 range_charset_code_point (Lisp_Object charset, Emchar ch)
2243 if ((XCHARSET_UCS_MIN (charset) <= ch)
2244 && (ch <= XCHARSET_UCS_MAX (charset)))
2246 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
2248 if (XCHARSET_CHARS (charset) == 256)
2250 else if (XCHARSET_DIMENSION (charset) == 1)
2251 return d + XCHARSET_BYTE_OFFSET (charset);
2252 else if (XCHARSET_DIMENSION (charset) == 2)
2254 ((d / XCHARSET_CHARS (charset)
2255 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2256 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2257 else if (XCHARSET_DIMENSION (charset) == 3)
2259 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2260 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2261 | ((d / XCHARSET_CHARS (charset)
2262 % XCHARSET_CHARS (charset)
2263 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2264 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2265 else /* if (XCHARSET_DIMENSION (charset) == 4) */
2267 ((d / (XCHARSET_CHARS (charset)
2268 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2269 + XCHARSET_BYTE_OFFSET (charset)) << 24)
2270 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2271 % XCHARSET_CHARS (charset)
2272 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2273 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
2274 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2275 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2277 else if (XCHARSET_CODE_OFFSET (charset) == 0)
2279 if (XCHARSET_DIMENSION (charset) == 1)
2281 if (XCHARSET_CHARS (charset) == 94)
2283 if (((d = ch - (MIN_CHAR_94
2284 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
2288 else if (XCHARSET_CHARS (charset) == 96)
2290 if (((d = ch - (MIN_CHAR_96
2291 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
2298 else if (XCHARSET_DIMENSION (charset) == 2)
2300 if (XCHARSET_CHARS (charset) == 94)
2302 if (((d = ch - (MIN_CHAR_94x94
2303 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
2306 return (((d / 94) + 33) << 8) | (d % 94 + 33);
2308 else if (XCHARSET_CHARS (charset) == 96)
2310 if (((d = ch - (MIN_CHAR_96x96
2311 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
2314 return (((d / 96) + 32) << 8) | (d % 96 + 32);
2320 if (EQ (charset, Vcharset_mojikyo_2022_1)
2321 && (MIN_CHAR_MOJIKYO < ch) && (ch < MIN_CHAR_MOJIKYO + 94 * 60 * 94))
2323 int m = ch - MIN_CHAR_MOJIKYO - 1;
2324 int byte1 = m / (94 * 60) + 33;
2325 int byte2 = (m % (94 * 60)) / 94;
2326 int byte3 = m % 94 + 33;
2332 return (byte1 << 16) | (byte2 << 8) | byte3;
2338 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
2340 if (c <= MAX_CHAR_BASIC_LATIN)
2342 *charset = Vcharset_ascii;
2347 *charset = Vcharset_control_1;
2352 *charset = Vcharset_latin_iso8859_1;
2356 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
2358 *charset = Vcharset_greek_iso8859_7;
2359 return c - MIN_CHAR_GREEK + 0x20;
2361 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
2363 *charset = Vcharset_cyrillic_iso8859_5;
2364 return c - MIN_CHAR_CYRILLIC + 0x20;
2367 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
2369 *charset = Vcharset_hebrew_iso8859_8;
2370 return c - MIN_CHAR_HEBREW + 0x20;
2372 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
2374 *charset = Vcharset_thai_tis620;
2375 return c - MIN_CHAR_THAI + 0x20;
2378 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
2379 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
2381 return list2 (Vcharset_katakana_jisx0201,
2382 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
2385 else if (c <= MAX_CHAR_BMP)
2387 *charset = Vcharset_ucs_bmp;
2390 else if (c < MIN_CHAR_DAIKANWA)
2392 *charset = Vcharset_ucs;
2395 else if (c <= MAX_CHAR_DAIKANWA)
2397 *charset = Vcharset_ideograph_daikanwa;
2398 return c - MIN_CHAR_DAIKANWA;
2400 else if (c <= MAX_CHAR_MOJIKYO_0)
2402 *charset = Vcharset_mojikyo;
2403 return c - MIN_CHAR_MOJIKYO_0;
2405 else if (c < MIN_CHAR_94)
2407 *charset = Vcharset_ucs;
2410 else if (c <= MAX_CHAR_94)
2412 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
2413 ((c - MIN_CHAR_94) / 94) + '0',
2414 CHARSET_LEFT_TO_RIGHT);
2415 if (!NILP (*charset))
2416 return ((c - MIN_CHAR_94) % 94) + 33;
2419 *charset = Vcharset_ucs;
2423 else if (c <= MAX_CHAR_96)
2425 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
2426 ((c - MIN_CHAR_96) / 96) + '0',
2427 CHARSET_LEFT_TO_RIGHT);
2428 if (!NILP (*charset))
2429 return ((c - MIN_CHAR_96) % 96) + 32;
2432 *charset = Vcharset_ucs;
2436 else if (c <= MAX_CHAR_94x94)
2439 = CHARSET_BY_ATTRIBUTES (94, 2,
2440 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
2441 CHARSET_LEFT_TO_RIGHT);
2442 if (!NILP (*charset))
2443 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
2444 | (((c - MIN_CHAR_94x94) % 94) + 33);
2447 *charset = Vcharset_ucs;
2451 else if (c <= MAX_CHAR_96x96)
2454 = CHARSET_BY_ATTRIBUTES (96, 2,
2455 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
2456 CHARSET_LEFT_TO_RIGHT);
2457 if (!NILP (*charset))
2458 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
2459 | (((c - MIN_CHAR_96x96) % 96) + 32);
2462 *charset = Vcharset_ucs;
2466 else if (c < MIN_CHAR_MOJIKYO)
2468 *charset = Vcharset_ucs;
2471 else if (c <= MAX_CHAR_MOJIKYO)
2473 *charset = Vcharset_mojikyo;
2474 return c - MIN_CHAR_MOJIKYO;
2478 *charset = Vcharset_ucs;
2483 Lisp_Object Vdefault_coded_charset_priority_list;
2487 /************************************************************************/
2488 /* Basic charset Lisp functions */
2489 /************************************************************************/
2491 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
2492 Return non-nil if OBJECT is a charset.
2496 return CHARSETP (object) ? Qt : Qnil;
2499 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
2500 Retrieve the charset of the given name.
2501 If CHARSET-OR-NAME is a charset object, it is simply returned.
2502 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
2503 nil is returned. Otherwise the associated charset object is returned.
2507 if (CHARSETP (charset_or_name))
2508 return charset_or_name;
2510 CHECK_SYMBOL (charset_or_name);
2511 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
2514 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
2515 Retrieve the charset of the given name.
2516 Same as `find-charset' except an error is signalled if there is no such
2517 charset instead of returning nil.
2521 Lisp_Object charset = Ffind_charset (name);
2524 signal_simple_error ("No such charset", name);
2528 /* We store the charsets in hash tables with the names as the key and the
2529 actual charset object as the value. Occasionally we need to use them
2530 in a list format. These routines provide us with that. */
2531 struct charset_list_closure
2533 Lisp_Object *charset_list;
2537 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
2538 void *charset_list_closure)
2540 /* This function can GC */
2541 struct charset_list_closure *chcl =
2542 (struct charset_list_closure*) charset_list_closure;
2543 Lisp_Object *charset_list = chcl->charset_list;
2545 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
2549 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
2550 Return a list of the names of all defined charsets.
2554 Lisp_Object charset_list = Qnil;
2555 struct gcpro gcpro1;
2556 struct charset_list_closure charset_list_closure;
2558 GCPRO1 (charset_list);
2559 charset_list_closure.charset_list = &charset_list;
2560 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
2561 &charset_list_closure);
2564 return charset_list;
2567 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
2568 Return the name of charset CHARSET.
2572 return XCHARSET_NAME (Fget_charset (charset));
2575 /* #### SJT Should generic properties be allowed? */
2576 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
2577 Define a new character set.
2578 This function is for use with Mule support.
2579 NAME is a symbol, the name by which the character set is normally referred.
2580 DOC-STRING is a string describing the character set.
2581 PROPS is a property list, describing the specific nature of the
2582 character set. Recognized properties are:
2584 'short-name Short version of the charset name (ex: Latin-1)
2585 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
2586 'registry A regular expression matching the font registry field for
2588 'dimension Number of octets used to index a character in this charset.
2589 Either 1 or 2. Defaults to 1.
2590 'columns Number of columns used to display a character in this charset.
2591 Only used in TTY mode. (Under X, the actual width of a
2592 character can be derived from the font used to display the
2593 characters.) If unspecified, defaults to the dimension
2594 (this is almost always the correct value).
2595 'chars Number of characters in each dimension (94 or 96).
2596 Defaults to 94. Note that if the dimension is 2, the
2597 character set thus described is 94x94 or 96x96.
2598 'final Final byte of ISO 2022 escape sequence. Must be
2599 supplied. Each combination of (DIMENSION, CHARS) defines a
2600 separate namespace for final bytes. Note that ISO
2601 2022 restricts the final byte to the range
2602 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
2603 dimension == 2. Note also that final bytes in the range
2604 0x30 - 0x3F are reserved for user-defined (not official)
2606 'graphic 0 (use left half of font on output) or 1 (use right half
2607 of font on output). Defaults to 0. For example, for
2608 a font whose registry is ISO8859-1, the left half
2609 (octets 0x20 - 0x7F) is the `ascii' character set, while
2610 the right half (octets 0xA0 - 0xFF) is the `latin-1'
2611 character set. With 'graphic set to 0, the octets
2612 will have their high bit cleared; with it set to 1,
2613 the octets will have their high bit set.
2614 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
2616 'ccl-program A compiled CCL program used to convert a character in
2617 this charset into an index into the font. This is in
2618 addition to the 'graphic property. The CCL program
2619 is passed the octets of the character, with the high
2620 bit cleared and set depending upon whether the value
2621 of the 'graphic property is 0 or 1.
2623 (name, doc_string, props))
2625 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
2626 int direction = CHARSET_LEFT_TO_RIGHT;
2627 Lisp_Object registry = Qnil;
2628 Lisp_Object charset;
2629 Lisp_Object ccl_program = Qnil;
2630 Lisp_Object short_name = Qnil, long_name = Qnil;
2631 int byte_offset = -1;
2633 CHECK_SYMBOL (name);
2634 if (!NILP (doc_string))
2635 CHECK_STRING (doc_string);
2637 charset = Ffind_charset (name);
2638 if (!NILP (charset))
2639 signal_simple_error ("Cannot redefine existing charset", name);
2642 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
2644 if (EQ (keyword, Qshort_name))
2646 CHECK_STRING (value);
2650 if (EQ (keyword, Qlong_name))
2652 CHECK_STRING (value);
2656 else if (EQ (keyword, Qdimension))
2659 dimension = XINT (value);
2660 if (dimension < 1 || dimension > 2)
2661 signal_simple_error ("Invalid value for 'dimension", value);
2664 else if (EQ (keyword, Qchars))
2667 chars = XINT (value);
2668 if (chars != 94 && chars != 96)
2669 signal_simple_error ("Invalid value for 'chars", value);
2672 else if (EQ (keyword, Qcolumns))
2675 columns = XINT (value);
2676 if (columns != 1 && columns != 2)
2677 signal_simple_error ("Invalid value for 'columns", value);
2680 else if (EQ (keyword, Qgraphic))
2683 graphic = XINT (value);
2685 if (graphic < 0 || graphic > 2)
2687 if (graphic < 0 || graphic > 1)
2689 signal_simple_error ("Invalid value for 'graphic", value);
2692 else if (EQ (keyword, Qregistry))
2694 CHECK_STRING (value);
2698 else if (EQ (keyword, Qdirection))
2700 if (EQ (value, Ql2r))
2701 direction = CHARSET_LEFT_TO_RIGHT;
2702 else if (EQ (value, Qr2l))
2703 direction = CHARSET_RIGHT_TO_LEFT;
2705 signal_simple_error ("Invalid value for 'direction", value);
2708 else if (EQ (keyword, Qfinal))
2710 CHECK_CHAR_COERCE_INT (value);
2711 final = XCHAR (value);
2712 if (final < '0' || final > '~')
2713 signal_simple_error ("Invalid value for 'final", value);
2716 else if (EQ (keyword, Qccl_program))
2718 struct ccl_program test_ccl;
2720 if (setup_ccl_program (&test_ccl, value) < 0)
2721 signal_simple_error ("Invalid value for 'ccl-program", value);
2722 ccl_program = value;
2726 signal_simple_error ("Unrecognized property", keyword);
2731 error ("'final must be specified");
2732 if (dimension == 2 && final > 0x5F)
2734 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2737 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2738 CHARSET_LEFT_TO_RIGHT)) ||
2739 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2740 CHARSET_RIGHT_TO_LEFT)))
2742 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2744 id = get_unallocated_leading_byte (dimension);
2746 if (NILP (doc_string))
2747 doc_string = build_string ("");
2749 if (NILP (registry))
2750 registry = build_string ("");
2752 if (NILP (short_name))
2753 XSETSTRING (short_name, XSYMBOL (name)->name);
2755 if (NILP (long_name))
2756 long_name = doc_string;
2759 columns = dimension;
2761 if (byte_offset < 0)
2765 else if (chars == 96)
2771 charset = make_charset (id, name, chars, dimension, columns, graphic,
2772 final, direction, short_name, long_name,
2773 doc_string, registry,
2774 Qnil, 0, 0, 0, byte_offset);
2775 if (!NILP (ccl_program))
2776 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2780 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2782 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2783 NEW-NAME is the name of the new charset. Return the new charset.
2785 (charset, new_name))
2787 Lisp_Object new_charset = Qnil;
2788 int id, chars, dimension, columns, graphic, final;
2790 Lisp_Object registry, doc_string, short_name, long_name;
2793 charset = Fget_charset (charset);
2794 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2795 signal_simple_error ("Charset already has reverse-direction charset",
2798 CHECK_SYMBOL (new_name);
2799 if (!NILP (Ffind_charset (new_name)))
2800 signal_simple_error ("Cannot redefine existing charset", new_name);
2802 cs = XCHARSET (charset);
2804 chars = CHARSET_CHARS (cs);
2805 dimension = CHARSET_DIMENSION (cs);
2806 columns = CHARSET_COLUMNS (cs);
2807 id = get_unallocated_leading_byte (dimension);
2809 graphic = CHARSET_GRAPHIC (cs);
2810 final = CHARSET_FINAL (cs);
2811 direction = CHARSET_RIGHT_TO_LEFT;
2812 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2813 direction = CHARSET_LEFT_TO_RIGHT;
2814 doc_string = CHARSET_DOC_STRING (cs);
2815 short_name = CHARSET_SHORT_NAME (cs);
2816 long_name = CHARSET_LONG_NAME (cs);
2817 registry = CHARSET_REGISTRY (cs);
2819 new_charset = make_charset (id, new_name, chars, dimension, columns,
2820 graphic, final, direction, short_name, long_name,
2821 doc_string, registry,
2823 CHARSET_DECODING_TABLE(cs),
2824 CHARSET_UCS_MIN(cs),
2825 CHARSET_UCS_MAX(cs),
2826 CHARSET_CODE_OFFSET(cs),
2827 CHARSET_BYTE_OFFSET(cs)
2833 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2834 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
2839 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2840 Define symbol ALIAS as an alias for CHARSET.
2844 CHECK_SYMBOL (alias);
2845 charset = Fget_charset (charset);
2846 return Fputhash (alias, charset, Vcharset_hash_table);
2849 /* #### Reverse direction charsets not yet implemented. */
2851 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2853 Return the reverse-direction charset parallel to CHARSET, if any.
2854 This is the charset with the same properties (in particular, the same
2855 dimension, number of characters per dimension, and final byte) as
2856 CHARSET but whose characters are displayed in the opposite direction.
2860 charset = Fget_charset (charset);
2861 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
2865 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2866 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2867 If DIRECTION is omitted, both directions will be checked (left-to-right
2868 will be returned if character sets exist for both directions).
2870 (dimension, chars, final, direction))
2872 int dm, ch, fi, di = -1;
2873 Lisp_Object obj = Qnil;
2875 CHECK_INT (dimension);
2876 dm = XINT (dimension);
2877 if (dm < 1 || dm > 2)
2878 signal_simple_error ("Invalid value for DIMENSION", dimension);
2882 if (ch != 94 && ch != 96)
2883 signal_simple_error ("Invalid value for CHARS", chars);
2885 CHECK_CHAR_COERCE_INT (final);
2887 if (fi < '0' || fi > '~')
2888 signal_simple_error ("Invalid value for FINAL", final);
2890 if (EQ (direction, Ql2r))
2891 di = CHARSET_LEFT_TO_RIGHT;
2892 else if (EQ (direction, Qr2l))
2893 di = CHARSET_RIGHT_TO_LEFT;
2894 else if (!NILP (direction))
2895 signal_simple_error ("Invalid value for DIRECTION", direction);
2897 if (dm == 2 && fi > 0x5F)
2899 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2903 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
2905 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
2908 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
2911 return XCHARSET_NAME (obj);
2915 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2916 Return short name of CHARSET.
2920 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2923 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2924 Return long name of CHARSET.
2928 return XCHARSET_LONG_NAME (Fget_charset (charset));
2931 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2932 Return description of CHARSET.
2936 return XCHARSET_DOC_STRING (Fget_charset (charset));
2939 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2940 Return dimension of CHARSET.
2944 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2947 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2948 Return property PROP of CHARSET, a charset object or symbol naming a charset.
2949 Recognized properties are those listed in `make-charset', as well as
2950 'name and 'doc-string.
2956 charset = Fget_charset (charset);
2957 cs = XCHARSET (charset);
2959 CHECK_SYMBOL (prop);
2960 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2961 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2962 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2963 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2964 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2965 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2966 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2967 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2968 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2969 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2970 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2971 if (EQ (prop, Qdirection))
2972 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2973 if (EQ (prop, Qreverse_direction_charset))
2975 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2976 /* #### Is this translation OK? If so, error checking sufficient? */
2977 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
2979 signal_simple_error ("Unrecognized charset property name", prop);
2980 return Qnil; /* not reached */
2983 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2984 Return charset identification number of CHARSET.
2988 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2991 /* #### We need to figure out which properties we really want to
2994 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2995 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2997 (charset, ccl_program))
2999 struct ccl_program test_ccl;
3001 charset = Fget_charset (charset);
3002 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
3003 signal_simple_error ("Invalid ccl-program", ccl_program);
3004 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
3009 invalidate_charset_font_caches (Lisp_Object charset)
3011 /* Invalidate font cache entries for charset on all devices. */
3012 Lisp_Object devcons, concons, hash_table;
3013 DEVICE_LOOP_NO_BREAK (devcons, concons)
3015 struct device *d = XDEVICE (XCAR (devcons));
3016 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
3017 if (!UNBOUNDP (hash_table))
3018 Fclrhash (hash_table);
3022 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
3023 Set the 'registry property of CHARSET to REGISTRY.
3025 (charset, registry))
3027 charset = Fget_charset (charset);
3028 CHECK_STRING (registry);
3029 XCHARSET_REGISTRY (charset) = registry;
3030 invalidate_charset_font_caches (charset);
3031 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
3036 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
3037 Return mapping-table of CHARSET.
3041 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
3044 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
3045 Set mapping-table of CHARSET to TABLE.
3049 struct Lisp_Charset *cs;
3053 charset = Fget_charset (charset);
3054 cs = XCHARSET (charset);
3058 if (VECTORP (CHARSET_DECODING_TABLE(cs)))
3059 make_vector_newer (CHARSET_DECODING_TABLE(cs));
3060 CHARSET_DECODING_TABLE(cs) = Qnil;
3063 else if (VECTORP (table))
3065 int ccs_len = CHARSET_BYTE_SIZE (cs);
3066 int ret = decoding_table_check_elements (table,
3067 CHARSET_DIMENSION (cs),
3072 signal_simple_error ("Too big table", table);
3074 signal_simple_error ("Invalid element is found", table);
3076 signal_simple_error ("Something wrong", table);
3078 CHARSET_DECODING_TABLE(cs) = Qnil;
3081 signal_error (Qwrong_type_argument,
3082 list2 (build_translated_string ("vector-or-nil-p"),
3085 byte_offset = CHARSET_BYTE_OFFSET (cs);
3086 switch (CHARSET_DIMENSION (cs))
3089 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3091 Lisp_Object c = XVECTOR_DATA(table)[i];
3094 put_char_ccs_code_point (c, charset,
3095 make_int (i + byte_offset));
3099 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3101 Lisp_Object v = XVECTOR_DATA(table)[i];
3107 for (j = 0; j < XVECTOR_LENGTH (v); j++)
3109 Lisp_Object c = XVECTOR_DATA(v)[j];
3112 put_char_ccs_code_point
3114 make_int ( ( (i + byte_offset) << 8 )
3120 put_char_ccs_code_point (v, charset,
3121 make_int (i + byte_offset));
3130 /************************************************************************/
3131 /* Lisp primitives for working with characters */
3132 /************************************************************************/
3135 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
3136 Make a character from CHARSET and code-point CODE.
3142 charset = Fget_charset (charset);
3145 if (XCHARSET_GRAPHIC (charset) == 1)
3147 c = DECODE_CHAR (charset, c);
3148 return c ? make_char (c) : Qnil;
3151 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
3152 Make a builtin character from CHARSET and code-point CODE.
3159 charset = Fget_charset (charset);
3163 if ((final = XCHARSET_FINAL (charset)) >= '0')
3165 if (XCHARSET_DIMENSION (charset) == 1)
3167 switch (XCHARSET_CHARS (charset))
3171 make_char (MIN_CHAR_94 + (final - '0') * 94
3172 + ((c & 0x7F) - 33));
3175 make_char (MIN_CHAR_96 + (final - '0') * 96
3176 + ((c & 0x7F) - 32));
3178 return Fdecode_char (charset, code);
3183 switch (XCHARSET_CHARS (charset))
3187 make_char (MIN_CHAR_94x94
3188 + (final - '0') * 94 * 94
3189 + (((c >> 8) & 0x7F) - 33) * 94
3190 + ((c & 0x7F) - 33));
3193 make_char (MIN_CHAR_96x96
3194 + (final - '0') * 96 * 96
3195 + (((c >> 8) & 0x7F) - 32) * 96
3196 + ((c & 0x7F) - 32));
3198 return Fdecode_char (charset, code);
3202 else if (XCHARSET_UCS_MAX (charset))
3205 = (XCHARSET_DIMENSION (charset) == 1
3207 c - XCHARSET_BYTE_OFFSET (charset)
3209 ((c >> 8) - XCHARSET_BYTE_OFFSET (charset))
3210 * XCHARSET_CHARS (charset)
3211 + (c & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
3212 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
3213 if ((cid < XCHARSET_UCS_MIN (charset))
3214 || (XCHARSET_UCS_MAX (charset) < cid))
3215 return Fdecode_char (charset, code);
3216 return make_char (cid);
3219 return Fdecode_char (charset, code);
3223 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
3224 Make a character from CHARSET and octets ARG1 and ARG2.
3225 ARG2 is required only for characters from two-dimensional charsets.
3226 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
3227 character s with caron.
3229 (charset, arg1, arg2))
3233 int lowlim, highlim;
3235 charset = Fget_charset (charset);
3236 cs = XCHARSET (charset);
3238 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
3239 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
3241 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
3243 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
3244 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
3247 /* It is useful (and safe, according to Olivier Galibert) to strip
3248 the 8th bit off ARG1 and ARG2 because it allows programmers to
3249 write (make-char 'latin-iso8859-2 CODE) where code is the actual
3250 Latin 2 code of the character. */
3258 if (a1 < lowlim || a1 > highlim)
3259 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
3261 if (CHARSET_DIMENSION (cs) == 1)
3265 ("Charset is of dimension one; second octet must be nil", arg2);
3266 return make_char (MAKE_CHAR (charset, a1, 0));
3275 a2 = XINT (arg2) & 0x7f;
3277 if (a2 < lowlim || a2 > highlim)
3278 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
3280 return make_char (MAKE_CHAR (charset, a1, a2));
3283 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
3284 Return the character set of CHARACTER.
3288 CHECK_CHAR_COERCE_INT (character);
3290 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
3293 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
3294 Return the octet numbered N (should be 0 or 1) of CHARACTER.
3295 N defaults to 0 if omitted.
3299 Lisp_Object charset;
3302 CHECK_CHAR_COERCE_INT (character);
3304 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
3306 if (NILP (n) || EQ (n, Qzero))
3307 return make_int (octet0);
3308 else if (EQ (n, make_int (1)))
3309 return make_int (octet1);
3311 signal_simple_error ("Octet number must be 0 or 1", n);
3314 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
3315 Return list of charset and one or two position-codes of CHARACTER.
3319 /* This function can GC */
3320 struct gcpro gcpro1, gcpro2;
3321 Lisp_Object charset = Qnil;
3322 Lisp_Object rc = Qnil;
3330 GCPRO2 (charset, rc);
3331 CHECK_CHAR_COERCE_INT (character);
3334 code_point = ENCODE_CHAR (XCHAR (character), charset);
3335 dimension = XCHARSET_DIMENSION (charset);
3336 while (dimension > 0)
3338 rc = Fcons (make_int (code_point & 255), rc);
3342 rc = Fcons (XCHARSET_NAME (charset), rc);
3344 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
3346 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
3348 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
3352 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
3361 #ifdef ENABLE_COMPOSITE_CHARS
3362 /************************************************************************/
3363 /* composite character functions */
3364 /************************************************************************/
3367 lookup_composite_char (Bufbyte *str, int len)
3369 Lisp_Object lispstr = make_string (str, len);
3370 Lisp_Object ch = Fgethash (lispstr,
3371 Vcomposite_char_string2char_hash_table,
3377 if (composite_char_row_next >= 128)
3378 signal_simple_error ("No more composite chars available", lispstr);
3379 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
3380 composite_char_col_next);
3381 Fputhash (make_char (emch), lispstr,
3382 Vcomposite_char_char2string_hash_table);
3383 Fputhash (lispstr, make_char (emch),
3384 Vcomposite_char_string2char_hash_table);
3385 composite_char_col_next++;
3386 if (composite_char_col_next >= 128)
3388 composite_char_col_next = 32;
3389 composite_char_row_next++;
3398 composite_char_string (Emchar ch)
3400 Lisp_Object str = Fgethash (make_char (ch),
3401 Vcomposite_char_char2string_hash_table,
3403 assert (!UNBOUNDP (str));
3407 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
3408 Convert a string into a single composite character.
3409 The character is the result of overstriking all the characters in
3414 CHECK_STRING (string);
3415 return make_char (lookup_composite_char (XSTRING_DATA (string),
3416 XSTRING_LENGTH (string)));
3419 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
3420 Return a string of the characters comprising a composite character.
3428 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
3429 signal_simple_error ("Must be composite char", ch);
3430 return composite_char_string (emch);
3432 #endif /* ENABLE_COMPOSITE_CHARS */
3435 /************************************************************************/
3436 /* initialization */
3437 /************************************************************************/
3440 syms_of_mule_charset (void)
3443 INIT_LRECORD_IMPLEMENTATION (uint8_byte_table);
3444 INIT_LRECORD_IMPLEMENTATION (uint16_byte_table);
3445 INIT_LRECORD_IMPLEMENTATION (byte_table);
3446 INIT_LRECORD_IMPLEMENTATION (char_id_table);
3448 INIT_LRECORD_IMPLEMENTATION (charset);
3450 DEFSUBR (Fcharsetp);
3451 DEFSUBR (Ffind_charset);
3452 DEFSUBR (Fget_charset);
3453 DEFSUBR (Fcharset_list);
3454 DEFSUBR (Fcharset_name);
3455 DEFSUBR (Fmake_charset);
3456 DEFSUBR (Fmake_reverse_direction_charset);
3457 /* DEFSUBR (Freverse_direction_charset); */
3458 DEFSUBR (Fdefine_charset_alias);
3459 DEFSUBR (Fcharset_from_attributes);
3460 DEFSUBR (Fcharset_short_name);
3461 DEFSUBR (Fcharset_long_name);
3462 DEFSUBR (Fcharset_description);
3463 DEFSUBR (Fcharset_dimension);
3464 DEFSUBR (Fcharset_property);
3465 DEFSUBR (Fcharset_id);
3466 DEFSUBR (Fset_charset_ccl_program);
3467 DEFSUBR (Fset_charset_registry);
3469 DEFSUBR (Fchar_attribute_list);
3470 DEFSUBR (Ffind_char_attribute_table);
3471 DEFSUBR (Fchar_attribute_alist);
3472 DEFSUBR (Fget_char_attribute);
3473 DEFSUBR (Fput_char_attribute);
3474 DEFSUBR (Fremove_char_attribute);
3475 DEFSUBR (Fdefine_char);
3476 DEFSUBR (Fchar_variants);
3477 DEFSUBR (Fget_composite_char);
3478 DEFSUBR (Fcharset_mapping_table);
3479 DEFSUBR (Fset_charset_mapping_table);
3483 DEFSUBR (Fdecode_char);
3484 DEFSUBR (Fdecode_builtin_char);
3486 DEFSUBR (Fmake_char);
3487 DEFSUBR (Fchar_charset);
3488 DEFSUBR (Fchar_octet);
3489 DEFSUBR (Fsplit_char);
3491 #ifdef ENABLE_COMPOSITE_CHARS
3492 DEFSUBR (Fmake_composite_char);
3493 DEFSUBR (Fcomposite_char_string);
3496 defsymbol (&Qcharsetp, "charsetp");
3497 defsymbol (&Qregistry, "registry");
3498 defsymbol (&Qfinal, "final");
3499 defsymbol (&Qgraphic, "graphic");
3500 defsymbol (&Qdirection, "direction");
3501 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
3502 defsymbol (&Qshort_name, "short-name");
3503 defsymbol (&Qlong_name, "long-name");
3505 defsymbol (&Ql2r, "l2r");
3506 defsymbol (&Qr2l, "r2l");
3508 /* Charsets, compatible with FSF 20.3
3509 Naming convention is Script-Charset[-Edition] */
3510 defsymbol (&Qascii, "ascii");
3511 defsymbol (&Qcontrol_1, "control-1");
3512 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
3513 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
3514 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
3515 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
3516 defsymbol (&Qthai_tis620, "thai-tis620");
3517 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
3518 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
3519 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
3520 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
3521 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
3522 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
3523 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
3524 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
3525 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
3526 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
3527 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
3528 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
3529 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
3530 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
3531 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
3533 defsymbol (&Q_ucs, "->ucs");
3534 defsymbol (&Q_decomposition, "->decomposition");
3535 defsymbol (&Qcompat, "compat");
3536 defsymbol (&Qisolated, "isolated");
3537 defsymbol (&Qinitial, "initial");
3538 defsymbol (&Qmedial, "medial");
3539 defsymbol (&Qfinal, "final");
3540 defsymbol (&Qvertical, "vertical");
3541 defsymbol (&QnoBreak, "noBreak");
3542 defsymbol (&Qfraction, "fraction");
3543 defsymbol (&Qsuper, "super");
3544 defsymbol (&Qsub, "sub");
3545 defsymbol (&Qcircle, "circle");
3546 defsymbol (&Qsquare, "square");
3547 defsymbol (&Qwide, "wide");
3548 defsymbol (&Qnarrow, "narrow");
3549 defsymbol (&Qsmall, "small");
3550 defsymbol (&Qfont, "font");
3551 defsymbol (&Qucs, "ucs");
3552 defsymbol (&Qucs_bmp, "ucs-bmp");
3553 defsymbol (&Qucs_cns, "ucs-cns");
3554 defsymbol (&Qlatin_viscii, "latin-viscii");
3555 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
3556 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
3557 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
3558 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
3559 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
3560 defsymbol (&Qideograph_gt, "ideograph-gt");
3561 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
3562 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
3563 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
3564 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
3565 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
3566 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
3567 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
3568 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
3569 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
3570 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
3571 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
3572 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
3573 defsymbol (&Qchinese_big5, "chinese-big5");
3574 defsymbol (&Qmojikyo, "mojikyo");
3575 defsymbol (&Qmojikyo_2022_1, "mojikyo-2022-1");
3576 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
3577 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
3578 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
3579 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
3580 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
3581 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
3582 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
3583 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
3584 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
3585 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
3586 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
3587 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
3588 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
3589 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
3590 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
3591 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
3592 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
3593 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
3594 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
3595 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
3596 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
3597 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
3599 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
3600 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
3602 defsymbol (&Qcomposite, "composite");
3606 vars_of_mule_charset (void)
3613 chlook = xnew (struct charset_lookup);
3614 dumpstruct (&chlook, &charset_lookup_description);
3616 /* Table of charsets indexed by leading byte. */
3617 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
3618 chlook->charset_by_leading_byte[i] = Qnil;
3621 /* Table of charsets indexed by type/final-byte. */
3622 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3623 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3624 chlook->charset_by_attributes[i][j] = Qnil;
3626 /* Table of charsets indexed by type/final-byte/direction. */
3627 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3628 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3629 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
3630 chlook->charset_by_attributes[i][j][k] = Qnil;
3634 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
3636 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
3637 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
3641 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3642 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
3643 Leading-code of private TYPE9N charset of column-width 1.
3645 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3649 Vutf_2000_version = build_string("0.17 (Hōryūji)");
3650 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
3651 Version number of UTF-2000.
3654 staticpro (&Vcharacter_composition_table);
3655 Vcharacter_composition_table = make_char_id_table (Qnil);
3657 staticpro (&Vcharacter_variant_table);
3658 Vcharacter_variant_table = make_char_id_table (Qnil);
3660 Vdefault_coded_charset_priority_list = Qnil;
3661 DEFVAR_LISP ("default-coded-charset-priority-list",
3662 &Vdefault_coded_charset_priority_list /*
3663 Default order of preferred coded-character-sets.
3669 complex_vars_of_mule_charset (void)
3671 staticpro (&Vcharset_hash_table);
3672 Vcharset_hash_table =
3673 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3675 /* Predefined character sets. We store them into variables for
3679 staticpro (&Vchar_attribute_hash_table);
3680 Vchar_attribute_hash_table
3681 = make_lisp_hash_table (16, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3683 staticpro (&Vcharset_ucs);
3685 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
3686 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3687 build_string ("UCS"),
3688 build_string ("UCS"),
3689 build_string ("ISO/IEC 10646"),
3691 Qnil, 0, 0xFFFFFFF, 0, 0);
3692 staticpro (&Vcharset_ucs_bmp);
3694 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
3695 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3696 build_string ("BMP"),
3697 build_string ("BMP"),
3698 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
3699 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
3700 Qnil, 0, 0xFFFF, 0, 0);
3701 staticpro (&Vcharset_ucs_cns);
3703 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
3704 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3705 build_string ("UCS for CNS"),
3706 build_string ("UCS for CNS 11643"),
3707 build_string ("ISO/IEC 10646 for CNS 11643"),
3711 # define MIN_CHAR_THAI 0
3712 # define MAX_CHAR_THAI 0
3713 # define MIN_CHAR_HEBREW 0
3714 # define MAX_CHAR_HEBREW 0
3715 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3716 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
3718 staticpro (&Vcharset_ascii);
3720 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3721 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3722 build_string ("ASCII"),
3723 build_string ("ASCII)"),
3724 build_string ("ASCII (ISO646 IRV)"),
3725 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3726 Qnil, 0, 0x7F, 0, 0);
3727 staticpro (&Vcharset_control_1);
3728 Vcharset_control_1 =
3729 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3730 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3731 build_string ("C1"),
3732 build_string ("Control characters"),
3733 build_string ("Control characters 128-191"),
3735 Qnil, 0x80, 0x9F, 0, 0);
3736 staticpro (&Vcharset_latin_iso8859_1);
3737 Vcharset_latin_iso8859_1 =
3738 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3739 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3740 build_string ("Latin-1"),
3741 build_string ("ISO8859-1 (Latin-1)"),
3742 build_string ("ISO8859-1 (Latin-1)"),
3743 build_string ("iso8859-1"),
3744 Qnil, 0xA0, 0xFF, 0, 32);
3745 staticpro (&Vcharset_latin_iso8859_2);
3746 Vcharset_latin_iso8859_2 =
3747 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3748 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3749 build_string ("Latin-2"),
3750 build_string ("ISO8859-2 (Latin-2)"),
3751 build_string ("ISO8859-2 (Latin-2)"),
3752 build_string ("iso8859-2"),
3754 staticpro (&Vcharset_latin_iso8859_3);
3755 Vcharset_latin_iso8859_3 =
3756 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3757 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3758 build_string ("Latin-3"),
3759 build_string ("ISO8859-3 (Latin-3)"),
3760 build_string ("ISO8859-3 (Latin-3)"),
3761 build_string ("iso8859-3"),
3763 staticpro (&Vcharset_latin_iso8859_4);
3764 Vcharset_latin_iso8859_4 =
3765 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3766 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3767 build_string ("Latin-4"),
3768 build_string ("ISO8859-4 (Latin-4)"),
3769 build_string ("ISO8859-4 (Latin-4)"),
3770 build_string ("iso8859-4"),
3772 staticpro (&Vcharset_thai_tis620);
3773 Vcharset_thai_tis620 =
3774 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3775 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3776 build_string ("TIS620"),
3777 build_string ("TIS620 (Thai)"),
3778 build_string ("TIS620.2529 (Thai)"),
3779 build_string ("tis620"),
3780 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
3781 staticpro (&Vcharset_greek_iso8859_7);
3782 Vcharset_greek_iso8859_7 =
3783 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3784 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3785 build_string ("ISO8859-7"),
3786 build_string ("ISO8859-7 (Greek)"),
3787 build_string ("ISO8859-7 (Greek)"),
3788 build_string ("iso8859-7"),
3790 0 /* MIN_CHAR_GREEK */,
3791 0 /* MAX_CHAR_GREEK */, 0, 32);
3792 staticpro (&Vcharset_arabic_iso8859_6);
3793 Vcharset_arabic_iso8859_6 =
3794 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3795 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3796 build_string ("ISO8859-6"),
3797 build_string ("ISO8859-6 (Arabic)"),
3798 build_string ("ISO8859-6 (Arabic)"),
3799 build_string ("iso8859-6"),
3801 staticpro (&Vcharset_hebrew_iso8859_8);
3802 Vcharset_hebrew_iso8859_8 =
3803 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3804 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3805 build_string ("ISO8859-8"),
3806 build_string ("ISO8859-8 (Hebrew)"),
3807 build_string ("ISO8859-8 (Hebrew)"),
3808 build_string ("iso8859-8"),
3809 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
3810 staticpro (&Vcharset_katakana_jisx0201);
3811 Vcharset_katakana_jisx0201 =
3812 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3813 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3814 build_string ("JISX0201 Kana"),
3815 build_string ("JISX0201.1976 (Japanese Kana)"),
3816 build_string ("JISX0201.1976 Japanese Kana"),
3817 build_string ("jisx0201\\.1976"),
3819 staticpro (&Vcharset_latin_jisx0201);
3820 Vcharset_latin_jisx0201 =
3821 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3822 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3823 build_string ("JISX0201 Roman"),
3824 build_string ("JISX0201.1976 (Japanese Roman)"),
3825 build_string ("JISX0201.1976 Japanese Roman"),
3826 build_string ("jisx0201\\.1976"),
3828 staticpro (&Vcharset_cyrillic_iso8859_5);
3829 Vcharset_cyrillic_iso8859_5 =
3830 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3831 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3832 build_string ("ISO8859-5"),
3833 build_string ("ISO8859-5 (Cyrillic)"),
3834 build_string ("ISO8859-5 (Cyrillic)"),
3835 build_string ("iso8859-5"),
3837 0 /* MIN_CHAR_CYRILLIC */,
3838 0 /* MAX_CHAR_CYRILLIC */, 0, 32);
3839 staticpro (&Vcharset_latin_iso8859_9);
3840 Vcharset_latin_iso8859_9 =
3841 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3842 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3843 build_string ("Latin-5"),
3844 build_string ("ISO8859-9 (Latin-5)"),
3845 build_string ("ISO8859-9 (Latin-5)"),
3846 build_string ("iso8859-9"),
3848 staticpro (&Vcharset_japanese_jisx0208_1978);
3849 Vcharset_japanese_jisx0208_1978 =
3850 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3851 Qjapanese_jisx0208_1978, 94, 2,
3852 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3853 build_string ("JIS X0208:1978"),
3854 build_string ("JIS X0208:1978 (Japanese)"),
3856 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3857 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3859 staticpro (&Vcharset_chinese_gb2312);
3860 Vcharset_chinese_gb2312 =
3861 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3862 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3863 build_string ("GB2312"),
3864 build_string ("GB2312)"),
3865 build_string ("GB2312 Chinese simplified"),
3866 build_string ("gb2312"),
3868 staticpro (&Vcharset_japanese_jisx0208);
3869 Vcharset_japanese_jisx0208 =
3870 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3871 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3872 build_string ("JISX0208"),
3873 build_string ("JIS X0208:1983 (Japanese)"),
3874 build_string ("JIS X0208:1983 Japanese Kanji"),
3875 build_string ("jisx0208\\.1983"),
3878 staticpro (&Vcharset_japanese_jisx0208_1990);
3879 Vcharset_japanese_jisx0208_1990 =
3880 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3881 Qjapanese_jisx0208_1990, 94, 2,
3882 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3883 build_string ("JISX0208-1990"),
3884 build_string ("JIS X0208:1990 (Japanese)"),
3885 build_string ("JIS X0208:1990 Japanese Kanji"),
3886 build_string ("jisx0208\\.1990"),
3888 MIN_CHAR_JIS_X0208_1990,
3889 MAX_CHAR_JIS_X0208_1990, 0, 33);
3891 staticpro (&Vcharset_korean_ksc5601);
3892 Vcharset_korean_ksc5601 =
3893 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3894 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3895 build_string ("KSC5601"),
3896 build_string ("KSC5601 (Korean"),
3897 build_string ("KSC5601 Korean Hangul and Hanja"),
3898 build_string ("ksc5601"),
3900 staticpro (&Vcharset_japanese_jisx0212);
3901 Vcharset_japanese_jisx0212 =
3902 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3903 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3904 build_string ("JISX0212"),
3905 build_string ("JISX0212 (Japanese)"),
3906 build_string ("JISX0212 Japanese Supplement"),
3907 build_string ("jisx0212"),
3910 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3911 staticpro (&Vcharset_chinese_cns11643_1);
3912 Vcharset_chinese_cns11643_1 =
3913 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3914 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3915 build_string ("CNS11643-1"),
3916 build_string ("CNS11643-1 (Chinese traditional)"),
3918 ("CNS 11643 Plane 1 Chinese traditional"),
3919 build_string (CHINESE_CNS_PLANE_RE("1")),
3921 staticpro (&Vcharset_chinese_cns11643_2);
3922 Vcharset_chinese_cns11643_2 =
3923 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3924 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3925 build_string ("CNS11643-2"),
3926 build_string ("CNS11643-2 (Chinese traditional)"),
3928 ("CNS 11643 Plane 2 Chinese traditional"),
3929 build_string (CHINESE_CNS_PLANE_RE("2")),
3932 staticpro (&Vcharset_latin_tcvn5712);
3933 Vcharset_latin_tcvn5712 =
3934 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3935 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3936 build_string ("TCVN 5712"),
3937 build_string ("TCVN 5712 (VSCII-2)"),
3938 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3939 build_string ("tcvn5712-1"),
3941 staticpro (&Vcharset_latin_viscii_lower);
3942 Vcharset_latin_viscii_lower =
3943 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3944 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3945 build_string ("VISCII lower"),
3946 build_string ("VISCII lower (Vietnamese)"),
3947 build_string ("VISCII lower (Vietnamese)"),
3948 build_string ("MULEVISCII-LOWER"),
3950 staticpro (&Vcharset_latin_viscii_upper);
3951 Vcharset_latin_viscii_upper =
3952 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3953 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3954 build_string ("VISCII upper"),
3955 build_string ("VISCII upper (Vietnamese)"),
3956 build_string ("VISCII upper (Vietnamese)"),
3957 build_string ("MULEVISCII-UPPER"),
3959 staticpro (&Vcharset_latin_viscii);
3960 Vcharset_latin_viscii =
3961 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3962 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3963 build_string ("VISCII"),
3964 build_string ("VISCII 1.1 (Vietnamese)"),
3965 build_string ("VISCII 1.1 (Vietnamese)"),
3966 build_string ("VISCII1\\.1"),
3968 staticpro (&Vcharset_chinese_big5);
3969 Vcharset_chinese_big5 =
3970 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
3971 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3972 build_string ("Big5"),
3973 build_string ("Big5"),
3974 build_string ("Big5 Chinese traditional"),
3975 build_string ("big5"),
3977 staticpro (&Vcharset_ideograph_gt);
3978 Vcharset_ideograph_gt =
3979 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
3980 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3981 build_string ("GT"),
3982 build_string ("GT"),
3983 build_string ("GT"),
3985 Qnil, MIN_CHAR_GT, MAX_CHAR_GT, 0, 0);
3986 #define DEF_GT_PJ(n) \
3987 staticpro (&Vcharset_ideograph_gt_pj_##n); \
3988 Vcharset_ideograph_gt_pj_##n = \
3989 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
3990 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
3991 build_string ("GT-PJ-"#n), \
3992 build_string ("GT (pseudo JIS encoding) part "#n), \
3993 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
3995 ("\\(GT2000PJ-"#n "\\|jisx0208\\.GT2000-"#n "\\)$"), \
4009 staticpro (&Vcharset_ideograph_daikanwa);
4010 Vcharset_ideograph_daikanwa =
4011 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
4012 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4013 build_string ("Daikanwa"),
4014 build_string ("Morohashi's Daikanwa"),
4015 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
4016 build_string ("Daikanwa"),
4017 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
4018 staticpro (&Vcharset_mojikyo);
4020 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
4021 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4022 build_string ("Mojikyo"),
4023 build_string ("Mojikyo"),
4024 build_string ("Konjaku-Mojikyo"),
4026 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
4027 staticpro (&Vcharset_mojikyo_2022_1);
4028 Vcharset_mojikyo_2022_1 =
4029 make_charset (LEADING_BYTE_MOJIKYO_2022_1, Qmojikyo_2022_1, 94, 3,
4030 2, 2, ':', CHARSET_LEFT_TO_RIGHT,
4031 build_string ("Mojikyo-2022-1"),
4032 build_string ("Mojikyo ISO-2022 Part 1"),
4033 build_string ("Konjaku-Mojikyo for ISO/IEC 2022 Part 1"),
4037 #define DEF_MOJIKYO_PJ(n) \
4038 staticpro (&Vcharset_mojikyo_pj_##n); \
4039 Vcharset_mojikyo_pj_##n = \
4040 make_charset (LEADING_BYTE_MOJIKYO_PJ_##n, Qmojikyo_pj_##n, 94, 2, \
4041 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
4042 build_string ("Mojikyo-PJ-"#n), \
4043 build_string ("Mojikyo (pseudo JIS encoding) part "#n), \
4045 ("Konjaku-Mojikyo (pseudo JIS encoding) part "#n), \
4047 ("\\(MojikyoPJ-"#n "\\|jisx0208\\.Mojikyo-"#n "\\)$"), \
4059 DEF_MOJIKYO_PJ (10);
4060 DEF_MOJIKYO_PJ (11);
4061 DEF_MOJIKYO_PJ (12);
4062 DEF_MOJIKYO_PJ (13);
4063 DEF_MOJIKYO_PJ (14);
4064 DEF_MOJIKYO_PJ (15);
4065 DEF_MOJIKYO_PJ (16);
4066 DEF_MOJIKYO_PJ (17);
4067 DEF_MOJIKYO_PJ (18);
4068 DEF_MOJIKYO_PJ (19);
4069 DEF_MOJIKYO_PJ (20);
4070 DEF_MOJIKYO_PJ (21);
4072 staticpro (&Vcharset_ethiopic_ucs);
4073 Vcharset_ethiopic_ucs =
4074 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
4075 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4076 build_string ("Ethiopic (UCS)"),
4077 build_string ("Ethiopic (UCS)"),
4078 build_string ("Ethiopic of UCS"),
4079 build_string ("Ethiopic-Unicode"),
4080 Qnil, 0x1200, 0x137F, 0x1200, 0);
4082 staticpro (&Vcharset_chinese_big5_1);
4083 Vcharset_chinese_big5_1 =
4084 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
4085 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
4086 build_string ("Big5"),
4087 build_string ("Big5 (Level-1)"),
4089 ("Big5 Level-1 Chinese traditional"),
4090 build_string ("big5"),
4092 staticpro (&Vcharset_chinese_big5_2);
4093 Vcharset_chinese_big5_2 =
4094 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
4095 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
4096 build_string ("Big5"),
4097 build_string ("Big5 (Level-2)"),
4099 ("Big5 Level-2 Chinese traditional"),
4100 build_string ("big5"),
4103 #ifdef ENABLE_COMPOSITE_CHARS
4104 /* #### For simplicity, we put composite chars into a 96x96 charset.
4105 This is going to lead to problems because you can run out of
4106 room, esp. as we don't yet recycle numbers. */
4107 staticpro (&Vcharset_composite);
4108 Vcharset_composite =
4109 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
4110 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4111 build_string ("Composite"),
4112 build_string ("Composite characters"),
4113 build_string ("Composite characters"),
4116 /* #### not dumped properly */
4117 composite_char_row_next = 32;
4118 composite_char_col_next = 32;
4120 Vcomposite_char_string2char_hash_table =
4121 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
4122 Vcomposite_char_char2string_hash_table =
4123 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
4124 staticpro (&Vcomposite_char_string2char_hash_table);
4125 staticpro (&Vcomposite_char_char2string_hash_table);
4126 #endif /* ENABLE_COMPOSITE_CHARS */