1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 20.3. Not in FSF. */
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_japanese_jisx0208;
60 Lisp_Object Vcharset_japanese_jisx0208_1990;
61 Lisp_Object Vcharset_korean_ksc5601;
62 Lisp_Object Vcharset_japanese_jisx0212;
63 Lisp_Object Vcharset_chinese_cns11643_1;
64 Lisp_Object Vcharset_chinese_cns11643_2;
66 Lisp_Object Vcharset_ucs;
67 Lisp_Object Vcharset_ucs_bmp;
68 Lisp_Object Vcharset_ucs_cns;
69 Lisp_Object Vcharset_latin_viscii;
70 Lisp_Object Vcharset_latin_tcvn5712;
71 Lisp_Object Vcharset_latin_viscii_lower;
72 Lisp_Object Vcharset_latin_viscii_upper;
73 Lisp_Object Vcharset_chinese_big5;
74 Lisp_Object Vcharset_ideograph_gt;
75 Lisp_Object Vcharset_ideograph_gt_pj_1;
76 Lisp_Object Vcharset_ideograph_gt_pj_2;
77 Lisp_Object Vcharset_ideograph_gt_pj_3;
78 Lisp_Object Vcharset_ideograph_gt_pj_4;
79 Lisp_Object Vcharset_ideograph_gt_pj_5;
80 Lisp_Object Vcharset_ideograph_gt_pj_6;
81 Lisp_Object Vcharset_ideograph_gt_pj_7;
82 Lisp_Object Vcharset_ideograph_gt_pj_8;
83 Lisp_Object Vcharset_ideograph_gt_pj_9;
84 Lisp_Object Vcharset_ideograph_gt_pj_10;
85 Lisp_Object Vcharset_ideograph_gt_pj_11;
86 Lisp_Object Vcharset_ideograph_daikanwa;
87 Lisp_Object Vcharset_mojikyo;
88 Lisp_Object Vcharset_mojikyo_2022_1;
89 Lisp_Object Vcharset_mojikyo_pj_1;
90 Lisp_Object Vcharset_mojikyo_pj_2;
91 Lisp_Object Vcharset_mojikyo_pj_3;
92 Lisp_Object Vcharset_mojikyo_pj_4;
93 Lisp_Object Vcharset_mojikyo_pj_5;
94 Lisp_Object Vcharset_mojikyo_pj_6;
95 Lisp_Object Vcharset_mojikyo_pj_7;
96 Lisp_Object Vcharset_mojikyo_pj_8;
97 Lisp_Object Vcharset_mojikyo_pj_9;
98 Lisp_Object Vcharset_mojikyo_pj_10;
99 Lisp_Object Vcharset_mojikyo_pj_11;
100 Lisp_Object Vcharset_mojikyo_pj_12;
101 Lisp_Object Vcharset_mojikyo_pj_13;
102 Lisp_Object Vcharset_mojikyo_pj_14;
103 Lisp_Object Vcharset_mojikyo_pj_15;
104 Lisp_Object Vcharset_mojikyo_pj_16;
105 Lisp_Object Vcharset_mojikyo_pj_17;
106 Lisp_Object Vcharset_mojikyo_pj_18;
107 Lisp_Object Vcharset_mojikyo_pj_19;
108 Lisp_Object Vcharset_mojikyo_pj_20;
109 Lisp_Object Vcharset_mojikyo_pj_21;
110 Lisp_Object Vcharset_ethiopic_ucs;
112 Lisp_Object Vcharset_chinese_big5_1;
113 Lisp_Object Vcharset_chinese_big5_2;
115 #ifdef ENABLE_COMPOSITE_CHARS
116 Lisp_Object Vcharset_composite;
118 /* Hash tables for composite chars. One maps string representing
119 composed chars to their equivalent chars; one goes the
121 Lisp_Object Vcomposite_char_char2string_hash_table;
122 Lisp_Object Vcomposite_char_string2char_hash_table;
124 static int composite_char_row_next;
125 static int composite_char_col_next;
127 #endif /* ENABLE_COMPOSITE_CHARS */
129 struct charset_lookup *chlook;
131 static const struct lrecord_description charset_lookup_description_1[] = {
132 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
141 static const struct struct_description charset_lookup_description = {
142 sizeof (struct charset_lookup),
143 charset_lookup_description_1
147 /* Table of number of bytes in the string representation of a character
148 indexed by the first byte of that representation.
150 rep_bytes_by_first_byte(c) is more efficient than the equivalent
151 canonical computation:
153 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
155 const Bytecount rep_bytes_by_first_byte[0xA0] =
156 { /* 0x00 - 0x7f are for straight ASCII */
157 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
165 /* 0x80 - 0x8f are for Dimension-1 official charsets */
167 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
169 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
171 /* 0x90 - 0x9d are for Dimension-2 official charsets */
172 /* 0x9e is for Dimension-1 private charsets */
173 /* 0x9f is for Dimension-2 private charsets */
174 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
180 #define BT_UINT8_MIN 0
181 #define BT_UINT8_MAX (UCHAR_MAX - 3)
182 #define BT_UINT8_t (UCHAR_MAX - 2)
183 #define BT_UINT8_nil (UCHAR_MAX - 1)
184 #define BT_UINT8_unbound UCHAR_MAX
186 INLINE_HEADER int INT_UINT8_P (Lisp_Object obj);
187 INLINE_HEADER int UINT8_VALUE_P (Lisp_Object obj);
188 INLINE_HEADER unsigned char UINT8_ENCODE (Lisp_Object obj);
189 INLINE_HEADER Lisp_Object UINT8_DECODE (unsigned char n);
190 INLINE_HEADER unsigned short UINT8_TO_UINT16 (unsigned char n);
193 INT_UINT8_P (Lisp_Object obj)
197 int num = XINT (obj);
199 return (BT_UINT8_MIN <= num) && (num <= BT_UINT8_MAX);
206 UINT8_VALUE_P (Lisp_Object obj)
208 return EQ (obj, Qunbound)
209 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT8_P (obj);
212 INLINE_HEADER unsigned char
213 UINT8_ENCODE (Lisp_Object obj)
215 if (EQ (obj, Qunbound))
216 return BT_UINT8_unbound;
217 else if (EQ (obj, Qnil))
219 else if (EQ (obj, Qt))
225 INLINE_HEADER Lisp_Object
226 UINT8_DECODE (unsigned char n)
228 if (n == BT_UINT8_unbound)
230 else if (n == BT_UINT8_nil)
232 else if (n == BT_UINT8_t)
239 mark_uint8_byte_table (Lisp_Object obj)
245 print_uint8_byte_table (Lisp_Object obj,
246 Lisp_Object printcharfun, int escapeflag)
248 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
250 struct gcpro gcpro1, gcpro2;
251 GCPRO2 (obj, printcharfun);
253 write_c_string ("\n#<uint8-byte-table", printcharfun);
254 for (i = 0; i < 256; i++)
256 unsigned char n = bte->property[i];
258 write_c_string ("\n ", printcharfun);
259 write_c_string (" ", printcharfun);
260 if (n == BT_UINT8_unbound)
261 write_c_string ("void", printcharfun);
262 else if (n == BT_UINT8_nil)
263 write_c_string ("nil", printcharfun);
264 else if (n == BT_UINT8_t)
265 write_c_string ("t", printcharfun);
270 sprintf (buf, "%hd", n);
271 write_c_string (buf, printcharfun);
275 write_c_string (">", printcharfun);
279 uint8_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
281 Lisp_Uint8_Byte_Table *te1 = XUINT8_BYTE_TABLE (obj1);
282 Lisp_Uint8_Byte_Table *te2 = XUINT8_BYTE_TABLE (obj2);
285 for (i = 0; i < 256; i++)
286 if (te1->property[i] != te2->property[i])
292 uint8_byte_table_hash (Lisp_Object obj, int depth)
294 Lisp_Uint8_Byte_Table *te = XUINT8_BYTE_TABLE (obj);
298 for (i = 0; i < 256; i++)
299 hash = HASH2 (hash, te->property[i]);
303 DEFINE_LRECORD_IMPLEMENTATION ("uint8-byte-table", uint8_byte_table,
304 mark_uint8_byte_table,
305 print_uint8_byte_table,
306 0, uint8_byte_table_equal,
307 uint8_byte_table_hash,
308 0 /* uint8_byte_table_description */,
309 Lisp_Uint8_Byte_Table);
312 make_uint8_byte_table (unsigned char initval)
316 Lisp_Uint8_Byte_Table *cte;
318 cte = alloc_lcrecord_type (Lisp_Uint8_Byte_Table,
319 &lrecord_uint8_byte_table);
321 for (i = 0; i < 256; i++)
322 cte->property[i] = initval;
324 XSETUINT8_BYTE_TABLE (obj, cte);
329 uint8_byte_table_same_value_p (Lisp_Object obj)
331 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
332 unsigned char v0 = bte->property[0];
335 for (i = 1; i < 256; i++)
337 if (bte->property[i] != v0)
344 #define BT_UINT16_MIN 0
345 #define BT_UINT16_MAX (USHRT_MAX - 3)
346 #define BT_UINT16_t (USHRT_MAX - 2)
347 #define BT_UINT16_nil (USHRT_MAX - 1)
348 #define BT_UINT16_unbound USHRT_MAX
350 INLINE_HEADER int INT_UINT16_P (Lisp_Object obj);
351 INLINE_HEADER int UINT16_VALUE_P (Lisp_Object obj);
352 INLINE_HEADER unsigned short UINT16_ENCODE (Lisp_Object obj);
353 INLINE_HEADER Lisp_Object UINT16_DECODE (unsigned short us);
356 INT_UINT16_P (Lisp_Object obj)
360 int num = XINT (obj);
362 return (BT_UINT16_MIN <= num) && (num <= BT_UINT16_MAX);
369 UINT16_VALUE_P (Lisp_Object obj)
371 return EQ (obj, Qunbound)
372 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT16_P (obj);
375 INLINE_HEADER unsigned short
376 UINT16_ENCODE (Lisp_Object obj)
378 if (EQ (obj, Qunbound))
379 return BT_UINT16_unbound;
380 else if (EQ (obj, Qnil))
381 return BT_UINT16_nil;
382 else if (EQ (obj, Qt))
388 INLINE_HEADER Lisp_Object
389 UINT16_DECODE (unsigned short n)
391 if (n == BT_UINT16_unbound)
393 else if (n == BT_UINT16_nil)
395 else if (n == BT_UINT16_t)
401 INLINE_HEADER unsigned short
402 UINT8_TO_UINT16 (unsigned char n)
404 if (n == BT_UINT8_unbound)
405 return BT_UINT16_unbound;
406 else if (n == BT_UINT8_nil)
407 return BT_UINT16_nil;
408 else if (n == BT_UINT8_t)
415 mark_uint16_byte_table (Lisp_Object obj)
421 print_uint16_byte_table (Lisp_Object obj,
422 Lisp_Object printcharfun, int escapeflag)
424 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
426 struct gcpro gcpro1, gcpro2;
427 GCPRO2 (obj, printcharfun);
429 write_c_string ("\n#<uint16-byte-table", printcharfun);
430 for (i = 0; i < 256; i++)
432 unsigned short n = bte->property[i];
434 write_c_string ("\n ", printcharfun);
435 write_c_string (" ", printcharfun);
436 if (n == BT_UINT16_unbound)
437 write_c_string ("void", printcharfun);
438 else if (n == BT_UINT16_nil)
439 write_c_string ("nil", printcharfun);
440 else if (n == BT_UINT16_t)
441 write_c_string ("t", printcharfun);
446 sprintf (buf, "%hd", n);
447 write_c_string (buf, printcharfun);
451 write_c_string (">", printcharfun);
455 uint16_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
457 Lisp_Uint16_Byte_Table *te1 = XUINT16_BYTE_TABLE (obj1);
458 Lisp_Uint16_Byte_Table *te2 = XUINT16_BYTE_TABLE (obj2);
461 for (i = 0; i < 256; i++)
462 if (te1->property[i] != te2->property[i])
468 uint16_byte_table_hash (Lisp_Object obj, int depth)
470 Lisp_Uint16_Byte_Table *te = XUINT16_BYTE_TABLE (obj);
474 for (i = 0; i < 256; i++)
475 hash = HASH2 (hash, te->property[i]);
479 DEFINE_LRECORD_IMPLEMENTATION ("uint16-byte-table", uint16_byte_table,
480 mark_uint16_byte_table,
481 print_uint16_byte_table,
482 0, uint16_byte_table_equal,
483 uint16_byte_table_hash,
484 0 /* uint16_byte_table_description */,
485 Lisp_Uint16_Byte_Table);
488 make_uint16_byte_table (unsigned short initval)
492 Lisp_Uint16_Byte_Table *cte;
494 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
495 &lrecord_uint16_byte_table);
497 for (i = 0; i < 256; i++)
498 cte->property[i] = initval;
500 XSETUINT16_BYTE_TABLE (obj, cte);
505 expand_uint8_byte_table_to_uint16 (Lisp_Object table)
509 Lisp_Uint8_Byte_Table* bte = XUINT8_BYTE_TABLE(table);
510 Lisp_Uint16_Byte_Table* cte;
512 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
513 &lrecord_uint16_byte_table);
514 for (i = 0; i < 256; i++)
516 cte->property[i] = UINT8_TO_UINT16 (bte->property[i]);
518 XSETUINT16_BYTE_TABLE (obj, cte);
523 uint16_byte_table_same_value_p (Lisp_Object obj)
525 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
526 unsigned short v0 = bte->property[0];
529 for (i = 1; i < 256; i++)
531 if (bte->property[i] != v0)
539 mark_byte_table (Lisp_Object obj)
541 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
544 for (i = 0; i < 256; i++)
546 mark_object (cte->property[i]);
552 print_byte_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
554 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
556 struct gcpro gcpro1, gcpro2;
557 GCPRO2 (obj, printcharfun);
559 write_c_string ("\n#<byte-table", printcharfun);
560 for (i = 0; i < 256; i++)
562 Lisp_Object elt = bte->property[i];
564 write_c_string ("\n ", printcharfun);
565 write_c_string (" ", printcharfun);
566 if (EQ (elt, Qunbound))
567 write_c_string ("void", printcharfun);
569 print_internal (elt, printcharfun, escapeflag);
572 write_c_string (">", printcharfun);
576 byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
578 Lisp_Byte_Table *cte1 = XBYTE_TABLE (obj1);
579 Lisp_Byte_Table *cte2 = XBYTE_TABLE (obj2);
582 for (i = 0; i < 256; i++)
583 if (BYTE_TABLE_P (cte1->property[i]))
585 if (BYTE_TABLE_P (cte2->property[i]))
587 if (!byte_table_equal (cte1->property[i],
588 cte2->property[i], depth + 1))
595 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
601 byte_table_hash (Lisp_Object obj, int depth)
603 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
605 return internal_array_hash (cte->property, 256, depth);
608 static const struct lrecord_description byte_table_description[] = {
609 { XD_LISP_OBJECT_ARRAY, offsetof(Lisp_Byte_Table, property), 256 },
613 DEFINE_LRECORD_IMPLEMENTATION ("byte-table", byte_table,
618 byte_table_description,
622 make_byte_table (Lisp_Object initval)
626 Lisp_Byte_Table *cte;
628 cte = alloc_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
630 for (i = 0; i < 256; i++)
631 cte->property[i] = initval;
633 XSETBYTE_TABLE (obj, cte);
638 byte_table_same_value_p (Lisp_Object obj)
640 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
641 Lisp_Object v0 = bte->property[0];
644 for (i = 1; i < 256; i++)
646 if (!internal_equal (bte->property[i], v0, 0))
653 Lisp_Object get_byte_table (Lisp_Object table, unsigned char idx);
654 Lisp_Object put_byte_table (Lisp_Object table, unsigned char idx,
658 get_byte_table (Lisp_Object table, unsigned char idx)
660 if (UINT8_BYTE_TABLE_P (table))
661 return UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[idx]);
662 else if (UINT16_BYTE_TABLE_P (table))
663 return UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[idx]);
664 else if (BYTE_TABLE_P (table))
665 return XBYTE_TABLE(table)->property[idx];
671 put_byte_table (Lisp_Object table, unsigned char idx, Lisp_Object value)
673 if (UINT8_BYTE_TABLE_P (table))
675 if (UINT8_VALUE_P (value))
677 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
678 if (!UINT8_BYTE_TABLE_P (value) &&
679 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
680 && uint8_byte_table_same_value_p (table))
685 else if (UINT16_VALUE_P (value))
687 Lisp_Object new = expand_uint8_byte_table_to_uint16 (table);
689 XUINT16_BYTE_TABLE(new)->property[idx] = UINT16_ENCODE (value);
694 Lisp_Object new = make_byte_table (Qnil);
697 for (i = 0; i < 256; i++)
699 XBYTE_TABLE(new)->property[i]
700 = UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[i]);
702 XBYTE_TABLE(new)->property[idx] = value;
706 else if (UINT16_BYTE_TABLE_P (table))
708 if (UINT16_VALUE_P (value))
710 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
711 if (!UINT8_BYTE_TABLE_P (value) &&
712 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
713 && uint16_byte_table_same_value_p (table))
720 Lisp_Object new = make_byte_table (Qnil);
723 for (i = 0; i < 256; i++)
725 XBYTE_TABLE(new)->property[i]
726 = UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[i]);
728 XBYTE_TABLE(new)->property[idx] = value;
732 else if (BYTE_TABLE_P (table))
734 XBYTE_TABLE(table)->property[idx] = value;
735 if (!UINT8_BYTE_TABLE_P (value) &&
736 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
737 && byte_table_same_value_p (table))
742 else if (!internal_equal (table, value, 0))
744 if (UINT8_VALUE_P (table) && UINT8_VALUE_P (value))
746 table = make_uint8_byte_table (UINT8_ENCODE (table));
747 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
749 else if (UINT16_VALUE_P (table) && UINT16_VALUE_P (value))
751 table = make_uint16_byte_table (UINT16_ENCODE (table));
752 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
756 table = make_byte_table (table);
757 XBYTE_TABLE(table)->property[idx] = value;
764 mark_char_id_table (Lisp_Object obj)
766 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
772 print_char_id_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
774 Lisp_Object table = XCHAR_ID_TABLE (obj)->table;
776 struct gcpro gcpro1, gcpro2;
777 GCPRO2 (obj, printcharfun);
779 write_c_string ("#<char-id-table ", printcharfun);
780 for (i = 0; i < 256; i++)
782 Lisp_Object elt = get_byte_table (table, i);
783 if (i != 0) write_c_string ("\n ", printcharfun);
784 if (EQ (elt, Qunbound))
785 write_c_string ("void", printcharfun);
787 print_internal (elt, printcharfun, escapeflag);
790 write_c_string (">", printcharfun);
794 char_id_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
796 Lisp_Object table1 = XCHAR_ID_TABLE (obj1)->table;
797 Lisp_Object table2 = XCHAR_ID_TABLE (obj2)->table;
800 for (i = 0; i < 256; i++)
802 if (!internal_equal (get_byte_table (table1, i),
803 get_byte_table (table2, i), 0))
810 char_id_table_hash (Lisp_Object obj, int depth)
812 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
814 return char_id_table_hash (cte->table, depth + 1);
817 static const struct lrecord_description char_id_table_description[] = {
818 { XD_LISP_OBJECT, offsetof(Lisp_Char_ID_Table, table) },
822 DEFINE_LRECORD_IMPLEMENTATION ("char-id-table", char_id_table,
825 0, char_id_table_equal,
827 char_id_table_description,
831 make_char_id_table (Lisp_Object initval)
834 Lisp_Char_ID_Table *cte;
836 cte = alloc_lcrecord_type (Lisp_Char_ID_Table, &lrecord_char_id_table);
838 cte->table = make_byte_table (initval);
840 XSETCHAR_ID_TABLE (obj, cte);
846 get_char_id_table (Emchar ch, Lisp_Object table)
848 unsigned int code = ch;
855 (XCHAR_ID_TABLE (table)->table,
856 (unsigned char)(code >> 24)),
857 (unsigned char) (code >> 16)),
858 (unsigned char) (code >> 8)),
859 (unsigned char) code);
862 void put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table);
864 put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table)
866 unsigned int code = ch;
867 Lisp_Object table1, table2, table3, table4;
869 table1 = XCHAR_ID_TABLE (table)->table;
870 table2 = get_byte_table (table1, (unsigned char)(code >> 24));
871 table3 = get_byte_table (table2, (unsigned char)(code >> 16));
872 table4 = get_byte_table (table3, (unsigned char)(code >> 8));
874 table4 = put_byte_table (table4, (unsigned char)code, value);
875 table3 = put_byte_table (table3, (unsigned char)(code >> 8), table4);
876 table2 = put_byte_table (table2, (unsigned char)(code >> 16), table3);
877 XCHAR_ID_TABLE (table)->table
878 = put_byte_table (table1, (unsigned char)(code >> 24), table2);
882 Lisp_Object Vchar_attribute_hash_table;
883 Lisp_Object Vcharacter_composition_table;
884 Lisp_Object Vcharacter_variant_table;
886 Lisp_Object Qideograph_daikanwa;
887 Lisp_Object Q_decomposition;
891 Lisp_Object Qisolated;
892 Lisp_Object Qinitial;
895 Lisp_Object Qvertical;
896 Lisp_Object QnoBreak;
897 Lisp_Object Qfraction;
907 Emchar to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg);
909 Lisp_Object put_char_ccs_code_point (Lisp_Object character,
910 Lisp_Object ccs, Lisp_Object value);
911 Lisp_Object remove_char_ccs (Lisp_Object character, Lisp_Object ccs);
914 to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
920 else if (EQ (v, Qcompat))
922 else if (EQ (v, Qisolated))
924 else if (EQ (v, Qinitial))
926 else if (EQ (v, Qmedial))
928 else if (EQ (v, Qfinal))
930 else if (EQ (v, Qvertical))
932 else if (EQ (v, QnoBreak))
934 else if (EQ (v, Qfraction))
936 else if (EQ (v, Qsuper))
938 else if (EQ (v, Qsub))
940 else if (EQ (v, Qcircle))
942 else if (EQ (v, Qsquare))
944 else if (EQ (v, Qwide))
946 else if (EQ (v, Qnarrow))
948 else if (EQ (v, Qsmall))
950 else if (EQ (v, Qfont))
953 signal_simple_error (err_msg, err_arg);
956 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
957 Return character corresponding with list.
961 Lisp_Object table = Vcharacter_composition_table;
962 Lisp_Object rest = list;
966 Lisp_Object v = Fcar (rest);
968 Emchar c = to_char_id (v, "Invalid value for composition", list);
970 ret = get_char_id_table (c, table);
975 if (!CHAR_ID_TABLE_P (ret))
980 else if (!CONSP (rest))
982 else if (CHAR_ID_TABLE_P (ret))
985 signal_simple_error ("Invalid table is found with", list);
987 signal_simple_error ("Invalid value for composition", list);
990 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
991 Return variants of CHARACTER.
995 CHECK_CHAR (character);
996 return Fcopy_list (get_char_id_table (XCHAR (character),
997 Vcharacter_variant_table));
1001 /* We store the char-attributes in hash tables with the names as the
1002 key and the actual char-id-table object as the value. Occasionally
1003 we need to use them in a list format. These routines provide us
1005 struct char_attribute_list_closure
1007 Lisp_Object *char_attribute_list;
1011 add_char_attribute_to_list_mapper (Lisp_Object key, Lisp_Object value,
1012 void *char_attribute_list_closure)
1014 /* This function can GC */
1015 struct char_attribute_list_closure *calcl
1016 = (struct char_attribute_list_closure*) char_attribute_list_closure;
1017 Lisp_Object *char_attribute_list = calcl->char_attribute_list;
1019 *char_attribute_list = Fcons (key, *char_attribute_list);
1023 DEFUN ("char-attribute-list", Fchar_attribute_list, 0, 0, 0, /*
1024 Return the list of all existing character attributes except coded-charsets.
1028 Lisp_Object char_attribute_list = Qnil;
1029 struct gcpro gcpro1;
1030 struct char_attribute_list_closure char_attribute_list_closure;
1032 GCPRO1 (char_attribute_list);
1033 char_attribute_list_closure.char_attribute_list = &char_attribute_list;
1034 elisp_maphash (add_char_attribute_to_list_mapper,
1035 Vchar_attribute_hash_table,
1036 &char_attribute_list_closure);
1038 return char_attribute_list;
1041 DEFUN ("find-char-attribute-table", Ffind_char_attribute_table, 1, 1, 0, /*
1042 Return char-id-table corresponding to ATTRIBUTE.
1046 return Fgethash (attribute, Vchar_attribute_hash_table, Qnil);
1050 /* We store the char-id-tables in hash tables with the attributes as
1051 the key and the actual char-id-table object as the value. Each
1052 char-id-table stores values of an attribute corresponding with
1053 characters. Occasionally we need to get attributes of a character
1054 in a association-list format. These routines provide us with
1056 struct char_attribute_alist_closure
1059 Lisp_Object *char_attribute_alist;
1063 add_char_attribute_alist_mapper (Lisp_Object key, Lisp_Object value,
1064 void *char_attribute_alist_closure)
1066 /* This function can GC */
1067 struct char_attribute_alist_closure *caacl =
1068 (struct char_attribute_alist_closure*) char_attribute_alist_closure;
1069 Lisp_Object ret = get_char_id_table (caacl->char_id, value);
1070 if (!UNBOUNDP (ret))
1072 Lisp_Object *char_attribute_alist = caacl->char_attribute_alist;
1073 *char_attribute_alist
1074 = Fcons (Fcons (key, ret), *char_attribute_alist);
1079 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
1080 Return the alist of attributes of CHARACTER.
1084 Lisp_Object alist = Qnil;
1087 CHECK_CHAR (character);
1089 struct gcpro gcpro1;
1090 struct char_attribute_alist_closure char_attribute_alist_closure;
1093 char_attribute_alist_closure.char_id = XCHAR (character);
1094 char_attribute_alist_closure.char_attribute_alist = &alist;
1095 elisp_maphash (add_char_attribute_alist_mapper,
1096 Vchar_attribute_hash_table,
1097 &char_attribute_alist_closure);
1101 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
1103 Lisp_Object ccs = chlook->charset_by_leading_byte[i];
1107 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1110 if ( CHAR_ID_TABLE_P (encoding_table)
1111 && INTP (cpos = get_char_id_table (XCHAR (character),
1114 alist = Fcons (Fcons (ccs, cpos), alist);
1121 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 3, 0, /*
1122 Return the value of CHARACTER's ATTRIBUTE.
1123 Return DEFAULT-VALUE if the value is not exist.
1125 (character, attribute, default_value))
1129 CHECK_CHAR (character);
1130 if (!NILP (ccs = Ffind_charset (attribute)))
1132 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1134 if (CHAR_ID_TABLE_P (encoding_table))
1135 return get_char_id_table (XCHAR (character), encoding_table);
1139 Lisp_Object table = Fgethash (attribute,
1140 Vchar_attribute_hash_table,
1142 if (!UNBOUNDP (table))
1144 Lisp_Object ret = get_char_id_table (XCHAR (character), table);
1145 if (!UNBOUNDP (ret))
1149 return default_value;
1152 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
1153 Store CHARACTER's ATTRIBUTE with VALUE.
1155 (character, attribute, value))
1159 CHECK_CHAR (character);
1160 ccs = Ffind_charset (attribute);
1163 return put_char_ccs_code_point (character, ccs, value);
1165 else if (EQ (attribute, Q_decomposition))
1170 signal_simple_error ("Invalid value for ->decomposition",
1173 if (CONSP (Fcdr (value)))
1175 Lisp_Object rest = value;
1176 Lisp_Object table = Vcharacter_composition_table;
1180 GET_EXTERNAL_LIST_LENGTH (rest, len);
1181 seq = make_vector (len, Qnil);
1183 while (CONSP (rest))
1185 Lisp_Object v = Fcar (rest);
1188 = to_char_id (v, "Invalid value for ->decomposition", value);
1191 XVECTOR_DATA(seq)[i++] = v;
1193 XVECTOR_DATA(seq)[i++] = make_char (c);
1197 put_char_id_table (c, character, table);
1202 ntable = get_char_id_table (c, table);
1203 if (!CHAR_ID_TABLE_P (ntable))
1205 ntable = make_char_id_table (Qnil);
1206 put_char_id_table (c, ntable, table);
1214 Lisp_Object v = Fcar (value);
1218 Emchar c = XINT (v);
1220 = get_char_id_table (c, Vcharacter_variant_table);
1222 if (NILP (Fmemq (v, ret)))
1224 put_char_id_table (c, Fcons (character, ret),
1225 Vcharacter_variant_table);
1228 seq = make_vector (1, v);
1232 else if (EQ (attribute, Q_ucs))
1238 signal_simple_error ("Invalid value for ->ucs", value);
1242 ret = get_char_id_table (c, Vcharacter_variant_table);
1243 if (NILP (Fmemq (character, ret)))
1245 put_char_id_table (c, Fcons (character, ret),
1246 Vcharacter_variant_table);
1250 Lisp_Object table = Fgethash (attribute,
1251 Vchar_attribute_hash_table,
1256 table = make_char_id_table (Qunbound);
1257 Fputhash (attribute, table, Vchar_attribute_hash_table);
1259 put_char_id_table (XCHAR (character), value, table);
1264 DEFUN ("remove-char-attribute", Fremove_char_attribute, 2, 2, 0, /*
1265 Remove CHARACTER's ATTRIBUTE.
1267 (character, attribute))
1271 CHECK_CHAR (character);
1272 ccs = Ffind_charset (attribute);
1275 return remove_char_ccs (character, ccs);
1279 Lisp_Object table = Fgethash (attribute,
1280 Vchar_attribute_hash_table,
1282 if (!UNBOUNDP (table))
1284 put_char_id_table (XCHAR (character), Qunbound, table);
1291 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
1293 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
1295 /* ad-hoc method for `ascii' */
1296 if ((CHARSET_CHARS (cs) == 94) &&
1297 (CHARSET_BYTE_OFFSET (cs) != 33))
1298 return 128 - CHARSET_BYTE_OFFSET (cs);
1300 return CHARSET_CHARS (cs);
1303 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
1305 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
1307 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
1311 if (XVECTOR_LENGTH (v) > ccs_len)
1314 for (i = 0; i < XVECTOR_LENGTH (v); i++)
1316 Lisp_Object c = XVECTOR_DATA(v)[i];
1318 if (!NILP (c) && !CHARP (c))
1322 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
1334 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1337 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1347 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1348 nv = XVECTOR_DATA(v)[i];
1354 XVECTOR_DATA(v)[i] = Qnil;
1358 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1359 int code_point, Lisp_Object character);
1361 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1362 int code_point, Lisp_Object character)
1366 int ccs_len = XVECTOR_LENGTH (v);
1371 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1372 nv = XVECTOR_DATA(v)[i];
1376 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
1382 XVECTOR_DATA(v)[i] = character;
1386 put_char_ccs_code_point (Lisp_Object character,
1387 Lisp_Object ccs, Lisp_Object value)
1389 Lisp_Object encoding_table;
1391 if (!EQ (XCHARSET_NAME (ccs), Qucs)
1392 || (XCHAR (character) != XINT (value)))
1394 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
1395 int dim = XCHARSET_DIMENSION (ccs);
1396 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
1397 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
1401 { /* obsolete representation: value must be a list of bytes */
1402 Lisp_Object ret = Fcar (value);
1406 signal_simple_error ("Invalid value for coded-charset", value);
1407 code_point = XINT (ret);
1408 if (XCHARSET_GRAPHIC (ccs) == 1)
1410 rest = Fcdr (value);
1411 while (!NILP (rest))
1416 signal_simple_error ("Invalid value for coded-charset",
1420 signal_simple_error ("Invalid value for coded-charset",
1423 if (XCHARSET_GRAPHIC (ccs) == 1)
1425 code_point = (code_point << 8) | j;
1428 value = make_int (code_point);
1430 else if (INTP (value))
1432 code_point = XINT (value);
1433 if (XCHARSET_GRAPHIC (ccs) == 1)
1435 code_point &= 0x7F7F7F7F;
1436 value = make_int (code_point);
1440 signal_simple_error ("Invalid value for coded-charset", value);
1444 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
1447 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
1452 XCHARSET_DECODING_TABLE (ccs)
1453 = v = make_older_vector (ccs_len, Qnil);
1456 decoding_table_put_char (v, dim, byte_offset, code_point, character);
1458 if (NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
1460 XCHARSET_ENCODING_TABLE (ccs)
1461 = encoding_table = make_char_id_table (Qnil);
1463 put_char_id_table (XCHAR (character), value, encoding_table);
1468 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
1470 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
1471 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1473 if (VECTORP (decoding_table))
1475 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
1479 decoding_table_remove_char (decoding_table,
1480 XCHARSET_DIMENSION (ccs),
1481 XCHARSET_BYTE_OFFSET (ccs),
1485 if (CHAR_ID_TABLE_P (encoding_table))
1487 put_char_id_table (XCHAR (character), Qnil, encoding_table);
1492 EXFUN (Fmake_char, 3);
1493 EXFUN (Fdecode_char, 2);
1495 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
1496 Store character's ATTRIBUTES.
1500 Lisp_Object rest = attributes;
1501 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
1502 Lisp_Object character;
1506 while (CONSP (rest))
1508 Lisp_Object cell = Fcar (rest);
1512 signal_simple_error ("Invalid argument", attributes);
1513 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
1514 && ((XCHARSET_FINAL (ccs) != 0) ||
1515 (XCHARSET_UCS_MAX (ccs) > 0)) )
1519 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
1521 character = Fdecode_char (ccs, cell);
1522 goto setup_attributes;
1526 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
1529 signal_simple_error ("Invalid argument", attributes);
1531 character = make_char (XINT (code) + 0x100000);
1532 goto setup_attributes;
1536 else if (!INTP (code))
1537 signal_simple_error ("Invalid argument", attributes);
1539 character = make_char (XINT (code));
1543 while (CONSP (rest))
1545 Lisp_Object cell = Fcar (rest);
1548 signal_simple_error ("Invalid argument", attributes);
1550 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
1556 Lisp_Object Vutf_2000_version;
1560 int leading_code_private_11;
1563 Lisp_Object Qcharsetp;
1565 /* Qdoc_string, Qdimension, Qchars defined in general.c */
1566 Lisp_Object Qregistry, Qfinal, Qgraphic;
1567 Lisp_Object Qdirection;
1568 Lisp_Object Qreverse_direction_charset;
1569 Lisp_Object Qleading_byte;
1570 Lisp_Object Qshort_name, Qlong_name;
1584 Qcyrillic_iso8859_5,
1586 Qjapanese_jisx0208_1978,
1589 Qjapanese_jisx0208_1990,
1592 Qchinese_cns11643_1,
1593 Qchinese_cns11643_2,
1599 Qlatin_viscii_lower,
1600 Qlatin_viscii_upper,
1601 Qvietnamese_viscii_lower,
1602 Qvietnamese_viscii_upper,
1614 Qideograph_gt_pj_10,
1615 Qideograph_gt_pj_11,
1645 Lisp_Object Ql2r, Qr2l;
1647 Lisp_Object Vcharset_hash_table;
1649 /* Composite characters are characters constructed by overstriking two
1650 or more regular characters.
1652 1) The old Mule implementation involves storing composite characters
1653 in a buffer as a tag followed by all of the actual characters
1654 used to make up the composite character. I think this is a bad
1655 idea; it greatly complicates code that wants to handle strings
1656 one character at a time because it has to deal with the possibility
1657 of great big ungainly characters. It's much more reasonable to
1658 simply store an index into a table of composite characters.
1660 2) The current implementation only allows for 16,384 separate
1661 composite characters over the lifetime of the XEmacs process.
1662 This could become a potential problem if the user
1663 edited lots of different files that use composite characters.
1664 Due to FSF bogosity, increasing the number of allowable
1665 composite characters under Mule would decrease the number
1666 of possible faces that can exist. Mule already has shrunk
1667 this to 2048, and further shrinkage would become uncomfortable.
1668 No such problems exist in XEmacs.
1670 Composite characters could be represented as 0x80 C1 C2 C3,
1671 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1672 for slightly under 2^20 (one million) composite characters
1673 over the XEmacs process lifetime, and you only need to
1674 increase the size of a Mule character from 19 to 21 bits.
1675 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1676 85 million (slightly over 2^26) composite characters. */
1679 /************************************************************************/
1680 /* Basic Emchar functions */
1681 /************************************************************************/
1683 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1684 string in STR. Returns the number of bytes stored.
1685 Do not call this directly. Use the macro set_charptr_emchar() instead.
1689 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1695 Lisp_Object charset;
1704 else if ( c <= 0x7ff )
1706 *p++ = (c >> 6) | 0xc0;
1707 *p++ = (c & 0x3f) | 0x80;
1709 else if ( c <= 0xffff )
1711 *p++ = (c >> 12) | 0xe0;
1712 *p++ = ((c >> 6) & 0x3f) | 0x80;
1713 *p++ = (c & 0x3f) | 0x80;
1715 else if ( c <= 0x1fffff )
1717 *p++ = (c >> 18) | 0xf0;
1718 *p++ = ((c >> 12) & 0x3f) | 0x80;
1719 *p++ = ((c >> 6) & 0x3f) | 0x80;
1720 *p++ = (c & 0x3f) | 0x80;
1722 else if ( c <= 0x3ffffff )
1724 *p++ = (c >> 24) | 0xf8;
1725 *p++ = ((c >> 18) & 0x3f) | 0x80;
1726 *p++ = ((c >> 12) & 0x3f) | 0x80;
1727 *p++ = ((c >> 6) & 0x3f) | 0x80;
1728 *p++ = (c & 0x3f) | 0x80;
1732 *p++ = (c >> 30) | 0xfc;
1733 *p++ = ((c >> 24) & 0x3f) | 0x80;
1734 *p++ = ((c >> 18) & 0x3f) | 0x80;
1735 *p++ = ((c >> 12) & 0x3f) | 0x80;
1736 *p++ = ((c >> 6) & 0x3f) | 0x80;
1737 *p++ = (c & 0x3f) | 0x80;
1740 BREAKUP_CHAR (c, charset, c1, c2);
1741 lb = CHAR_LEADING_BYTE (c);
1742 if (LEADING_BYTE_PRIVATE_P (lb))
1743 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1745 if (EQ (charset, Vcharset_control_1))
1754 /* Return the first character from a Mule-encoded string in STR,
1755 assuming it's non-ASCII. Do not call this directly.
1756 Use the macro charptr_emchar() instead. */
1759 non_ascii_charptr_emchar (const Bufbyte *str)
1772 else if ( b >= 0xf8 )
1777 else if ( b >= 0xf0 )
1782 else if ( b >= 0xe0 )
1787 else if ( b >= 0xc0 )
1797 for( ; len > 0; len-- )
1800 ch = ( ch << 6 ) | ( b & 0x3f );
1804 Bufbyte i0 = *str, i1, i2 = 0;
1805 Lisp_Object charset;
1807 if (i0 == LEADING_BYTE_CONTROL_1)
1808 return (Emchar) (*++str - 0x20);
1810 if (LEADING_BYTE_PREFIX_P (i0))
1815 charset = CHARSET_BY_LEADING_BYTE (i0);
1816 if (XCHARSET_DIMENSION (charset) == 2)
1819 return MAKE_CHAR (charset, i1, i2);
1823 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1824 Do not call this directly. Use the macro valid_char_p() instead. */
1828 non_ascii_valid_char_p (Emchar ch)
1832 /* Must have only lowest 19 bits set */
1836 f1 = CHAR_FIELD1 (ch);
1837 f2 = CHAR_FIELD2 (ch);
1838 f3 = CHAR_FIELD3 (ch);
1842 Lisp_Object charset;
1844 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1845 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1846 f2 > MAX_CHAR_FIELD2_PRIVATE)
1851 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1852 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1856 NOTE: This takes advantage of the fact that
1857 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1858 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1860 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1861 if (EQ (charset, Qnil))
1863 return (XCHARSET_CHARS (charset) == 96);
1867 Lisp_Object charset;
1869 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1870 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1871 f1 > MAX_CHAR_FIELD1_PRIVATE)
1873 if (f2 < 0x20 || f3 < 0x20)
1876 #ifdef ENABLE_COMPOSITE_CHARS
1877 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1879 if (UNBOUNDP (Fgethash (make_int (ch),
1880 Vcomposite_char_char2string_hash_table,
1885 #endif /* ENABLE_COMPOSITE_CHARS */
1887 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1888 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1891 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1893 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1896 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1898 if (EQ (charset, Qnil))
1900 return (XCHARSET_CHARS (charset) == 96);
1906 /************************************************************************/
1907 /* Basic string functions */
1908 /************************************************************************/
1910 /* Copy the character pointed to by SRC into DST. Do not call this
1911 directly. Use the macro charptr_copy_char() instead.
1912 Return the number of bytes copied. */
1915 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
1917 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
1919 for (i = bytes; i; i--, dst++, src++)
1925 /************************************************************************/
1926 /* streams of Emchars */
1927 /************************************************************************/
1929 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1930 The functions below are not meant to be called directly; use
1931 the macros in insdel.h. */
1934 Lstream_get_emchar_1 (Lstream *stream, int ch)
1936 Bufbyte str[MAX_EMCHAR_LEN];
1937 Bufbyte *strptr = str;
1940 str[0] = (Bufbyte) ch;
1942 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
1944 int c = Lstream_getc (stream);
1945 bufpos_checking_assert (c >= 0);
1946 *++strptr = (Bufbyte) c;
1948 return charptr_emchar (str);
1952 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1954 Bufbyte str[MAX_EMCHAR_LEN];
1955 Bytecount len = set_charptr_emchar (str, ch);
1956 return Lstream_write (stream, str, len);
1960 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1962 Bufbyte str[MAX_EMCHAR_LEN];
1963 Bytecount len = set_charptr_emchar (str, ch);
1964 Lstream_unread (stream, str, len);
1968 /************************************************************************/
1969 /* charset object */
1970 /************************************************************************/
1973 mark_charset (Lisp_Object obj)
1975 Lisp_Charset *cs = XCHARSET (obj);
1977 mark_object (cs->short_name);
1978 mark_object (cs->long_name);
1979 mark_object (cs->doc_string);
1980 mark_object (cs->registry);
1981 mark_object (cs->ccl_program);
1983 mark_object (cs->encoding_table);
1984 /* mark_object (cs->decoding_table); */
1990 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1992 Lisp_Charset *cs = XCHARSET (obj);
1996 error ("printing unreadable object #<charset %s 0x%x>",
1997 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
2000 write_c_string ("#<charset ", printcharfun);
2001 print_internal (CHARSET_NAME (cs), printcharfun, 0);
2002 write_c_string (" ", printcharfun);
2003 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
2004 write_c_string (" ", printcharfun);
2005 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
2006 write_c_string (" ", printcharfun);
2007 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
2008 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
2010 CHARSET_DIMENSION (cs),
2011 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
2012 CHARSET_COLUMNS (cs),
2013 CHARSET_GRAPHIC (cs),
2014 CHARSET_FINAL (cs));
2015 write_c_string (buf, printcharfun);
2016 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
2017 sprintf (buf, " 0x%x>", cs->header.uid);
2018 write_c_string (buf, printcharfun);
2021 static const struct lrecord_description charset_description[] = {
2022 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
2023 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
2024 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
2025 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
2026 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
2027 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
2028 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
2030 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
2031 { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
2036 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
2037 mark_charset, print_charset, 0, 0, 0,
2038 charset_description,
2041 /* Make a new charset. */
2042 /* #### SJT Should generic properties be allowed? */
2044 make_charset (Charset_ID id, Lisp_Object name,
2045 unsigned short chars, unsigned char dimension,
2046 unsigned char columns, unsigned char graphic,
2047 Bufbyte final, unsigned char direction, Lisp_Object short_name,
2048 Lisp_Object long_name, Lisp_Object doc,
2050 Lisp_Object decoding_table,
2051 Emchar ucs_min, Emchar ucs_max,
2052 Emchar code_offset, unsigned char byte_offset)
2055 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
2059 XSETCHARSET (obj, cs);
2061 CHARSET_ID (cs) = id;
2062 CHARSET_NAME (cs) = name;
2063 CHARSET_SHORT_NAME (cs) = short_name;
2064 CHARSET_LONG_NAME (cs) = long_name;
2065 CHARSET_CHARS (cs) = chars;
2066 CHARSET_DIMENSION (cs) = dimension;
2067 CHARSET_DIRECTION (cs) = direction;
2068 CHARSET_COLUMNS (cs) = columns;
2069 CHARSET_GRAPHIC (cs) = graphic;
2070 CHARSET_FINAL (cs) = final;
2071 CHARSET_DOC_STRING (cs) = doc;
2072 CHARSET_REGISTRY (cs) = reg;
2073 CHARSET_CCL_PROGRAM (cs) = Qnil;
2074 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
2076 CHARSET_DECODING_TABLE(cs) = Qnil;
2077 CHARSET_ENCODING_TABLE(cs) = Qnil;
2078 CHARSET_UCS_MIN(cs) = ucs_min;
2079 CHARSET_UCS_MAX(cs) = ucs_max;
2080 CHARSET_CODE_OFFSET(cs) = code_offset;
2081 CHARSET_BYTE_OFFSET(cs) = byte_offset;
2085 if (id == LEADING_BYTE_ASCII)
2086 CHARSET_REP_BYTES (cs) = 1;
2088 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
2090 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
2095 /* some charsets do not have final characters. This includes
2096 ASCII, Control-1, Composite, and the two faux private
2098 unsigned char iso2022_type
2099 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
2101 if (code_offset == 0)
2103 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
2104 chlook->charset_by_attributes[iso2022_type][final] = obj;
2108 (chlook->charset_by_attributes[iso2022_type][final][direction]));
2109 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
2113 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
2114 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
2116 /* Some charsets are "faux" and don't have names or really exist at
2117 all except in the leading-byte table. */
2119 Fputhash (name, obj, Vcharset_hash_table);
2124 get_unallocated_leading_byte (int dimension)
2129 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
2132 lb = chlook->next_allocated_leading_byte++;
2136 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
2139 lb = chlook->next_allocated_1_byte_leading_byte++;
2143 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
2146 lb = chlook->next_allocated_2_byte_leading_byte++;
2152 ("No more character sets free for this dimension",
2153 make_int (dimension));
2159 /* Number of Big5 characters which have the same code in 1st byte. */
2161 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
2164 make_builtin_char (Lisp_Object charset, int c1, int c2)
2166 if (XCHARSET_UCS_MAX (charset))
2169 = (XCHARSET_DIMENSION (charset) == 1
2171 c1 - XCHARSET_BYTE_OFFSET (charset)
2173 (c1 - XCHARSET_BYTE_OFFSET (charset)) * XCHARSET_CHARS (charset)
2174 + c2 - XCHARSET_BYTE_OFFSET (charset))
2175 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
2176 if ((code < XCHARSET_UCS_MIN (charset))
2177 || (XCHARSET_UCS_MAX (charset) < code))
2178 signal_simple_error ("Arguments makes invalid character",
2182 else if (XCHARSET_DIMENSION (charset) == 1)
2184 switch (XCHARSET_CHARS (charset))
2188 + (XCHARSET_FINAL (charset) - '0') * 94 + (c1 - 33);
2191 + (XCHARSET_FINAL (charset) - '0') * 96 + (c1 - 32);
2198 if (EQ (charset, Vcharset_chinese_big5))
2200 int B1 = c1, B2 = c2;
2202 = (B1 - 0xA1) * BIG5_SAME_ROW
2203 + B2 - (B2 < 0x7F ? 0x40 : 0x62);
2207 charset = Vcharset_chinese_big5_1;
2211 charset = Vcharset_chinese_big5_2;
2212 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
2217 switch (XCHARSET_CHARS (charset))
2220 return MIN_CHAR_94x94
2221 + (XCHARSET_FINAL (charset) - '0') * 94 * 94
2222 + (c1 - 33) * 94 + (c2 - 33);
2224 return MIN_CHAR_96x96
2225 + (XCHARSET_FINAL (charset) - '0') * 96 * 96
2226 + (c1 - 32) * 96 + (c2 - 32);
2234 range_charset_code_point (Lisp_Object charset, Emchar ch)
2238 if ((XCHARSET_UCS_MIN (charset) <= ch)
2239 && (ch <= XCHARSET_UCS_MAX (charset)))
2241 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
2243 if (XCHARSET_CHARS (charset) == 256)
2245 else if (XCHARSET_DIMENSION (charset) == 1)
2246 return d + XCHARSET_BYTE_OFFSET (charset);
2247 else if (XCHARSET_DIMENSION (charset) == 2)
2249 ((d / XCHARSET_CHARS (charset)
2250 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2251 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2252 else if (XCHARSET_DIMENSION (charset) == 3)
2254 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2255 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2256 | ((d / XCHARSET_CHARS (charset)
2257 % XCHARSET_CHARS (charset)
2258 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2259 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2260 else /* if (XCHARSET_DIMENSION (charset) == 4) */
2262 ((d / (XCHARSET_CHARS (charset)
2263 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2264 + XCHARSET_BYTE_OFFSET (charset)) << 24)
2265 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2266 % XCHARSET_CHARS (charset)
2267 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2268 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
2269 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2270 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2272 else if (XCHARSET_CODE_OFFSET (charset) == 0)
2274 if (XCHARSET_DIMENSION (charset) == 1)
2276 if (XCHARSET_CHARS (charset) == 94)
2278 if (((d = ch - (MIN_CHAR_94
2279 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
2283 else if (XCHARSET_CHARS (charset) == 96)
2285 if (((d = ch - (MIN_CHAR_96
2286 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
2293 else if (XCHARSET_DIMENSION (charset) == 2)
2295 if (XCHARSET_CHARS (charset) == 94)
2297 if (((d = ch - (MIN_CHAR_94x94
2298 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
2301 return (((d / 94) + 33) << 8) | (d % 94 + 33);
2303 else if (XCHARSET_CHARS (charset) == 96)
2305 if (((d = ch - (MIN_CHAR_96x96
2306 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
2309 return (((d / 96) + 32) << 8) | (d % 96 + 32);
2315 if (EQ (charset, Vcharset_mojikyo_2022_1)
2316 && (MIN_CHAR_MOJIKYO < ch) && (ch < MIN_CHAR_MOJIKYO + 94 * 60 * 94))
2318 int m = ch - MIN_CHAR_MOJIKYO - 1;
2319 int byte1 = m / (94 * 60) + 33;
2320 int byte2 = (m % (94 * 60)) / 94;
2321 int byte3 = m % 94 + 33;
2327 return (byte1 << 16) | (byte2 << 8) | byte3;
2333 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
2335 if (c <= MAX_CHAR_BASIC_LATIN)
2337 *charset = Vcharset_ascii;
2342 *charset = Vcharset_control_1;
2347 *charset = Vcharset_latin_iso8859_1;
2351 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
2353 *charset = Vcharset_greek_iso8859_7;
2354 return c - MIN_CHAR_GREEK + 0x20;
2356 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
2358 *charset = Vcharset_cyrillic_iso8859_5;
2359 return c - MIN_CHAR_CYRILLIC + 0x20;
2362 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
2364 *charset = Vcharset_hebrew_iso8859_8;
2365 return c - MIN_CHAR_HEBREW + 0x20;
2367 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
2369 *charset = Vcharset_thai_tis620;
2370 return c - MIN_CHAR_THAI + 0x20;
2373 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
2374 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
2376 return list2 (Vcharset_katakana_jisx0201,
2377 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
2380 else if (c <= MAX_CHAR_BMP)
2382 *charset = Vcharset_ucs_bmp;
2385 else if (c < MIN_CHAR_DAIKANWA)
2387 *charset = Vcharset_ucs;
2390 else if (c <= MAX_CHAR_DAIKANWA)
2392 *charset = Vcharset_ideograph_daikanwa;
2393 return c - MIN_CHAR_DAIKANWA;
2395 else if (c <= MAX_CHAR_MOJIKYO_0)
2397 *charset = Vcharset_mojikyo;
2398 return c - MIN_CHAR_MOJIKYO_0;
2400 else if (c < MIN_CHAR_94)
2402 *charset = Vcharset_ucs;
2405 else if (c <= MAX_CHAR_94)
2407 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
2408 ((c - MIN_CHAR_94) / 94) + '0',
2409 CHARSET_LEFT_TO_RIGHT);
2410 if (!NILP (*charset))
2411 return ((c - MIN_CHAR_94) % 94) + 33;
2414 *charset = Vcharset_ucs;
2418 else if (c <= MAX_CHAR_96)
2420 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
2421 ((c - MIN_CHAR_96) / 96) + '0',
2422 CHARSET_LEFT_TO_RIGHT);
2423 if (!NILP (*charset))
2424 return ((c - MIN_CHAR_96) % 96) + 32;
2427 *charset = Vcharset_ucs;
2431 else if (c <= MAX_CHAR_94x94)
2434 = CHARSET_BY_ATTRIBUTES (94, 2,
2435 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
2436 CHARSET_LEFT_TO_RIGHT);
2437 if (!NILP (*charset))
2438 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
2439 | (((c - MIN_CHAR_94x94) % 94) + 33);
2442 *charset = Vcharset_ucs;
2446 else if (c <= MAX_CHAR_96x96)
2449 = CHARSET_BY_ATTRIBUTES (96, 2,
2450 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
2451 CHARSET_LEFT_TO_RIGHT);
2452 if (!NILP (*charset))
2453 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
2454 | (((c - MIN_CHAR_96x96) % 96) + 32);
2457 *charset = Vcharset_ucs;
2461 else if (c < MIN_CHAR_MOJIKYO)
2463 *charset = Vcharset_ucs;
2466 else if (c <= MAX_CHAR_MOJIKYO)
2468 *charset = Vcharset_mojikyo;
2469 return c - MIN_CHAR_MOJIKYO;
2473 *charset = Vcharset_ucs;
2478 Lisp_Object Vdefault_coded_charset_priority_list;
2482 /************************************************************************/
2483 /* Basic charset Lisp functions */
2484 /************************************************************************/
2486 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
2487 Return non-nil if OBJECT is a charset.
2491 return CHARSETP (object) ? Qt : Qnil;
2494 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
2495 Retrieve the charset of the given name.
2496 If CHARSET-OR-NAME is a charset object, it is simply returned.
2497 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
2498 nil is returned. Otherwise the associated charset object is returned.
2502 if (CHARSETP (charset_or_name))
2503 return charset_or_name;
2505 CHECK_SYMBOL (charset_or_name);
2506 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
2509 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
2510 Retrieve the charset of the given name.
2511 Same as `find-charset' except an error is signalled if there is no such
2512 charset instead of returning nil.
2516 Lisp_Object charset = Ffind_charset (name);
2519 signal_simple_error ("No such charset", name);
2523 /* We store the charsets in hash tables with the names as the key and the
2524 actual charset object as the value. Occasionally we need to use them
2525 in a list format. These routines provide us with that. */
2526 struct charset_list_closure
2528 Lisp_Object *charset_list;
2532 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
2533 void *charset_list_closure)
2535 /* This function can GC */
2536 struct charset_list_closure *chcl =
2537 (struct charset_list_closure*) charset_list_closure;
2538 Lisp_Object *charset_list = chcl->charset_list;
2540 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
2544 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
2545 Return a list of the names of all defined charsets.
2549 Lisp_Object charset_list = Qnil;
2550 struct gcpro gcpro1;
2551 struct charset_list_closure charset_list_closure;
2553 GCPRO1 (charset_list);
2554 charset_list_closure.charset_list = &charset_list;
2555 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
2556 &charset_list_closure);
2559 return charset_list;
2562 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
2563 Return the name of charset CHARSET.
2567 return XCHARSET_NAME (Fget_charset (charset));
2570 /* #### SJT Should generic properties be allowed? */
2571 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
2572 Define a new character set.
2573 This function is for use with Mule support.
2574 NAME is a symbol, the name by which the character set is normally referred.
2575 DOC-STRING is a string describing the character set.
2576 PROPS is a property list, describing the specific nature of the
2577 character set. Recognized properties are:
2579 'short-name Short version of the charset name (ex: Latin-1)
2580 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
2581 'registry A regular expression matching the font registry field for
2583 'dimension Number of octets used to index a character in this charset.
2584 Either 1 or 2. Defaults to 1.
2585 'columns Number of columns used to display a character in this charset.
2586 Only used in TTY mode. (Under X, the actual width of a
2587 character can be derived from the font used to display the
2588 characters.) If unspecified, defaults to the dimension
2589 (this is almost always the correct value).
2590 'chars Number of characters in each dimension (94 or 96).
2591 Defaults to 94. Note that if the dimension is 2, the
2592 character set thus described is 94x94 or 96x96.
2593 'final Final byte of ISO 2022 escape sequence. Must be
2594 supplied. Each combination of (DIMENSION, CHARS) defines a
2595 separate namespace for final bytes. Note that ISO
2596 2022 restricts the final byte to the range
2597 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
2598 dimension == 2. Note also that final bytes in the range
2599 0x30 - 0x3F are reserved for user-defined (not official)
2601 'graphic 0 (use left half of font on output) or 1 (use right half
2602 of font on output). Defaults to 0. For example, for
2603 a font whose registry is ISO8859-1, the left half
2604 (octets 0x20 - 0x7F) is the `ascii' character set, while
2605 the right half (octets 0xA0 - 0xFF) is the `latin-1'
2606 character set. With 'graphic set to 0, the octets
2607 will have their high bit cleared; with it set to 1,
2608 the octets will have their high bit set.
2609 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
2611 'ccl-program A compiled CCL program used to convert a character in
2612 this charset into an index into the font. This is in
2613 addition to the 'graphic property. The CCL program
2614 is passed the octets of the character, with the high
2615 bit cleared and set depending upon whether the value
2616 of the 'graphic property is 0 or 1.
2618 (name, doc_string, props))
2620 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
2621 int direction = CHARSET_LEFT_TO_RIGHT;
2622 Lisp_Object registry = Qnil;
2623 Lisp_Object charset;
2624 Lisp_Object ccl_program = Qnil;
2625 Lisp_Object short_name = Qnil, long_name = Qnil;
2626 int byte_offset = -1;
2628 CHECK_SYMBOL (name);
2629 if (!NILP (doc_string))
2630 CHECK_STRING (doc_string);
2632 charset = Ffind_charset (name);
2633 if (!NILP (charset))
2634 signal_simple_error ("Cannot redefine existing charset", name);
2637 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
2639 if (EQ (keyword, Qshort_name))
2641 CHECK_STRING (value);
2645 if (EQ (keyword, Qlong_name))
2647 CHECK_STRING (value);
2651 else if (EQ (keyword, Qdimension))
2654 dimension = XINT (value);
2655 if (dimension < 1 || dimension > 2)
2656 signal_simple_error ("Invalid value for 'dimension", value);
2659 else if (EQ (keyword, Qchars))
2662 chars = XINT (value);
2663 if (chars != 94 && chars != 96)
2664 signal_simple_error ("Invalid value for 'chars", value);
2667 else if (EQ (keyword, Qcolumns))
2670 columns = XINT (value);
2671 if (columns != 1 && columns != 2)
2672 signal_simple_error ("Invalid value for 'columns", value);
2675 else if (EQ (keyword, Qgraphic))
2678 graphic = XINT (value);
2680 if (graphic < 0 || graphic > 2)
2682 if (graphic < 0 || graphic > 1)
2684 signal_simple_error ("Invalid value for 'graphic", value);
2687 else if (EQ (keyword, Qregistry))
2689 CHECK_STRING (value);
2693 else if (EQ (keyword, Qdirection))
2695 if (EQ (value, Ql2r))
2696 direction = CHARSET_LEFT_TO_RIGHT;
2697 else if (EQ (value, Qr2l))
2698 direction = CHARSET_RIGHT_TO_LEFT;
2700 signal_simple_error ("Invalid value for 'direction", value);
2703 else if (EQ (keyword, Qfinal))
2705 CHECK_CHAR_COERCE_INT (value);
2706 final = XCHAR (value);
2707 if (final < '0' || final > '~')
2708 signal_simple_error ("Invalid value for 'final", value);
2711 else if (EQ (keyword, Qccl_program))
2713 struct ccl_program test_ccl;
2715 if (setup_ccl_program (&test_ccl, value) < 0)
2716 signal_simple_error ("Invalid value for 'ccl-program", value);
2717 ccl_program = value;
2721 signal_simple_error ("Unrecognized property", keyword);
2726 error ("'final must be specified");
2727 if (dimension == 2 && final > 0x5F)
2729 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2732 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2733 CHARSET_LEFT_TO_RIGHT)) ||
2734 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2735 CHARSET_RIGHT_TO_LEFT)))
2737 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2739 id = get_unallocated_leading_byte (dimension);
2741 if (NILP (doc_string))
2742 doc_string = build_string ("");
2744 if (NILP (registry))
2745 registry = build_string ("");
2747 if (NILP (short_name))
2748 XSETSTRING (short_name, XSYMBOL (name)->name);
2750 if (NILP (long_name))
2751 long_name = doc_string;
2754 columns = dimension;
2756 if (byte_offset < 0)
2760 else if (chars == 96)
2766 charset = make_charset (id, name, chars, dimension, columns, graphic,
2767 final, direction, short_name, long_name,
2768 doc_string, registry,
2769 Qnil, 0, 0, 0, byte_offset);
2770 if (!NILP (ccl_program))
2771 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2775 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2777 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2778 NEW-NAME is the name of the new charset. Return the new charset.
2780 (charset, new_name))
2782 Lisp_Object new_charset = Qnil;
2783 int id, chars, dimension, columns, graphic, final;
2785 Lisp_Object registry, doc_string, short_name, long_name;
2788 charset = Fget_charset (charset);
2789 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2790 signal_simple_error ("Charset already has reverse-direction charset",
2793 CHECK_SYMBOL (new_name);
2794 if (!NILP (Ffind_charset (new_name)))
2795 signal_simple_error ("Cannot redefine existing charset", new_name);
2797 cs = XCHARSET (charset);
2799 chars = CHARSET_CHARS (cs);
2800 dimension = CHARSET_DIMENSION (cs);
2801 columns = CHARSET_COLUMNS (cs);
2802 id = get_unallocated_leading_byte (dimension);
2804 graphic = CHARSET_GRAPHIC (cs);
2805 final = CHARSET_FINAL (cs);
2806 direction = CHARSET_RIGHT_TO_LEFT;
2807 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2808 direction = CHARSET_LEFT_TO_RIGHT;
2809 doc_string = CHARSET_DOC_STRING (cs);
2810 short_name = CHARSET_SHORT_NAME (cs);
2811 long_name = CHARSET_LONG_NAME (cs);
2812 registry = CHARSET_REGISTRY (cs);
2814 new_charset = make_charset (id, new_name, chars, dimension, columns,
2815 graphic, final, direction, short_name, long_name,
2816 doc_string, registry,
2818 CHARSET_DECODING_TABLE(cs),
2819 CHARSET_UCS_MIN(cs),
2820 CHARSET_UCS_MAX(cs),
2821 CHARSET_CODE_OFFSET(cs),
2822 CHARSET_BYTE_OFFSET(cs)
2828 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2829 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
2834 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2835 Define symbol ALIAS as an alias for CHARSET.
2839 CHECK_SYMBOL (alias);
2840 charset = Fget_charset (charset);
2841 return Fputhash (alias, charset, Vcharset_hash_table);
2844 /* #### Reverse direction charsets not yet implemented. */
2846 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2848 Return the reverse-direction charset parallel to CHARSET, if any.
2849 This is the charset with the same properties (in particular, the same
2850 dimension, number of characters per dimension, and final byte) as
2851 CHARSET but whose characters are displayed in the opposite direction.
2855 charset = Fget_charset (charset);
2856 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
2860 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2861 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2862 If DIRECTION is omitted, both directions will be checked (left-to-right
2863 will be returned if character sets exist for both directions).
2865 (dimension, chars, final, direction))
2867 int dm, ch, fi, di = -1;
2868 Lisp_Object obj = Qnil;
2870 CHECK_INT (dimension);
2871 dm = XINT (dimension);
2872 if (dm < 1 || dm > 2)
2873 signal_simple_error ("Invalid value for DIMENSION", dimension);
2877 if (ch != 94 && ch != 96)
2878 signal_simple_error ("Invalid value for CHARS", chars);
2880 CHECK_CHAR_COERCE_INT (final);
2882 if (fi < '0' || fi > '~')
2883 signal_simple_error ("Invalid value for FINAL", final);
2885 if (EQ (direction, Ql2r))
2886 di = CHARSET_LEFT_TO_RIGHT;
2887 else if (EQ (direction, Qr2l))
2888 di = CHARSET_RIGHT_TO_LEFT;
2889 else if (!NILP (direction))
2890 signal_simple_error ("Invalid value for DIRECTION", direction);
2892 if (dm == 2 && fi > 0x5F)
2894 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2898 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
2900 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
2903 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
2906 return XCHARSET_NAME (obj);
2910 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2911 Return short name of CHARSET.
2915 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2918 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2919 Return long name of CHARSET.
2923 return XCHARSET_LONG_NAME (Fget_charset (charset));
2926 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2927 Return description of CHARSET.
2931 return XCHARSET_DOC_STRING (Fget_charset (charset));
2934 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2935 Return dimension of CHARSET.
2939 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2942 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2943 Return property PROP of CHARSET, a charset object or symbol naming a charset.
2944 Recognized properties are those listed in `make-charset', as well as
2945 'name and 'doc-string.
2951 charset = Fget_charset (charset);
2952 cs = XCHARSET (charset);
2954 CHECK_SYMBOL (prop);
2955 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2956 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2957 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2958 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2959 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2960 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2961 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2962 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2963 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2964 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2965 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2966 if (EQ (prop, Qdirection))
2967 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2968 if (EQ (prop, Qreverse_direction_charset))
2970 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2971 /* #### Is this translation OK? If so, error checking sufficient? */
2972 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
2974 signal_simple_error ("Unrecognized charset property name", prop);
2975 return Qnil; /* not reached */
2978 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2979 Return charset identification number of CHARSET.
2983 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2986 /* #### We need to figure out which properties we really want to
2989 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2990 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2992 (charset, ccl_program))
2994 struct ccl_program test_ccl;
2996 charset = Fget_charset (charset);
2997 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
2998 signal_simple_error ("Invalid ccl-program", ccl_program);
2999 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
3004 invalidate_charset_font_caches (Lisp_Object charset)
3006 /* Invalidate font cache entries for charset on all devices. */
3007 Lisp_Object devcons, concons, hash_table;
3008 DEVICE_LOOP_NO_BREAK (devcons, concons)
3010 struct device *d = XDEVICE (XCAR (devcons));
3011 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
3012 if (!UNBOUNDP (hash_table))
3013 Fclrhash (hash_table);
3017 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
3018 Set the 'registry property of CHARSET to REGISTRY.
3020 (charset, registry))
3022 charset = Fget_charset (charset);
3023 CHECK_STRING (registry);
3024 XCHARSET_REGISTRY (charset) = registry;
3025 invalidate_charset_font_caches (charset);
3026 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
3031 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
3032 Return mapping-table of CHARSET.
3036 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
3039 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
3040 Set mapping-table of CHARSET to TABLE.
3044 struct Lisp_Charset *cs;
3048 charset = Fget_charset (charset);
3049 cs = XCHARSET (charset);
3053 if (VECTORP (CHARSET_DECODING_TABLE(cs)))
3054 make_vector_newer (CHARSET_DECODING_TABLE(cs));
3055 CHARSET_DECODING_TABLE(cs) = Qnil;
3058 else if (VECTORP (table))
3060 int ccs_len = CHARSET_BYTE_SIZE (cs);
3061 int ret = decoding_table_check_elements (table,
3062 CHARSET_DIMENSION (cs),
3067 signal_simple_error ("Too big table", table);
3069 signal_simple_error ("Invalid element is found", table);
3071 signal_simple_error ("Something wrong", table);
3073 CHARSET_DECODING_TABLE(cs) = Qnil;
3076 signal_error (Qwrong_type_argument,
3077 list2 (build_translated_string ("vector-or-nil-p"),
3080 byte_offset = CHARSET_BYTE_OFFSET (cs);
3081 switch (CHARSET_DIMENSION (cs))
3084 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3086 Lisp_Object c = XVECTOR_DATA(table)[i];
3089 put_char_ccs_code_point (c, charset,
3090 make_int (i + byte_offset));
3094 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3096 Lisp_Object v = XVECTOR_DATA(table)[i];
3102 for (j = 0; j < XVECTOR_LENGTH (v); j++)
3104 Lisp_Object c = XVECTOR_DATA(v)[j];
3107 put_char_ccs_code_point
3109 make_int ( ( (i + byte_offset) << 8 )
3115 put_char_ccs_code_point (v, charset,
3116 make_int (i + byte_offset));
3125 /************************************************************************/
3126 /* Lisp primitives for working with characters */
3127 /************************************************************************/
3130 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
3131 Make a character from CHARSET and code-point CODE.
3137 charset = Fget_charset (charset);
3140 if (XCHARSET_GRAPHIC (charset) == 1)
3142 return make_char (DECODE_CHAR (charset, c));
3145 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
3146 Make a builtin character from CHARSET and code-point CODE.
3153 charset = Fget_charset (charset);
3157 if ((final = XCHARSET_FINAL (charset)) >= '0')
3159 if (XCHARSET_DIMENSION (charset) == 1)
3161 switch (XCHARSET_CHARS (charset))
3165 make_char (MIN_CHAR_94 + (final - '0') * 94
3166 + ((c & 0x7F) - 33));
3169 make_char (MIN_CHAR_96 + (final - '0') * 96
3170 + ((c & 0x7F) - 32));
3172 return Fdecode_char (charset, code);
3177 switch (XCHARSET_CHARS (charset))
3181 make_char (MIN_CHAR_94x94
3182 + (final - '0') * 94 * 94
3183 + (((c >> 8) & 0x7F) - 33) * 94
3184 + ((c & 0x7F) - 33));
3187 make_char (MIN_CHAR_96x96
3188 + (final - '0') * 96 * 96
3189 + (((c >> 8) & 0x7F) - 32) * 96
3190 + ((c & 0x7F) - 32));
3192 return Fdecode_char (charset, code);
3196 else if (XCHARSET_UCS_MAX (charset))
3199 = (XCHARSET_DIMENSION (charset) == 1
3201 c - XCHARSET_BYTE_OFFSET (charset)
3203 ((c >> 8) - XCHARSET_BYTE_OFFSET (charset))
3204 * XCHARSET_CHARS (charset)
3205 + (c & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
3206 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
3207 if ((cid < XCHARSET_UCS_MIN (charset))
3208 || (XCHARSET_UCS_MAX (charset) < cid))
3209 return Fdecode_char (charset, code);
3210 return make_char (cid);
3213 return Fdecode_char (charset, code);
3217 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
3218 Make a character from CHARSET and octets ARG1 and ARG2.
3219 ARG2 is required only for characters from two-dimensional charsets.
3220 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
3221 character s with caron.
3223 (charset, arg1, arg2))
3227 int lowlim, highlim;
3229 charset = Fget_charset (charset);
3230 cs = XCHARSET (charset);
3232 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
3233 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
3235 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
3237 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
3238 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
3241 /* It is useful (and safe, according to Olivier Galibert) to strip
3242 the 8th bit off ARG1 and ARG2 because it allows programmers to
3243 write (make-char 'latin-iso8859-2 CODE) where code is the actual
3244 Latin 2 code of the character. */
3252 if (a1 < lowlim || a1 > highlim)
3253 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
3255 if (CHARSET_DIMENSION (cs) == 1)
3259 ("Charset is of dimension one; second octet must be nil", arg2);
3260 return make_char (MAKE_CHAR (charset, a1, 0));
3269 a2 = XINT (arg2) & 0x7f;
3271 if (a2 < lowlim || a2 > highlim)
3272 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
3274 return make_char (MAKE_CHAR (charset, a1, a2));
3277 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
3278 Return the character set of CHARACTER.
3282 CHECK_CHAR_COERCE_INT (character);
3284 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
3287 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
3288 Return the octet numbered N (should be 0 or 1) of CHARACTER.
3289 N defaults to 0 if omitted.
3293 Lisp_Object charset;
3296 CHECK_CHAR_COERCE_INT (character);
3298 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
3300 if (NILP (n) || EQ (n, Qzero))
3301 return make_int (octet0);
3302 else if (EQ (n, make_int (1)))
3303 return make_int (octet1);
3305 signal_simple_error ("Octet number must be 0 or 1", n);
3308 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
3309 Return list of charset and one or two position-codes of CHARACTER.
3313 /* This function can GC */
3314 struct gcpro gcpro1, gcpro2;
3315 Lisp_Object charset = Qnil;
3316 Lisp_Object rc = Qnil;
3324 GCPRO2 (charset, rc);
3325 CHECK_CHAR_COERCE_INT (character);
3328 code_point = ENCODE_CHAR (XCHAR (character), charset);
3329 dimension = XCHARSET_DIMENSION (charset);
3330 while (dimension > 0)
3332 rc = Fcons (make_int (code_point & 255), rc);
3336 rc = Fcons (XCHARSET_NAME (charset), rc);
3338 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
3340 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
3342 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
3346 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
3355 #ifdef ENABLE_COMPOSITE_CHARS
3356 /************************************************************************/
3357 /* composite character functions */
3358 /************************************************************************/
3361 lookup_composite_char (Bufbyte *str, int len)
3363 Lisp_Object lispstr = make_string (str, len);
3364 Lisp_Object ch = Fgethash (lispstr,
3365 Vcomposite_char_string2char_hash_table,
3371 if (composite_char_row_next >= 128)
3372 signal_simple_error ("No more composite chars available", lispstr);
3373 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
3374 composite_char_col_next);
3375 Fputhash (make_char (emch), lispstr,
3376 Vcomposite_char_char2string_hash_table);
3377 Fputhash (lispstr, make_char (emch),
3378 Vcomposite_char_string2char_hash_table);
3379 composite_char_col_next++;
3380 if (composite_char_col_next >= 128)
3382 composite_char_col_next = 32;
3383 composite_char_row_next++;
3392 composite_char_string (Emchar ch)
3394 Lisp_Object str = Fgethash (make_char (ch),
3395 Vcomposite_char_char2string_hash_table,
3397 assert (!UNBOUNDP (str));
3401 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
3402 Convert a string into a single composite character.
3403 The character is the result of overstriking all the characters in
3408 CHECK_STRING (string);
3409 return make_char (lookup_composite_char (XSTRING_DATA (string),
3410 XSTRING_LENGTH (string)));
3413 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
3414 Return a string of the characters comprising a composite character.
3422 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
3423 signal_simple_error ("Must be composite char", ch);
3424 return composite_char_string (emch);
3426 #endif /* ENABLE_COMPOSITE_CHARS */
3429 /************************************************************************/
3430 /* initialization */
3431 /************************************************************************/
3434 syms_of_mule_charset (void)
3437 INIT_LRECORD_IMPLEMENTATION (uint8_byte_table);
3438 INIT_LRECORD_IMPLEMENTATION (uint16_byte_table);
3439 INIT_LRECORD_IMPLEMENTATION (byte_table);
3440 INIT_LRECORD_IMPLEMENTATION (char_id_table);
3442 INIT_LRECORD_IMPLEMENTATION (charset);
3444 DEFSUBR (Fcharsetp);
3445 DEFSUBR (Ffind_charset);
3446 DEFSUBR (Fget_charset);
3447 DEFSUBR (Fcharset_list);
3448 DEFSUBR (Fcharset_name);
3449 DEFSUBR (Fmake_charset);
3450 DEFSUBR (Fmake_reverse_direction_charset);
3451 /* DEFSUBR (Freverse_direction_charset); */
3452 DEFSUBR (Fdefine_charset_alias);
3453 DEFSUBR (Fcharset_from_attributes);
3454 DEFSUBR (Fcharset_short_name);
3455 DEFSUBR (Fcharset_long_name);
3456 DEFSUBR (Fcharset_description);
3457 DEFSUBR (Fcharset_dimension);
3458 DEFSUBR (Fcharset_property);
3459 DEFSUBR (Fcharset_id);
3460 DEFSUBR (Fset_charset_ccl_program);
3461 DEFSUBR (Fset_charset_registry);
3463 DEFSUBR (Fchar_attribute_list);
3464 DEFSUBR (Ffind_char_attribute_table);
3465 DEFSUBR (Fchar_attribute_alist);
3466 DEFSUBR (Fget_char_attribute);
3467 DEFSUBR (Fput_char_attribute);
3468 DEFSUBR (Fremove_char_attribute);
3469 DEFSUBR (Fdefine_char);
3470 DEFSUBR (Fchar_variants);
3471 DEFSUBR (Fget_composite_char);
3472 DEFSUBR (Fcharset_mapping_table);
3473 DEFSUBR (Fset_charset_mapping_table);
3477 DEFSUBR (Fdecode_char);
3478 DEFSUBR (Fdecode_builtin_char);
3480 DEFSUBR (Fmake_char);
3481 DEFSUBR (Fchar_charset);
3482 DEFSUBR (Fchar_octet);
3483 DEFSUBR (Fsplit_char);
3485 #ifdef ENABLE_COMPOSITE_CHARS
3486 DEFSUBR (Fmake_composite_char);
3487 DEFSUBR (Fcomposite_char_string);
3490 defsymbol (&Qcharsetp, "charsetp");
3491 defsymbol (&Qregistry, "registry");
3492 defsymbol (&Qfinal, "final");
3493 defsymbol (&Qgraphic, "graphic");
3494 defsymbol (&Qdirection, "direction");
3495 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
3496 defsymbol (&Qshort_name, "short-name");
3497 defsymbol (&Qlong_name, "long-name");
3499 defsymbol (&Ql2r, "l2r");
3500 defsymbol (&Qr2l, "r2l");
3502 /* Charsets, compatible with FSF 20.3
3503 Naming convention is Script-Charset[-Edition] */
3504 defsymbol (&Qascii, "ascii");
3505 defsymbol (&Qcontrol_1, "control-1");
3506 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
3507 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
3508 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
3509 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
3510 defsymbol (&Qthai_tis620, "thai-tis620");
3511 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
3512 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
3513 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
3514 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
3515 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
3516 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
3517 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
3518 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
3519 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
3520 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
3521 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
3522 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
3523 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
3524 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
3525 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
3527 defsymbol (&Q_ucs, "->ucs");
3528 defsymbol (&Q_decomposition, "->decomposition");
3529 defsymbol (&Qcompat, "compat");
3530 defsymbol (&Qisolated, "isolated");
3531 defsymbol (&Qinitial, "initial");
3532 defsymbol (&Qmedial, "medial");
3533 defsymbol (&Qfinal, "final");
3534 defsymbol (&Qvertical, "vertical");
3535 defsymbol (&QnoBreak, "noBreak");
3536 defsymbol (&Qfraction, "fraction");
3537 defsymbol (&Qsuper, "super");
3538 defsymbol (&Qsub, "sub");
3539 defsymbol (&Qcircle, "circle");
3540 defsymbol (&Qsquare, "square");
3541 defsymbol (&Qwide, "wide");
3542 defsymbol (&Qnarrow, "narrow");
3543 defsymbol (&Qsmall, "small");
3544 defsymbol (&Qfont, "font");
3545 defsymbol (&Qucs, "ucs");
3546 defsymbol (&Qucs_bmp, "ucs-bmp");
3547 defsymbol (&Qucs_cns, "ucs-cns");
3548 defsymbol (&Qlatin_viscii, "latin-viscii");
3549 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
3550 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
3551 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
3552 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
3553 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
3554 defsymbol (&Qideograph_gt, "ideograph-gt");
3555 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
3556 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
3557 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
3558 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
3559 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
3560 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
3561 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
3562 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
3563 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
3564 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
3565 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
3566 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
3567 defsymbol (&Qchinese_big5, "chinese-big5");
3568 defsymbol (&Qmojikyo, "mojikyo");
3569 defsymbol (&Qmojikyo_2022_1, "mojikyo-2022-1");
3570 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
3571 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
3572 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
3573 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
3574 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
3575 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
3576 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
3577 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
3578 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
3579 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
3580 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
3581 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
3582 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
3583 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
3584 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
3585 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
3586 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
3587 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
3588 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
3589 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
3590 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
3591 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
3593 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
3594 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
3596 defsymbol (&Qcomposite, "composite");
3600 vars_of_mule_charset (void)
3607 chlook = xnew (struct charset_lookup);
3608 dumpstruct (&chlook, &charset_lookup_description);
3610 /* Table of charsets indexed by leading byte. */
3611 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
3612 chlook->charset_by_leading_byte[i] = Qnil;
3615 /* Table of charsets indexed by type/final-byte. */
3616 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3617 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3618 chlook->charset_by_attributes[i][j] = Qnil;
3620 /* Table of charsets indexed by type/final-byte/direction. */
3621 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3622 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3623 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
3624 chlook->charset_by_attributes[i][j][k] = Qnil;
3628 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
3630 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
3631 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
3635 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3636 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
3637 Leading-code of private TYPE9N charset of column-width 1.
3639 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3643 Vutf_2000_version = build_string("0.17 (Hōryūji)");
3644 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
3645 Version number of UTF-2000.
3648 staticpro (&Vcharacter_composition_table);
3649 Vcharacter_composition_table = make_char_id_table (Qnil);
3651 staticpro (&Vcharacter_variant_table);
3652 Vcharacter_variant_table = make_char_id_table (Qnil);
3654 Vdefault_coded_charset_priority_list = Qnil;
3655 DEFVAR_LISP ("default-coded-charset-priority-list",
3656 &Vdefault_coded_charset_priority_list /*
3657 Default order of preferred coded-character-sets.
3663 complex_vars_of_mule_charset (void)
3665 staticpro (&Vcharset_hash_table);
3666 Vcharset_hash_table =
3667 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3669 /* Predefined character sets. We store them into variables for
3673 staticpro (&Vchar_attribute_hash_table);
3674 Vchar_attribute_hash_table
3675 = make_lisp_hash_table (16, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3677 staticpro (&Vcharset_ucs);
3679 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
3680 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3681 build_string ("UCS"),
3682 build_string ("UCS"),
3683 build_string ("ISO/IEC 10646"),
3685 Qnil, 0, 0xFFFFFFF, 0, 0);
3686 staticpro (&Vcharset_ucs_bmp);
3688 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
3689 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3690 build_string ("BMP"),
3691 build_string ("BMP"),
3692 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
3693 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
3694 Qnil, 0, 0xFFFF, 0, 0);
3695 staticpro (&Vcharset_ucs_cns);
3697 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 4,
3698 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3699 build_string ("UCS for CNS"),
3700 build_string ("UCS for CNS 11643"),
3701 build_string ("ISO/IEC 10646 for CNS 11643"),
3703 Qnil, 0, 0xFFFFFFF, 0, 0);
3705 # define MIN_CHAR_THAI 0
3706 # define MAX_CHAR_THAI 0
3707 # define MIN_CHAR_HEBREW 0
3708 # define MAX_CHAR_HEBREW 0
3709 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3710 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
3712 staticpro (&Vcharset_ascii);
3714 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3715 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3716 build_string ("ASCII"),
3717 build_string ("ASCII)"),
3718 build_string ("ASCII (ISO646 IRV)"),
3719 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3720 Qnil, 0, 0x7F, 0, 0);
3721 staticpro (&Vcharset_control_1);
3722 Vcharset_control_1 =
3723 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3724 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3725 build_string ("C1"),
3726 build_string ("Control characters"),
3727 build_string ("Control characters 128-191"),
3729 Qnil, 0x80, 0x9F, 0, 0);
3730 staticpro (&Vcharset_latin_iso8859_1);
3731 Vcharset_latin_iso8859_1 =
3732 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3733 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3734 build_string ("Latin-1"),
3735 build_string ("ISO8859-1 (Latin-1)"),
3736 build_string ("ISO8859-1 (Latin-1)"),
3737 build_string ("iso8859-1"),
3738 Qnil, 0xA0, 0xFF, 0, 32);
3739 staticpro (&Vcharset_latin_iso8859_2);
3740 Vcharset_latin_iso8859_2 =
3741 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3742 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3743 build_string ("Latin-2"),
3744 build_string ("ISO8859-2 (Latin-2)"),
3745 build_string ("ISO8859-2 (Latin-2)"),
3746 build_string ("iso8859-2"),
3748 staticpro (&Vcharset_latin_iso8859_3);
3749 Vcharset_latin_iso8859_3 =
3750 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3751 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3752 build_string ("Latin-3"),
3753 build_string ("ISO8859-3 (Latin-3)"),
3754 build_string ("ISO8859-3 (Latin-3)"),
3755 build_string ("iso8859-3"),
3757 staticpro (&Vcharset_latin_iso8859_4);
3758 Vcharset_latin_iso8859_4 =
3759 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3760 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3761 build_string ("Latin-4"),
3762 build_string ("ISO8859-4 (Latin-4)"),
3763 build_string ("ISO8859-4 (Latin-4)"),
3764 build_string ("iso8859-4"),
3766 staticpro (&Vcharset_thai_tis620);
3767 Vcharset_thai_tis620 =
3768 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3769 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3770 build_string ("TIS620"),
3771 build_string ("TIS620 (Thai)"),
3772 build_string ("TIS620.2529 (Thai)"),
3773 build_string ("tis620"),
3774 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
3775 staticpro (&Vcharset_greek_iso8859_7);
3776 Vcharset_greek_iso8859_7 =
3777 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3778 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3779 build_string ("ISO8859-7"),
3780 build_string ("ISO8859-7 (Greek)"),
3781 build_string ("ISO8859-7 (Greek)"),
3782 build_string ("iso8859-7"),
3784 0 /* MIN_CHAR_GREEK */,
3785 0 /* MAX_CHAR_GREEK */, 0, 32);
3786 staticpro (&Vcharset_arabic_iso8859_6);
3787 Vcharset_arabic_iso8859_6 =
3788 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3789 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3790 build_string ("ISO8859-6"),
3791 build_string ("ISO8859-6 (Arabic)"),
3792 build_string ("ISO8859-6 (Arabic)"),
3793 build_string ("iso8859-6"),
3795 staticpro (&Vcharset_hebrew_iso8859_8);
3796 Vcharset_hebrew_iso8859_8 =
3797 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3798 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3799 build_string ("ISO8859-8"),
3800 build_string ("ISO8859-8 (Hebrew)"),
3801 build_string ("ISO8859-8 (Hebrew)"),
3802 build_string ("iso8859-8"),
3803 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
3804 staticpro (&Vcharset_katakana_jisx0201);
3805 Vcharset_katakana_jisx0201 =
3806 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3807 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3808 build_string ("JISX0201 Kana"),
3809 build_string ("JISX0201.1976 (Japanese Kana)"),
3810 build_string ("JISX0201.1976 Japanese Kana"),
3811 build_string ("jisx0201\\.1976"),
3813 staticpro (&Vcharset_latin_jisx0201);
3814 Vcharset_latin_jisx0201 =
3815 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3816 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3817 build_string ("JISX0201 Roman"),
3818 build_string ("JISX0201.1976 (Japanese Roman)"),
3819 build_string ("JISX0201.1976 Japanese Roman"),
3820 build_string ("jisx0201\\.1976"),
3822 staticpro (&Vcharset_cyrillic_iso8859_5);
3823 Vcharset_cyrillic_iso8859_5 =
3824 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3825 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3826 build_string ("ISO8859-5"),
3827 build_string ("ISO8859-5 (Cyrillic)"),
3828 build_string ("ISO8859-5 (Cyrillic)"),
3829 build_string ("iso8859-5"),
3831 0 /* MIN_CHAR_CYRILLIC */,
3832 0 /* MAX_CHAR_CYRILLIC */, 0, 32);
3833 staticpro (&Vcharset_latin_iso8859_9);
3834 Vcharset_latin_iso8859_9 =
3835 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3836 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3837 build_string ("Latin-5"),
3838 build_string ("ISO8859-9 (Latin-5)"),
3839 build_string ("ISO8859-9 (Latin-5)"),
3840 build_string ("iso8859-9"),
3842 staticpro (&Vcharset_japanese_jisx0208_1978);
3843 Vcharset_japanese_jisx0208_1978 =
3844 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3845 Qjapanese_jisx0208_1978, 94, 2,
3846 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3847 build_string ("JIS X0208:1978"),
3848 build_string ("JIS X0208:1978 (Japanese)"),
3850 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3851 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3853 staticpro (&Vcharset_chinese_gb2312);
3854 Vcharset_chinese_gb2312 =
3855 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3856 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3857 build_string ("GB2312"),
3858 build_string ("GB2312)"),
3859 build_string ("GB2312 Chinese simplified"),
3860 build_string ("gb2312"),
3862 staticpro (&Vcharset_japanese_jisx0208);
3863 Vcharset_japanese_jisx0208 =
3864 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3865 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3866 build_string ("JISX0208"),
3867 build_string ("JIS X0208:1983 (Japanese)"),
3868 build_string ("JIS X0208:1983 Japanese Kanji"),
3869 build_string ("jisx0208\\.1983"),
3872 staticpro (&Vcharset_japanese_jisx0208_1990);
3873 Vcharset_japanese_jisx0208_1990 =
3874 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3875 Qjapanese_jisx0208_1990, 94, 2,
3876 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3877 build_string ("JISX0208-1990"),
3878 build_string ("JIS X0208:1990 (Japanese)"),
3879 build_string ("JIS X0208:1990 Japanese Kanji"),
3880 build_string ("jisx0208\\.1990"),
3882 MIN_CHAR_JIS_X0208_1990,
3883 MAX_CHAR_JIS_X0208_1990, 0, 33);
3885 staticpro (&Vcharset_korean_ksc5601);
3886 Vcharset_korean_ksc5601 =
3887 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3888 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3889 build_string ("KSC5601"),
3890 build_string ("KSC5601 (Korean"),
3891 build_string ("KSC5601 Korean Hangul and Hanja"),
3892 build_string ("ksc5601"),
3894 staticpro (&Vcharset_japanese_jisx0212);
3895 Vcharset_japanese_jisx0212 =
3896 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3897 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3898 build_string ("JISX0212"),
3899 build_string ("JISX0212 (Japanese)"),
3900 build_string ("JISX0212 Japanese Supplement"),
3901 build_string ("jisx0212"),
3904 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3905 staticpro (&Vcharset_chinese_cns11643_1);
3906 Vcharset_chinese_cns11643_1 =
3907 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3908 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3909 build_string ("CNS11643-1"),
3910 build_string ("CNS11643-1 (Chinese traditional)"),
3912 ("CNS 11643 Plane 1 Chinese traditional"),
3913 build_string (CHINESE_CNS_PLANE_RE("1")),
3915 staticpro (&Vcharset_chinese_cns11643_2);
3916 Vcharset_chinese_cns11643_2 =
3917 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3918 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3919 build_string ("CNS11643-2"),
3920 build_string ("CNS11643-2 (Chinese traditional)"),
3922 ("CNS 11643 Plane 2 Chinese traditional"),
3923 build_string (CHINESE_CNS_PLANE_RE("2")),
3926 staticpro (&Vcharset_latin_tcvn5712);
3927 Vcharset_latin_tcvn5712 =
3928 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3929 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3930 build_string ("TCVN 5712"),
3931 build_string ("TCVN 5712 (VSCII-2)"),
3932 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3933 build_string ("tcvn5712-1"),
3935 staticpro (&Vcharset_latin_viscii_lower);
3936 Vcharset_latin_viscii_lower =
3937 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3938 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3939 build_string ("VISCII lower"),
3940 build_string ("VISCII lower (Vietnamese)"),
3941 build_string ("VISCII lower (Vietnamese)"),
3942 build_string ("MULEVISCII-LOWER"),
3944 staticpro (&Vcharset_latin_viscii_upper);
3945 Vcharset_latin_viscii_upper =
3946 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3947 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3948 build_string ("VISCII upper"),
3949 build_string ("VISCII upper (Vietnamese)"),
3950 build_string ("VISCII upper (Vietnamese)"),
3951 build_string ("MULEVISCII-UPPER"),
3953 staticpro (&Vcharset_latin_viscii);
3954 Vcharset_latin_viscii =
3955 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3956 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3957 build_string ("VISCII"),
3958 build_string ("VISCII 1.1 (Vietnamese)"),
3959 build_string ("VISCII 1.1 (Vietnamese)"),
3960 build_string ("VISCII1\\.1"),
3962 staticpro (&Vcharset_chinese_big5);
3963 Vcharset_chinese_big5 =
3964 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
3965 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3966 build_string ("Big5"),
3967 build_string ("Big5"),
3968 build_string ("Big5 Chinese traditional"),
3969 build_string ("big5"),
3971 staticpro (&Vcharset_ideograph_gt);
3972 Vcharset_ideograph_gt =
3973 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
3974 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3975 build_string ("GT"),
3976 build_string ("GT"),
3977 build_string ("GT"),
3979 Qnil, MIN_CHAR_GT, MAX_CHAR_GT, 0, 0);
3980 #define DEF_GT_PJ(n) \
3981 staticpro (&Vcharset_ideograph_gt_pj_##n); \
3982 Vcharset_ideograph_gt_pj_##n = \
3983 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
3984 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
3985 build_string ("GT-PJ-"#n), \
3986 build_string ("GT (pseudo JIS encoding) part "#n), \
3987 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
3989 ("\\(GT2000PJ-"#n "\\|jisx0208\\.GT2000-"#n "\\)$"), \
4003 staticpro (&Vcharset_ideograph_daikanwa);
4004 Vcharset_ideograph_daikanwa =
4005 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
4006 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4007 build_string ("Daikanwa"),
4008 build_string ("Morohashi's Daikanwa"),
4009 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
4010 build_string ("Daikanwa"),
4011 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
4012 staticpro (&Vcharset_mojikyo);
4014 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
4015 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4016 build_string ("Mojikyo"),
4017 build_string ("Mojikyo"),
4018 build_string ("Konjaku-Mojikyo"),
4020 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
4021 staticpro (&Vcharset_mojikyo_2022_1);
4022 Vcharset_mojikyo_2022_1 =
4023 make_charset (LEADING_BYTE_MOJIKYO_2022_1, Qmojikyo_2022_1, 94, 3,
4024 2, 2, ':', CHARSET_LEFT_TO_RIGHT,
4025 build_string ("Mojikyo-2022-1"),
4026 build_string ("Mojikyo ISO-2022 Part 1"),
4027 build_string ("Konjaku-Mojikyo for ISO/IEC 2022 Part 1"),
4031 #define DEF_MOJIKYO_PJ(n) \
4032 staticpro (&Vcharset_mojikyo_pj_##n); \
4033 Vcharset_mojikyo_pj_##n = \
4034 make_charset (LEADING_BYTE_MOJIKYO_PJ_##n, Qmojikyo_pj_##n, 94, 2, \
4035 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
4036 build_string ("Mojikyo-PJ-"#n), \
4037 build_string ("Mojikyo (pseudo JIS encoding) part "#n), \
4039 ("Konjaku-Mojikyo (pseudo JIS encoding) part "#n), \
4041 ("\\(MojikyoPJ-"#n "\\|jisx0208\\.Mojikyo-"#n "\\)$"), \
4053 DEF_MOJIKYO_PJ (10);
4054 DEF_MOJIKYO_PJ (11);
4055 DEF_MOJIKYO_PJ (12);
4056 DEF_MOJIKYO_PJ (13);
4057 DEF_MOJIKYO_PJ (14);
4058 DEF_MOJIKYO_PJ (15);
4059 DEF_MOJIKYO_PJ (16);
4060 DEF_MOJIKYO_PJ (17);
4061 DEF_MOJIKYO_PJ (18);
4062 DEF_MOJIKYO_PJ (19);
4063 DEF_MOJIKYO_PJ (20);
4064 DEF_MOJIKYO_PJ (21);
4066 staticpro (&Vcharset_ethiopic_ucs);
4067 Vcharset_ethiopic_ucs =
4068 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
4069 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4070 build_string ("Ethiopic (UCS)"),
4071 build_string ("Ethiopic (UCS)"),
4072 build_string ("Ethiopic of UCS"),
4073 build_string ("Ethiopic-Unicode"),
4074 Qnil, 0x1200, 0x137F, 0x1200, 0);
4076 staticpro (&Vcharset_chinese_big5_1);
4077 Vcharset_chinese_big5_1 =
4078 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
4079 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
4080 build_string ("Big5"),
4081 build_string ("Big5 (Level-1)"),
4083 ("Big5 Level-1 Chinese traditional"),
4084 build_string ("big5"),
4086 staticpro (&Vcharset_chinese_big5_2);
4087 Vcharset_chinese_big5_2 =
4088 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
4089 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
4090 build_string ("Big5"),
4091 build_string ("Big5 (Level-2)"),
4093 ("Big5 Level-2 Chinese traditional"),
4094 build_string ("big5"),
4097 #ifdef ENABLE_COMPOSITE_CHARS
4098 /* #### For simplicity, we put composite chars into a 96x96 charset.
4099 This is going to lead to problems because you can run out of
4100 room, esp. as we don't yet recycle numbers. */
4101 staticpro (&Vcharset_composite);
4102 Vcharset_composite =
4103 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
4104 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4105 build_string ("Composite"),
4106 build_string ("Composite characters"),
4107 build_string ("Composite characters"),
4110 /* #### not dumped properly */
4111 composite_char_row_next = 32;
4112 composite_char_col_next = 32;
4114 Vcomposite_char_string2char_hash_table =
4115 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
4116 Vcomposite_char_char2string_hash_table =
4117 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
4118 staticpro (&Vcomposite_char_string2char_hash_table);
4119 staticpro (&Vcomposite_char_char2string_hash_table);
4120 #endif /* ENABLE_COMPOSITE_CHARS */