1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_cns;
70 Lisp_Object Vcharset_ucs_big5;
71 Lisp_Object Vcharset_latin_viscii;
72 Lisp_Object Vcharset_latin_tcvn5712;
73 Lisp_Object Vcharset_latin_viscii_lower;
74 Lisp_Object Vcharset_latin_viscii_upper;
75 Lisp_Object Vcharset_chinese_big5;
76 Lisp_Object Vcharset_chinese_big5_cdp;
77 Lisp_Object Vcharset_ideograph_gt;
78 Lisp_Object Vcharset_ideograph_gt_pj_1;
79 Lisp_Object Vcharset_ideograph_gt_pj_2;
80 Lisp_Object Vcharset_ideograph_gt_pj_3;
81 Lisp_Object Vcharset_ideograph_gt_pj_4;
82 Lisp_Object Vcharset_ideograph_gt_pj_5;
83 Lisp_Object Vcharset_ideograph_gt_pj_6;
84 Lisp_Object Vcharset_ideograph_gt_pj_7;
85 Lisp_Object Vcharset_ideograph_gt_pj_8;
86 Lisp_Object Vcharset_ideograph_gt_pj_9;
87 Lisp_Object Vcharset_ideograph_gt_pj_10;
88 Lisp_Object Vcharset_ideograph_gt_pj_11;
89 Lisp_Object Vcharset_ideograph_daikanwa;
90 Lisp_Object Vcharset_mojikyo;
91 Lisp_Object Vcharset_mojikyo_2022_1;
92 Lisp_Object Vcharset_mojikyo_pj_1;
93 Lisp_Object Vcharset_mojikyo_pj_2;
94 Lisp_Object Vcharset_mojikyo_pj_3;
95 Lisp_Object Vcharset_mojikyo_pj_4;
96 Lisp_Object Vcharset_mojikyo_pj_5;
97 Lisp_Object Vcharset_mojikyo_pj_6;
98 Lisp_Object Vcharset_mojikyo_pj_7;
99 Lisp_Object Vcharset_mojikyo_pj_8;
100 Lisp_Object Vcharset_mojikyo_pj_9;
101 Lisp_Object Vcharset_mojikyo_pj_10;
102 Lisp_Object Vcharset_mojikyo_pj_11;
103 Lisp_Object Vcharset_mojikyo_pj_12;
104 Lisp_Object Vcharset_mojikyo_pj_13;
105 Lisp_Object Vcharset_mojikyo_pj_14;
106 Lisp_Object Vcharset_mojikyo_pj_15;
107 Lisp_Object Vcharset_mojikyo_pj_16;
108 Lisp_Object Vcharset_mojikyo_pj_17;
109 Lisp_Object Vcharset_mojikyo_pj_18;
110 Lisp_Object Vcharset_mojikyo_pj_19;
111 Lisp_Object Vcharset_mojikyo_pj_20;
112 Lisp_Object Vcharset_mojikyo_pj_21;
113 Lisp_Object Vcharset_ethiopic_ucs;
115 Lisp_Object Vcharset_chinese_big5_1;
116 Lisp_Object Vcharset_chinese_big5_2;
118 #ifdef ENABLE_COMPOSITE_CHARS
119 Lisp_Object Vcharset_composite;
121 /* Hash tables for composite chars. One maps string representing
122 composed chars to their equivalent chars; one goes the
124 Lisp_Object Vcomposite_char_char2string_hash_table;
125 Lisp_Object Vcomposite_char_string2char_hash_table;
127 static int composite_char_row_next;
128 static int composite_char_col_next;
130 #endif /* ENABLE_COMPOSITE_CHARS */
132 struct charset_lookup *chlook;
134 static const struct lrecord_description charset_lookup_description_1[] = {
135 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
144 static const struct struct_description charset_lookup_description = {
145 sizeof (struct charset_lookup),
146 charset_lookup_description_1
150 /* Table of number of bytes in the string representation of a character
151 indexed by the first byte of that representation.
153 rep_bytes_by_first_byte(c) is more efficient than the equivalent
154 canonical computation:
156 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
158 const Bytecount rep_bytes_by_first_byte[0xA0] =
159 { /* 0x00 - 0x7f are for straight ASCII */
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
165 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
166 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
167 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
168 /* 0x80 - 0x8f are for Dimension-1 official charsets */
170 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
172 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
174 /* 0x90 - 0x9d are for Dimension-2 official charsets */
175 /* 0x9e is for Dimension-1 private charsets */
176 /* 0x9f is for Dimension-2 private charsets */
177 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
183 #define BT_UINT8_MIN 0
184 #define BT_UINT8_MAX (UCHAR_MAX - 3)
185 #define BT_UINT8_t (UCHAR_MAX - 2)
186 #define BT_UINT8_nil (UCHAR_MAX - 1)
187 #define BT_UINT8_unbound UCHAR_MAX
189 INLINE_HEADER int INT_UINT8_P (Lisp_Object obj);
190 INLINE_HEADER int UINT8_VALUE_P (Lisp_Object obj);
191 INLINE_HEADER unsigned char UINT8_ENCODE (Lisp_Object obj);
192 INLINE_HEADER Lisp_Object UINT8_DECODE (unsigned char n);
193 INLINE_HEADER unsigned short UINT8_TO_UINT16 (unsigned char n);
196 INT_UINT8_P (Lisp_Object obj)
200 int num = XINT (obj);
202 return (BT_UINT8_MIN <= num) && (num <= BT_UINT8_MAX);
209 UINT8_VALUE_P (Lisp_Object obj)
211 return EQ (obj, Qunbound)
212 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT8_P (obj);
215 INLINE_HEADER unsigned char
216 UINT8_ENCODE (Lisp_Object obj)
218 if (EQ (obj, Qunbound))
219 return BT_UINT8_unbound;
220 else if (EQ (obj, Qnil))
222 else if (EQ (obj, Qt))
228 INLINE_HEADER Lisp_Object
229 UINT8_DECODE (unsigned char n)
231 if (n == BT_UINT8_unbound)
233 else if (n == BT_UINT8_nil)
235 else if (n == BT_UINT8_t)
242 mark_uint8_byte_table (Lisp_Object obj)
248 print_uint8_byte_table (Lisp_Object obj,
249 Lisp_Object printcharfun, int escapeflag)
251 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
253 struct gcpro gcpro1, gcpro2;
254 GCPRO2 (obj, printcharfun);
256 write_c_string ("\n#<uint8-byte-table", printcharfun);
257 for (i = 0; i < 256; i++)
259 unsigned char n = bte->property[i];
261 write_c_string ("\n ", printcharfun);
262 write_c_string (" ", printcharfun);
263 if (n == BT_UINT8_unbound)
264 write_c_string ("void", printcharfun);
265 else if (n == BT_UINT8_nil)
266 write_c_string ("nil", printcharfun);
267 else if (n == BT_UINT8_t)
268 write_c_string ("t", printcharfun);
273 sprintf (buf, "%hd", n);
274 write_c_string (buf, printcharfun);
278 write_c_string (">", printcharfun);
282 uint8_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
284 Lisp_Uint8_Byte_Table *te1 = XUINT8_BYTE_TABLE (obj1);
285 Lisp_Uint8_Byte_Table *te2 = XUINT8_BYTE_TABLE (obj2);
288 for (i = 0; i < 256; i++)
289 if (te1->property[i] != te2->property[i])
295 uint8_byte_table_hash (Lisp_Object obj, int depth)
297 Lisp_Uint8_Byte_Table *te = XUINT8_BYTE_TABLE (obj);
301 for (i = 0; i < 256; i++)
302 hash = HASH2 (hash, te->property[i]);
306 DEFINE_LRECORD_IMPLEMENTATION ("uint8-byte-table", uint8_byte_table,
307 mark_uint8_byte_table,
308 print_uint8_byte_table,
309 0, uint8_byte_table_equal,
310 uint8_byte_table_hash,
311 0 /* uint8_byte_table_description */,
312 Lisp_Uint8_Byte_Table);
315 make_uint8_byte_table (unsigned char initval)
319 Lisp_Uint8_Byte_Table *cte;
321 cte = alloc_lcrecord_type (Lisp_Uint8_Byte_Table,
322 &lrecord_uint8_byte_table);
324 for (i = 0; i < 256; i++)
325 cte->property[i] = initval;
327 XSETUINT8_BYTE_TABLE (obj, cte);
332 uint8_byte_table_same_value_p (Lisp_Object obj)
334 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
335 unsigned char v0 = bte->property[0];
338 for (i = 1; i < 256; i++)
340 if (bte->property[i] != v0)
347 map_over_uint8_byte_table (Lisp_Uint8_Byte_Table *ct,
348 int (*fn) (Emchar c, Lisp_Object val, void *arg),
349 void *arg, Emchar ofs, int place)
352 int unit = 1 << (8 * place);
356 for (i = 0, retval = 0; i < 256 && retval == 0; i++)
358 if (ct->property[i] != BT_UINT8_unbound)
361 for (; c < c1 && retval == 0; c++)
362 retval = (fn) (c, UINT8_DECODE (ct->property[i]), arg);
370 #define BT_UINT16_MIN 0
371 #define BT_UINT16_MAX (USHRT_MAX - 3)
372 #define BT_UINT16_t (USHRT_MAX - 2)
373 #define BT_UINT16_nil (USHRT_MAX - 1)
374 #define BT_UINT16_unbound USHRT_MAX
376 INLINE_HEADER int INT_UINT16_P (Lisp_Object obj);
377 INLINE_HEADER int UINT16_VALUE_P (Lisp_Object obj);
378 INLINE_HEADER unsigned short UINT16_ENCODE (Lisp_Object obj);
379 INLINE_HEADER Lisp_Object UINT16_DECODE (unsigned short us);
382 INT_UINT16_P (Lisp_Object obj)
386 int num = XINT (obj);
388 return (BT_UINT16_MIN <= num) && (num <= BT_UINT16_MAX);
395 UINT16_VALUE_P (Lisp_Object obj)
397 return EQ (obj, Qunbound)
398 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT16_P (obj);
401 INLINE_HEADER unsigned short
402 UINT16_ENCODE (Lisp_Object obj)
404 if (EQ (obj, Qunbound))
405 return BT_UINT16_unbound;
406 else if (EQ (obj, Qnil))
407 return BT_UINT16_nil;
408 else if (EQ (obj, Qt))
414 INLINE_HEADER Lisp_Object
415 UINT16_DECODE (unsigned short n)
417 if (n == BT_UINT16_unbound)
419 else if (n == BT_UINT16_nil)
421 else if (n == BT_UINT16_t)
427 INLINE_HEADER unsigned short
428 UINT8_TO_UINT16 (unsigned char n)
430 if (n == BT_UINT8_unbound)
431 return BT_UINT16_unbound;
432 else if (n == BT_UINT8_nil)
433 return BT_UINT16_nil;
434 else if (n == BT_UINT8_t)
441 mark_uint16_byte_table (Lisp_Object obj)
447 print_uint16_byte_table (Lisp_Object obj,
448 Lisp_Object printcharfun, int escapeflag)
450 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
452 struct gcpro gcpro1, gcpro2;
453 GCPRO2 (obj, printcharfun);
455 write_c_string ("\n#<uint16-byte-table", printcharfun);
456 for (i = 0; i < 256; i++)
458 unsigned short n = bte->property[i];
460 write_c_string ("\n ", printcharfun);
461 write_c_string (" ", printcharfun);
462 if (n == BT_UINT16_unbound)
463 write_c_string ("void", printcharfun);
464 else if (n == BT_UINT16_nil)
465 write_c_string ("nil", printcharfun);
466 else if (n == BT_UINT16_t)
467 write_c_string ("t", printcharfun);
472 sprintf (buf, "%hd", n);
473 write_c_string (buf, printcharfun);
477 write_c_string (">", printcharfun);
481 uint16_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
483 Lisp_Uint16_Byte_Table *te1 = XUINT16_BYTE_TABLE (obj1);
484 Lisp_Uint16_Byte_Table *te2 = XUINT16_BYTE_TABLE (obj2);
487 for (i = 0; i < 256; i++)
488 if (te1->property[i] != te2->property[i])
494 uint16_byte_table_hash (Lisp_Object obj, int depth)
496 Lisp_Uint16_Byte_Table *te = XUINT16_BYTE_TABLE (obj);
500 for (i = 0; i < 256; i++)
501 hash = HASH2 (hash, te->property[i]);
505 DEFINE_LRECORD_IMPLEMENTATION ("uint16-byte-table", uint16_byte_table,
506 mark_uint16_byte_table,
507 print_uint16_byte_table,
508 0, uint16_byte_table_equal,
509 uint16_byte_table_hash,
510 0 /* uint16_byte_table_description */,
511 Lisp_Uint16_Byte_Table);
514 make_uint16_byte_table (unsigned short initval)
518 Lisp_Uint16_Byte_Table *cte;
520 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
521 &lrecord_uint16_byte_table);
523 for (i = 0; i < 256; i++)
524 cte->property[i] = initval;
526 XSETUINT16_BYTE_TABLE (obj, cte);
531 expand_uint8_byte_table_to_uint16 (Lisp_Object table)
535 Lisp_Uint8_Byte_Table* bte = XUINT8_BYTE_TABLE(table);
536 Lisp_Uint16_Byte_Table* cte;
538 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
539 &lrecord_uint16_byte_table);
540 for (i = 0; i < 256; i++)
542 cte->property[i] = UINT8_TO_UINT16 (bte->property[i]);
544 XSETUINT16_BYTE_TABLE (obj, cte);
549 uint16_byte_table_same_value_p (Lisp_Object obj)
551 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
552 unsigned short v0 = bte->property[0];
555 for (i = 1; i < 256; i++)
557 if (bte->property[i] != v0)
564 map_over_uint16_byte_table (Lisp_Uint16_Byte_Table *ct,
565 int (*fn) (Emchar c, Lisp_Object val, void *arg),
566 void *arg, Emchar ofs, int place)
569 int unit = 1 << (8 * place);
573 for (i = 0, retval = 0; i < 256 && retval == 0; i++)
575 if (ct->property[i] != BT_UINT16_unbound)
578 for (; c < c1 && retval == 0; c++)
579 retval = (fn) (c, UINT16_DECODE (ct->property[i]), arg);
589 mark_byte_table (Lisp_Object obj)
591 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
594 for (i = 0; i < 256; i++)
596 mark_object (cte->property[i]);
602 print_byte_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
604 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
606 struct gcpro gcpro1, gcpro2;
607 GCPRO2 (obj, printcharfun);
609 write_c_string ("\n#<byte-table", printcharfun);
610 for (i = 0; i < 256; i++)
612 Lisp_Object elt = bte->property[i];
614 write_c_string ("\n ", printcharfun);
615 write_c_string (" ", printcharfun);
616 if (EQ (elt, Qunbound))
617 write_c_string ("void", printcharfun);
619 print_internal (elt, printcharfun, escapeflag);
622 write_c_string (">", printcharfun);
626 byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
628 Lisp_Byte_Table *cte1 = XBYTE_TABLE (obj1);
629 Lisp_Byte_Table *cte2 = XBYTE_TABLE (obj2);
632 for (i = 0; i < 256; i++)
633 if (BYTE_TABLE_P (cte1->property[i]))
635 if (BYTE_TABLE_P (cte2->property[i]))
637 if (!byte_table_equal (cte1->property[i],
638 cte2->property[i], depth + 1))
645 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
651 byte_table_hash (Lisp_Object obj, int depth)
653 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
655 return internal_array_hash (cte->property, 256, depth);
658 static const struct lrecord_description byte_table_description[] = {
659 { XD_LISP_OBJECT_ARRAY, offsetof(Lisp_Byte_Table, property), 256 },
663 DEFINE_LRECORD_IMPLEMENTATION ("byte-table", byte_table,
668 byte_table_description,
672 make_byte_table (Lisp_Object initval)
676 Lisp_Byte_Table *cte;
678 cte = alloc_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
680 for (i = 0; i < 256; i++)
681 cte->property[i] = initval;
683 XSETBYTE_TABLE (obj, cte);
688 byte_table_same_value_p (Lisp_Object obj)
690 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
691 Lisp_Object v0 = bte->property[0];
694 for (i = 1; i < 256; i++)
696 if (!internal_equal (bte->property[i], v0, 0))
703 map_over_byte_table (Lisp_Byte_Table *ct,
704 int (*fn) (Emchar c, Lisp_Object val, void *arg),
705 void *arg, Emchar ofs, int place)
709 int unit = 1 << (8 * place);
712 for (i = 0, retval = 0; i < 256 && retval == 0; i++)
715 if (UINT8_BYTE_TABLE_P (v))
718 = map_over_uint8_byte_table (XUINT8_BYTE_TABLE(v),
719 fn, arg, c, place - 1);
722 else if (UINT16_BYTE_TABLE_P (v))
725 = map_over_uint16_byte_table (XUINT16_BYTE_TABLE(v),
726 fn, arg, c, place - 1);
729 else if (BYTE_TABLE_P (v))
731 retval = map_over_byte_table (XBYTE_TABLE(v),
732 fn, arg, c, place - 1);
735 else if (!UNBOUNDP (v))
737 Emchar c1 = c + unit;
739 for (; c < c1 && retval == 0; c++)
740 retval = (fn) (c, v, arg);
749 Lisp_Object get_byte_table (Lisp_Object table, unsigned char idx);
750 Lisp_Object put_byte_table (Lisp_Object table, unsigned char idx,
754 get_byte_table (Lisp_Object table, unsigned char idx)
756 if (UINT8_BYTE_TABLE_P (table))
757 return UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[idx]);
758 else if (UINT16_BYTE_TABLE_P (table))
759 return UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[idx]);
760 else if (BYTE_TABLE_P (table))
761 return XBYTE_TABLE(table)->property[idx];
767 put_byte_table (Lisp_Object table, unsigned char idx, Lisp_Object value)
769 if (UINT8_BYTE_TABLE_P (table))
771 if (UINT8_VALUE_P (value))
773 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
774 if (!UINT8_BYTE_TABLE_P (value) &&
775 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
776 && uint8_byte_table_same_value_p (table))
781 else if (UINT16_VALUE_P (value))
783 Lisp_Object new = expand_uint8_byte_table_to_uint16 (table);
785 XUINT16_BYTE_TABLE(new)->property[idx] = UINT16_ENCODE (value);
790 Lisp_Object new = make_byte_table (Qnil);
793 for (i = 0; i < 256; i++)
795 XBYTE_TABLE(new)->property[i]
796 = UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[i]);
798 XBYTE_TABLE(new)->property[idx] = value;
802 else if (UINT16_BYTE_TABLE_P (table))
804 if (UINT16_VALUE_P (value))
806 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
807 if (!UINT8_BYTE_TABLE_P (value) &&
808 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
809 && uint16_byte_table_same_value_p (table))
816 Lisp_Object new = make_byte_table (Qnil);
819 for (i = 0; i < 256; i++)
821 XBYTE_TABLE(new)->property[i]
822 = UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[i]);
824 XBYTE_TABLE(new)->property[idx] = value;
828 else if (BYTE_TABLE_P (table))
830 XBYTE_TABLE(table)->property[idx] = value;
831 if (!UINT8_BYTE_TABLE_P (value) &&
832 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
833 && byte_table_same_value_p (table))
838 else if (!internal_equal (table, value, 0))
840 if (UINT8_VALUE_P (table) && UINT8_VALUE_P (value))
842 table = make_uint8_byte_table (UINT8_ENCODE (table));
843 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
845 else if (UINT16_VALUE_P (table) && UINT16_VALUE_P (value))
847 table = make_uint16_byte_table (UINT16_ENCODE (table));
848 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
852 table = make_byte_table (table);
853 XBYTE_TABLE(table)->property[idx] = value;
860 mark_char_id_table (Lisp_Object obj)
862 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
868 print_char_id_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
870 Lisp_Object table = XCHAR_ID_TABLE (obj)->table;
872 struct gcpro gcpro1, gcpro2;
873 GCPRO2 (obj, printcharfun);
875 write_c_string ("#<char-id-table ", printcharfun);
876 for (i = 0; i < 256; i++)
878 Lisp_Object elt = get_byte_table (table, i);
879 if (i != 0) write_c_string ("\n ", printcharfun);
880 if (EQ (elt, Qunbound))
881 write_c_string ("void", printcharfun);
883 print_internal (elt, printcharfun, escapeflag);
886 write_c_string (">", printcharfun);
890 char_id_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
892 Lisp_Object table1 = XCHAR_ID_TABLE (obj1)->table;
893 Lisp_Object table2 = XCHAR_ID_TABLE (obj2)->table;
896 for (i = 0; i < 256; i++)
898 if (!internal_equal (get_byte_table (table1, i),
899 get_byte_table (table2, i), 0))
906 char_id_table_hash (Lisp_Object obj, int depth)
908 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
910 return char_id_table_hash (cte->table, depth + 1);
913 static const struct lrecord_description char_id_table_description[] = {
914 { XD_LISP_OBJECT, offsetof(Lisp_Char_ID_Table, table) },
918 DEFINE_LRECORD_IMPLEMENTATION ("char-id-table", char_id_table,
921 0, char_id_table_equal,
923 char_id_table_description,
927 make_char_id_table (Lisp_Object initval)
930 Lisp_Char_ID_Table *cte;
932 cte = alloc_lcrecord_type (Lisp_Char_ID_Table, &lrecord_char_id_table);
934 cte->table = make_byte_table (initval);
936 XSETCHAR_ID_TABLE (obj, cte);
942 get_char_id_table (Emchar ch, Lisp_Object table)
944 unsigned int code = ch;
951 (XCHAR_ID_TABLE (table)->table,
952 (unsigned char)(code >> 24)),
953 (unsigned char) (code >> 16)),
954 (unsigned char) (code >> 8)),
955 (unsigned char) code);
958 void put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table);
960 put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table)
962 unsigned int code = ch;
963 Lisp_Object table1, table2, table3, table4;
965 table1 = XCHAR_ID_TABLE (table)->table;
966 table2 = get_byte_table (table1, (unsigned char)(code >> 24));
967 table3 = get_byte_table (table2, (unsigned char)(code >> 16));
968 table4 = get_byte_table (table3, (unsigned char)(code >> 8));
970 table4 = put_byte_table (table4, (unsigned char)code, value);
971 table3 = put_byte_table (table3, (unsigned char)(code >> 8), table4);
972 table2 = put_byte_table (table2, (unsigned char)(code >> 16), table3);
973 XCHAR_ID_TABLE (table)->table
974 = put_byte_table (table1, (unsigned char)(code >> 24), table2);
977 /* Map FN (with client data ARG) in char table CT.
978 Mapping stops the first time FN returns non-zero, and that value
979 becomes the return value of map_char_id_table(). */
981 map_char_id_table (Lisp_Char_ID_Table *ct,
982 int (*fn) (Emchar c, Lisp_Object val, void *arg),
985 map_char_id_table (Lisp_Char_ID_Table *ct,
986 int (*fn) (Emchar c, Lisp_Object val, void *arg),
989 Lisp_Object v = ct->table;
991 if (UINT8_BYTE_TABLE_P (v))
992 return map_over_uint8_byte_table (XUINT8_BYTE_TABLE(v), fn, arg, 0, 3);
993 else if (UINT16_BYTE_TABLE_P (v))
994 return map_over_uint16_byte_table (XUINT16_BYTE_TABLE(v), fn, arg, 0, 3);
995 else if (BYTE_TABLE_P (v))
996 return map_over_byte_table (XBYTE_TABLE(v), fn, arg, 0, 3);
997 else if (!UNBOUNDP (v))
1001 Emchar c1 = c + unit;
1004 for (retval = 0; c < c1 && retval == 0; c++)
1005 retval = (fn) (c, v, arg);
1010 struct slow_map_char_id_table_arg
1012 Lisp_Object function;
1017 slow_map_char_id_table_fun (Emchar c, Lisp_Object val, void *arg)
1019 struct slow_map_char_id_table_arg *closure =
1020 (struct slow_map_char_id_table_arg *) arg;
1022 closure->retval = call2 (closure->function, make_char (c), val);
1023 return !NILP (closure->retval);
1027 Lisp_Object Vchar_attribute_hash_table;
1028 Lisp_Object Vcharacter_composition_table;
1029 Lisp_Object Vcharacter_variant_table;
1031 Lisp_Object Qideograph_daikanwa;
1032 Lisp_Object Q_decomposition;
1034 Lisp_Object Qto_ucs;
1036 Lisp_Object Qcompat;
1037 Lisp_Object Qisolated;
1038 Lisp_Object Qinitial;
1039 Lisp_Object Qmedial;
1041 Lisp_Object Qvertical;
1042 Lisp_Object QnoBreak;
1043 Lisp_Object Qfraction;
1046 Lisp_Object Qcircle;
1047 Lisp_Object Qsquare;
1049 Lisp_Object Qnarrow;
1053 Emchar to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg);
1055 Lisp_Object put_char_ccs_code_point (Lisp_Object character,
1056 Lisp_Object ccs, Lisp_Object value);
1057 Lisp_Object remove_char_ccs (Lisp_Object character, Lisp_Object ccs);
1060 to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
1066 else if (EQ (v, Qcompat))
1068 else if (EQ (v, Qisolated))
1070 else if (EQ (v, Qinitial))
1072 else if (EQ (v, Qmedial))
1074 else if (EQ (v, Qfinal))
1076 else if (EQ (v, Qvertical))
1078 else if (EQ (v, QnoBreak))
1080 else if (EQ (v, Qfraction))
1082 else if (EQ (v, Qsuper))
1084 else if (EQ (v, Qsub))
1086 else if (EQ (v, Qcircle))
1088 else if (EQ (v, Qsquare))
1090 else if (EQ (v, Qwide))
1092 else if (EQ (v, Qnarrow))
1094 else if (EQ (v, Qsmall))
1096 else if (EQ (v, Qfont))
1099 signal_simple_error (err_msg, err_arg);
1102 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
1103 Return character corresponding with list.
1107 Lisp_Object table = Vcharacter_composition_table;
1108 Lisp_Object rest = list;
1110 while (CONSP (rest))
1112 Lisp_Object v = Fcar (rest);
1114 Emchar c = to_char_id (v, "Invalid value for composition", list);
1116 ret = get_char_id_table (c, table);
1121 if (!CHAR_ID_TABLE_P (ret))
1126 else if (!CONSP (rest))
1128 else if (CHAR_ID_TABLE_P (ret))
1131 signal_simple_error ("Invalid table is found with", list);
1133 signal_simple_error ("Invalid value for composition", list);
1136 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
1137 Return variants of CHARACTER.
1141 CHECK_CHAR (character);
1142 return Fcopy_list (get_char_id_table (XCHAR (character),
1143 Vcharacter_variant_table));
1147 /* We store the char-attributes in hash tables with the names as the
1148 key and the actual char-id-table object as the value. Occasionally
1149 we need to use them in a list format. These routines provide us
1151 struct char_attribute_list_closure
1153 Lisp_Object *char_attribute_list;
1157 add_char_attribute_to_list_mapper (Lisp_Object key, Lisp_Object value,
1158 void *char_attribute_list_closure)
1160 /* This function can GC */
1161 struct char_attribute_list_closure *calcl
1162 = (struct char_attribute_list_closure*) char_attribute_list_closure;
1163 Lisp_Object *char_attribute_list = calcl->char_attribute_list;
1165 *char_attribute_list = Fcons (key, *char_attribute_list);
1169 DEFUN ("char-attribute-list", Fchar_attribute_list, 0, 0, 0, /*
1170 Return the list of all existing character attributes except coded-charsets.
1174 Lisp_Object char_attribute_list = Qnil;
1175 struct gcpro gcpro1;
1176 struct char_attribute_list_closure char_attribute_list_closure;
1178 GCPRO1 (char_attribute_list);
1179 char_attribute_list_closure.char_attribute_list = &char_attribute_list;
1180 elisp_maphash (add_char_attribute_to_list_mapper,
1181 Vchar_attribute_hash_table,
1182 &char_attribute_list_closure);
1184 return char_attribute_list;
1187 DEFUN ("find-char-attribute-table", Ffind_char_attribute_table, 1, 1, 0, /*
1188 Return char-id-table corresponding to ATTRIBUTE.
1192 return Fgethash (attribute, Vchar_attribute_hash_table, Qnil);
1196 /* We store the char-id-tables in hash tables with the attributes as
1197 the key and the actual char-id-table object as the value. Each
1198 char-id-table stores values of an attribute corresponding with
1199 characters. Occasionally we need to get attributes of a character
1200 in a association-list format. These routines provide us with
1202 struct char_attribute_alist_closure
1205 Lisp_Object *char_attribute_alist;
1209 add_char_attribute_alist_mapper (Lisp_Object key, Lisp_Object value,
1210 void *char_attribute_alist_closure)
1212 /* This function can GC */
1213 struct char_attribute_alist_closure *caacl =
1214 (struct char_attribute_alist_closure*) char_attribute_alist_closure;
1215 Lisp_Object ret = get_char_id_table (caacl->char_id, value);
1216 if (!UNBOUNDP (ret))
1218 Lisp_Object *char_attribute_alist = caacl->char_attribute_alist;
1219 *char_attribute_alist
1220 = Fcons (Fcons (key, ret), *char_attribute_alist);
1225 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
1226 Return the alist of attributes of CHARACTER.
1230 Lisp_Object alist = Qnil;
1233 CHECK_CHAR (character);
1235 struct gcpro gcpro1;
1236 struct char_attribute_alist_closure char_attribute_alist_closure;
1239 char_attribute_alist_closure.char_id = XCHAR (character);
1240 char_attribute_alist_closure.char_attribute_alist = &alist;
1241 elisp_maphash (add_char_attribute_alist_mapper,
1242 Vchar_attribute_hash_table,
1243 &char_attribute_alist_closure);
1247 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
1249 Lisp_Object ccs = chlook->charset_by_leading_byte[i];
1253 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1256 if ( CHAR_ID_TABLE_P (encoding_table)
1257 && INTP (cpos = get_char_id_table (XCHAR (character),
1260 alist = Fcons (Fcons (ccs, cpos), alist);
1267 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 3, 0, /*
1268 Return the value of CHARACTER's ATTRIBUTE.
1269 Return DEFAULT-VALUE if the value is not exist.
1271 (character, attribute, default_value))
1275 CHECK_CHAR (character);
1276 if (!NILP (ccs = Ffind_charset (attribute)))
1278 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1280 if (CHAR_ID_TABLE_P (encoding_table))
1281 return get_char_id_table (XCHAR (character), encoding_table);
1285 Lisp_Object table = Fgethash (attribute,
1286 Vchar_attribute_hash_table,
1288 if (!UNBOUNDP (table))
1290 Lisp_Object ret = get_char_id_table (XCHAR (character), table);
1291 if (!UNBOUNDP (ret))
1295 return default_value;
1298 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
1299 Store CHARACTER's ATTRIBUTE with VALUE.
1301 (character, attribute, value))
1305 CHECK_CHAR (character);
1306 ccs = Ffind_charset (attribute);
1309 return put_char_ccs_code_point (character, ccs, value);
1311 else if (EQ (attribute, Q_decomposition))
1316 signal_simple_error ("Invalid value for ->decomposition",
1319 if (CONSP (Fcdr (value)))
1321 Lisp_Object rest = value;
1322 Lisp_Object table = Vcharacter_composition_table;
1326 GET_EXTERNAL_LIST_LENGTH (rest, len);
1327 seq = make_vector (len, Qnil);
1329 while (CONSP (rest))
1331 Lisp_Object v = Fcar (rest);
1334 = to_char_id (v, "Invalid value for ->decomposition", value);
1337 XVECTOR_DATA(seq)[i++] = v;
1339 XVECTOR_DATA(seq)[i++] = make_char (c);
1343 put_char_id_table (c, character, table);
1348 ntable = get_char_id_table (c, table);
1349 if (!CHAR_ID_TABLE_P (ntable))
1351 ntable = make_char_id_table (Qnil);
1352 put_char_id_table (c, ntable, table);
1360 Lisp_Object v = Fcar (value);
1364 Emchar c = XINT (v);
1366 = get_char_id_table (c, Vcharacter_variant_table);
1368 if (NILP (Fmemq (v, ret)))
1370 put_char_id_table (c, Fcons (character, ret),
1371 Vcharacter_variant_table);
1374 seq = make_vector (1, v);
1378 else if (EQ (attribute, Qto_ucs) || EQ (attribute, Q_ucs))
1384 signal_simple_error ("Invalid value for ->ucs", value);
1388 ret = get_char_id_table (c, Vcharacter_variant_table);
1389 if (NILP (Fmemq (character, ret)))
1391 put_char_id_table (c, Fcons (character, ret),
1392 Vcharacter_variant_table);
1395 if (EQ (attribute, Q_ucs))
1396 attribute = Qto_ucs;
1400 Lisp_Object table = Fgethash (attribute,
1401 Vchar_attribute_hash_table,
1406 table = make_char_id_table (Qunbound);
1407 Fputhash (attribute, table, Vchar_attribute_hash_table);
1409 put_char_id_table (XCHAR (character), value, table);
1414 DEFUN ("remove-char-attribute", Fremove_char_attribute, 2, 2, 0, /*
1415 Remove CHARACTER's ATTRIBUTE.
1417 (character, attribute))
1421 CHECK_CHAR (character);
1422 ccs = Ffind_charset (attribute);
1425 return remove_char_ccs (character, ccs);
1429 Lisp_Object table = Fgethash (attribute,
1430 Vchar_attribute_hash_table,
1432 if (!UNBOUNDP (table))
1434 put_char_id_table (XCHAR (character), Qunbound, table);
1441 DEFUN ("map-char-attribute", Fmap_char_attribute, 2, 2, 0, /*
1442 Map FUNCTION over entries in ATTRIBUTE, calling it with two args,
1443 each key and value in the table.
1445 (function, attribute))
1448 Lisp_Char_ID_Table *ct;
1449 struct slow_map_char_id_table_arg slarg;
1450 struct gcpro gcpro1, gcpro2;
1452 if (!NILP (ccs = Ffind_charset (attribute)))
1454 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1456 if (CHAR_ID_TABLE_P (encoding_table))
1457 ct = XCHAR_ID_TABLE (encoding_table);
1463 Lisp_Object table = Fgethash (attribute,
1464 Vchar_attribute_hash_table,
1466 if (CHAR_ID_TABLE_P (table))
1467 ct = XCHAR_ID_TABLE (table);
1471 slarg.function = function;
1472 slarg.retval = Qnil;
1473 GCPRO2 (slarg.function, slarg.retval);
1474 map_char_id_table (ct, slow_map_char_id_table_fun, &slarg);
1477 return slarg.retval;
1480 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
1482 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
1484 /* ad-hoc method for `ascii' */
1485 if ((CHARSET_CHARS (cs) == 94) &&
1486 (CHARSET_BYTE_OFFSET (cs) != 33))
1487 return 128 - CHARSET_BYTE_OFFSET (cs);
1489 return CHARSET_CHARS (cs);
1492 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
1494 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
1496 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
1500 if (XVECTOR_LENGTH (v) > ccs_len)
1503 for (i = 0; i < XVECTOR_LENGTH (v); i++)
1505 Lisp_Object c = XVECTOR_DATA(v)[i];
1507 if (!NILP (c) && !CHARP (c))
1511 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
1523 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1526 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1536 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1537 nv = XVECTOR_DATA(v)[i];
1543 XVECTOR_DATA(v)[i] = Qnil;
1547 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1548 int code_point, Lisp_Object character);
1550 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1551 int code_point, Lisp_Object character)
1555 int ccs_len = XVECTOR_LENGTH (v);
1560 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1561 nv = XVECTOR_DATA(v)[i];
1565 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
1571 XVECTOR_DATA(v)[i] = character;
1575 put_char_ccs_code_point (Lisp_Object character,
1576 Lisp_Object ccs, Lisp_Object value)
1578 Lisp_Object encoding_table;
1580 if (!EQ (XCHARSET_NAME (ccs), Qucs)
1581 || (XCHAR (character) != XINT (value)))
1583 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
1584 int dim = XCHARSET_DIMENSION (ccs);
1585 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
1586 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
1590 { /* obsolete representation: value must be a list of bytes */
1591 Lisp_Object ret = Fcar (value);
1595 signal_simple_error ("Invalid value for coded-charset", value);
1596 code_point = XINT (ret);
1597 if (XCHARSET_GRAPHIC (ccs) == 1)
1599 rest = Fcdr (value);
1600 while (!NILP (rest))
1605 signal_simple_error ("Invalid value for coded-charset",
1609 signal_simple_error ("Invalid value for coded-charset",
1612 if (XCHARSET_GRAPHIC (ccs) == 1)
1614 code_point = (code_point << 8) | j;
1617 value = make_int (code_point);
1619 else if (INTP (value))
1621 code_point = XINT (value);
1622 if (XCHARSET_GRAPHIC (ccs) == 1)
1624 code_point &= 0x7F7F7F7F;
1625 value = make_int (code_point);
1629 signal_simple_error ("Invalid value for coded-charset", value);
1633 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
1636 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
1641 XCHARSET_DECODING_TABLE (ccs)
1642 = v = make_older_vector (ccs_len, Qnil);
1645 decoding_table_put_char (v, dim, byte_offset, code_point, character);
1647 if (NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
1649 XCHARSET_ENCODING_TABLE (ccs)
1650 = encoding_table = make_char_id_table (Qnil);
1652 put_char_id_table (XCHAR (character), value, encoding_table);
1657 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
1659 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
1660 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1662 if (VECTORP (decoding_table))
1664 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
1668 decoding_table_remove_char (decoding_table,
1669 XCHARSET_DIMENSION (ccs),
1670 XCHARSET_BYTE_OFFSET (ccs),
1674 if (CHAR_ID_TABLE_P (encoding_table))
1676 put_char_id_table (XCHAR (character), Qnil, encoding_table);
1681 EXFUN (Fmake_char, 3);
1682 EXFUN (Fdecode_char, 2);
1684 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
1685 Store character's ATTRIBUTES.
1689 Lisp_Object rest = attributes;
1690 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
1691 Lisp_Object character;
1695 while (CONSP (rest))
1697 Lisp_Object cell = Fcar (rest);
1701 signal_simple_error ("Invalid argument", attributes);
1702 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
1703 && ((XCHARSET_FINAL (ccs) != 0) ||
1704 (XCHARSET_UCS_MAX (ccs) > 0)) )
1708 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
1710 character = Fdecode_char (ccs, cell);
1711 if (!NILP (character))
1712 goto setup_attributes;
1716 if ( (!NILP (code = Fcdr (Fassq (Qto_ucs, attributes)))) ||
1717 (!NILP (code = Fcdr (Fassq (Q_ucs, attributes)))) )
1721 signal_simple_error ("Invalid argument", attributes);
1723 character = make_char (XINT (code) + 0x100000);
1724 goto setup_attributes;
1728 else if (!INTP (code))
1729 signal_simple_error ("Invalid argument", attributes);
1731 character = make_char (XINT (code));
1735 while (CONSP (rest))
1737 Lisp_Object cell = Fcar (rest);
1740 signal_simple_error ("Invalid argument", attributes);
1742 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
1748 DEFUN ("find-char", Ffind_char, 1, 1, 0, /*
1749 Retrieve the character of the given ATTRIBUTES.
1753 Lisp_Object rest = attributes;
1756 while (CONSP (rest))
1758 Lisp_Object cell = Fcar (rest);
1762 signal_simple_error ("Invalid argument", attributes);
1763 if (!NILP (ccs = Ffind_charset (Fcar (cell))))
1767 return Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
1769 return Fdecode_char (ccs, cell);
1773 if ( (!NILP (code = Fcdr (Fassq (Qto_ucs, attributes)))) ||
1774 (!NILP (code = Fcdr (Fassq (Q_ucs, attributes)))) )
1777 signal_simple_error ("Invalid argument", attributes);
1779 return make_char (XINT (code) + 0x100000);
1784 Lisp_Object Vutf_2000_version;
1788 int leading_code_private_11;
1791 Lisp_Object Qcharsetp;
1793 /* Qdoc_string, Qdimension, Qchars defined in general.c */
1794 Lisp_Object Qregistry, Qfinal, Qgraphic;
1795 Lisp_Object Qdirection;
1796 Lisp_Object Qreverse_direction_charset;
1797 Lisp_Object Qleading_byte;
1798 Lisp_Object Qshort_name, Qlong_name;
1812 Qcyrillic_iso8859_5,
1814 Qjapanese_jisx0208_1978,
1818 Qjapanese_jisx0208_1990,
1821 Qchinese_cns11643_1,
1822 Qchinese_cns11643_2,
1829 Qlatin_viscii_lower,
1830 Qlatin_viscii_upper,
1831 Qvietnamese_viscii_lower,
1832 Qvietnamese_viscii_upper,
1845 Qideograph_gt_pj_10,
1846 Qideograph_gt_pj_11,
1876 Lisp_Object Ql2r, Qr2l;
1878 Lisp_Object Vcharset_hash_table;
1880 /* Composite characters are characters constructed by overstriking two
1881 or more regular characters.
1883 1) The old Mule implementation involves storing composite characters
1884 in a buffer as a tag followed by all of the actual characters
1885 used to make up the composite character. I think this is a bad
1886 idea; it greatly complicates code that wants to handle strings
1887 one character at a time because it has to deal with the possibility
1888 of great big ungainly characters. It's much more reasonable to
1889 simply store an index into a table of composite characters.
1891 2) The current implementation only allows for 16,384 separate
1892 composite characters over the lifetime of the XEmacs process.
1893 This could become a potential problem if the user
1894 edited lots of different files that use composite characters.
1895 Due to FSF bogosity, increasing the number of allowable
1896 composite characters under Mule would decrease the number
1897 of possible faces that can exist. Mule already has shrunk
1898 this to 2048, and further shrinkage would become uncomfortable.
1899 No such problems exist in XEmacs.
1901 Composite characters could be represented as 0x80 C1 C2 C3,
1902 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1903 for slightly under 2^20 (one million) composite characters
1904 over the XEmacs process lifetime, and you only need to
1905 increase the size of a Mule character from 19 to 21 bits.
1906 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1907 85 million (slightly over 2^26) composite characters. */
1910 /************************************************************************/
1911 /* Basic Emchar functions */
1912 /************************************************************************/
1914 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1915 string in STR. Returns the number of bytes stored.
1916 Do not call this directly. Use the macro set_charptr_emchar() instead.
1920 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1926 Lisp_Object charset;
1935 else if ( c <= 0x7ff )
1937 *p++ = (c >> 6) | 0xc0;
1938 *p++ = (c & 0x3f) | 0x80;
1940 else if ( c <= 0xffff )
1942 *p++ = (c >> 12) | 0xe0;
1943 *p++ = ((c >> 6) & 0x3f) | 0x80;
1944 *p++ = (c & 0x3f) | 0x80;
1946 else if ( c <= 0x1fffff )
1948 *p++ = (c >> 18) | 0xf0;
1949 *p++ = ((c >> 12) & 0x3f) | 0x80;
1950 *p++ = ((c >> 6) & 0x3f) | 0x80;
1951 *p++ = (c & 0x3f) | 0x80;
1953 else if ( c <= 0x3ffffff )
1955 *p++ = (c >> 24) | 0xf8;
1956 *p++ = ((c >> 18) & 0x3f) | 0x80;
1957 *p++ = ((c >> 12) & 0x3f) | 0x80;
1958 *p++ = ((c >> 6) & 0x3f) | 0x80;
1959 *p++ = (c & 0x3f) | 0x80;
1963 *p++ = (c >> 30) | 0xfc;
1964 *p++ = ((c >> 24) & 0x3f) | 0x80;
1965 *p++ = ((c >> 18) & 0x3f) | 0x80;
1966 *p++ = ((c >> 12) & 0x3f) | 0x80;
1967 *p++ = ((c >> 6) & 0x3f) | 0x80;
1968 *p++ = (c & 0x3f) | 0x80;
1971 BREAKUP_CHAR (c, charset, c1, c2);
1972 lb = CHAR_LEADING_BYTE (c);
1973 if (LEADING_BYTE_PRIVATE_P (lb))
1974 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1976 if (EQ (charset, Vcharset_control_1))
1985 /* Return the first character from a Mule-encoded string in STR,
1986 assuming it's non-ASCII. Do not call this directly.
1987 Use the macro charptr_emchar() instead. */
1990 non_ascii_charptr_emchar (const Bufbyte *str)
2003 else if ( b >= 0xf8 )
2008 else if ( b >= 0xf0 )
2013 else if ( b >= 0xe0 )
2018 else if ( b >= 0xc0 )
2028 for( ; len > 0; len-- )
2031 ch = ( ch << 6 ) | ( b & 0x3f );
2035 Bufbyte i0 = *str, i1, i2 = 0;
2036 Lisp_Object charset;
2038 if (i0 == LEADING_BYTE_CONTROL_1)
2039 return (Emchar) (*++str - 0x20);
2041 if (LEADING_BYTE_PREFIX_P (i0))
2046 charset = CHARSET_BY_LEADING_BYTE (i0);
2047 if (XCHARSET_DIMENSION (charset) == 2)
2050 return MAKE_CHAR (charset, i1, i2);
2054 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
2055 Do not call this directly. Use the macro valid_char_p() instead. */
2059 non_ascii_valid_char_p (Emchar ch)
2063 /* Must have only lowest 19 bits set */
2067 f1 = CHAR_FIELD1 (ch);
2068 f2 = CHAR_FIELD2 (ch);
2069 f3 = CHAR_FIELD3 (ch);
2073 Lisp_Object charset;
2075 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
2076 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
2077 f2 > MAX_CHAR_FIELD2_PRIVATE)
2082 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
2083 f2 <= MAX_CHAR_FIELD2_PRIVATE))
2087 NOTE: This takes advantage of the fact that
2088 FIELD2_TO_OFFICIAL_LEADING_BYTE and
2089 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
2091 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
2092 if (EQ (charset, Qnil))
2094 return (XCHARSET_CHARS (charset) == 96);
2098 Lisp_Object charset;
2100 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
2101 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
2102 f1 > MAX_CHAR_FIELD1_PRIVATE)
2104 if (f2 < 0x20 || f3 < 0x20)
2107 #ifdef ENABLE_COMPOSITE_CHARS
2108 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
2110 if (UNBOUNDP (Fgethash (make_int (ch),
2111 Vcomposite_char_char2string_hash_table,
2116 #endif /* ENABLE_COMPOSITE_CHARS */
2118 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
2119 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
2122 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
2124 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
2127 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
2129 if (EQ (charset, Qnil))
2131 return (XCHARSET_CHARS (charset) == 96);
2137 /************************************************************************/
2138 /* Basic string functions */
2139 /************************************************************************/
2141 /* Copy the character pointed to by SRC into DST. Do not call this
2142 directly. Use the macro charptr_copy_char() instead.
2143 Return the number of bytes copied. */
2146 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
2148 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
2150 for (i = bytes; i; i--, dst++, src++)
2156 /************************************************************************/
2157 /* streams of Emchars */
2158 /************************************************************************/
2160 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
2161 The functions below are not meant to be called directly; use
2162 the macros in insdel.h. */
2165 Lstream_get_emchar_1 (Lstream *stream, int ch)
2167 Bufbyte str[MAX_EMCHAR_LEN];
2168 Bufbyte *strptr = str;
2171 str[0] = (Bufbyte) ch;
2173 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
2175 int c = Lstream_getc (stream);
2176 bufpos_checking_assert (c >= 0);
2177 *++strptr = (Bufbyte) c;
2179 return charptr_emchar (str);
2183 Lstream_fput_emchar (Lstream *stream, Emchar ch)
2185 Bufbyte str[MAX_EMCHAR_LEN];
2186 Bytecount len = set_charptr_emchar (str, ch);
2187 return Lstream_write (stream, str, len);
2191 Lstream_funget_emchar (Lstream *stream, Emchar ch)
2193 Bufbyte str[MAX_EMCHAR_LEN];
2194 Bytecount len = set_charptr_emchar (str, ch);
2195 Lstream_unread (stream, str, len);
2199 /************************************************************************/
2200 /* charset object */
2201 /************************************************************************/
2204 mark_charset (Lisp_Object obj)
2206 Lisp_Charset *cs = XCHARSET (obj);
2208 mark_object (cs->short_name);
2209 mark_object (cs->long_name);
2210 mark_object (cs->doc_string);
2211 mark_object (cs->registry);
2212 mark_object (cs->ccl_program);
2214 mark_object (cs->encoding_table);
2215 /* mark_object (cs->decoding_table); */
2221 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
2223 Lisp_Charset *cs = XCHARSET (obj);
2227 error ("printing unreadable object #<charset %s 0x%x>",
2228 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
2231 write_c_string ("#<charset ", printcharfun);
2232 print_internal (CHARSET_NAME (cs), printcharfun, 0);
2233 write_c_string (" ", printcharfun);
2234 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
2235 write_c_string (" ", printcharfun);
2236 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
2237 write_c_string (" ", printcharfun);
2238 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
2239 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
2241 CHARSET_DIMENSION (cs),
2242 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
2243 CHARSET_COLUMNS (cs),
2244 CHARSET_GRAPHIC (cs),
2245 CHARSET_FINAL (cs));
2246 write_c_string (buf, printcharfun);
2247 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
2248 sprintf (buf, " 0x%x>", cs->header.uid);
2249 write_c_string (buf, printcharfun);
2252 static const struct lrecord_description charset_description[] = {
2253 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
2254 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
2255 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
2256 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
2257 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
2258 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
2259 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
2261 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
2262 { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
2267 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
2268 mark_charset, print_charset, 0, 0, 0,
2269 charset_description,
2272 /* Make a new charset. */
2273 /* #### SJT Should generic properties be allowed? */
2275 make_charset (Charset_ID id, Lisp_Object name,
2276 unsigned short chars, unsigned char dimension,
2277 unsigned char columns, unsigned char graphic,
2278 Bufbyte final, unsigned char direction, Lisp_Object short_name,
2279 Lisp_Object long_name, Lisp_Object doc,
2281 Lisp_Object decoding_table,
2282 Emchar ucs_min, Emchar ucs_max,
2283 Emchar code_offset, unsigned char byte_offset)
2286 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
2290 XSETCHARSET (obj, cs);
2292 CHARSET_ID (cs) = id;
2293 CHARSET_NAME (cs) = name;
2294 CHARSET_SHORT_NAME (cs) = short_name;
2295 CHARSET_LONG_NAME (cs) = long_name;
2296 CHARSET_CHARS (cs) = chars;
2297 CHARSET_DIMENSION (cs) = dimension;
2298 CHARSET_DIRECTION (cs) = direction;
2299 CHARSET_COLUMNS (cs) = columns;
2300 CHARSET_GRAPHIC (cs) = graphic;
2301 CHARSET_FINAL (cs) = final;
2302 CHARSET_DOC_STRING (cs) = doc;
2303 CHARSET_REGISTRY (cs) = reg;
2304 CHARSET_CCL_PROGRAM (cs) = Qnil;
2305 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
2307 CHARSET_DECODING_TABLE(cs) = Qnil;
2308 CHARSET_ENCODING_TABLE(cs) = Qnil;
2309 CHARSET_UCS_MIN(cs) = ucs_min;
2310 CHARSET_UCS_MAX(cs) = ucs_max;
2311 CHARSET_CODE_OFFSET(cs) = code_offset;
2312 CHARSET_BYTE_OFFSET(cs) = byte_offset;
2316 if (id == LEADING_BYTE_ASCII)
2317 CHARSET_REP_BYTES (cs) = 1;
2319 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
2321 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
2326 /* some charsets do not have final characters. This includes
2327 ASCII, Control-1, Composite, and the two faux private
2329 unsigned char iso2022_type
2330 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
2332 if (code_offset == 0)
2334 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
2335 chlook->charset_by_attributes[iso2022_type][final] = obj;
2339 (chlook->charset_by_attributes[iso2022_type][final][direction]));
2340 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
2344 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
2345 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
2347 /* Some charsets are "faux" and don't have names or really exist at
2348 all except in the leading-byte table. */
2350 Fputhash (name, obj, Vcharset_hash_table);
2355 get_unallocated_leading_byte (int dimension)
2360 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
2363 lb = chlook->next_allocated_leading_byte++;
2367 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
2370 lb = chlook->next_allocated_1_byte_leading_byte++;
2374 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
2377 lb = chlook->next_allocated_2_byte_leading_byte++;
2383 ("No more character sets free for this dimension",
2384 make_int (dimension));
2390 /* Number of Big5 characters which have the same code in 1st byte. */
2392 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
2395 decode_builtin_char (Lisp_Object charset, int code_point)
2399 if (EQ (charset, Vcharset_chinese_big5))
2401 int c1 = code_point >> 8;
2402 int c2 = code_point & 0xFF;
2405 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
2407 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
2408 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
2410 I = (c1 - 0xA1) * BIG5_SAME_ROW
2411 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
2415 charset = Vcharset_chinese_big5_1;
2419 charset = Vcharset_chinese_big5_2;
2420 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
2422 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
2425 if ((final = XCHARSET_FINAL (charset)) >= '0')
2427 if (XCHARSET_DIMENSION (charset) == 1)
2429 switch (XCHARSET_CHARS (charset))
2433 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
2436 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
2444 switch (XCHARSET_CHARS (charset))
2447 return MIN_CHAR_94x94
2448 + (final - '0') * 94 * 94
2449 + (((code_point >> 8) & 0x7F) - 33) * 94
2450 + ((code_point & 0x7F) - 33);
2452 return MIN_CHAR_96x96
2453 + (final - '0') * 96 * 96
2454 + (((code_point >> 8) & 0x7F) - 32) * 96
2455 + ((code_point & 0x7F) - 32);
2462 else if (XCHARSET_UCS_MAX (charset))
2465 = (XCHARSET_DIMENSION (charset) == 1
2467 code_point - XCHARSET_BYTE_OFFSET (charset)
2469 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
2470 * XCHARSET_CHARS (charset)
2471 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
2472 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
2473 if ((cid < XCHARSET_UCS_MIN (charset))
2474 || (XCHARSET_UCS_MAX (charset) < cid))
2483 range_charset_code_point (Lisp_Object charset, Emchar ch)
2487 if ((XCHARSET_UCS_MIN (charset) <= ch)
2488 && (ch <= XCHARSET_UCS_MAX (charset)))
2490 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
2492 if (XCHARSET_CHARS (charset) == 256)
2494 else if (XCHARSET_DIMENSION (charset) == 1)
2495 return d + XCHARSET_BYTE_OFFSET (charset);
2496 else if (XCHARSET_DIMENSION (charset) == 2)
2498 ((d / XCHARSET_CHARS (charset)
2499 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2500 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2501 else if (XCHARSET_DIMENSION (charset) == 3)
2503 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2504 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2505 | ((d / XCHARSET_CHARS (charset)
2506 % XCHARSET_CHARS (charset)
2507 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2508 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2509 else /* if (XCHARSET_DIMENSION (charset) == 4) */
2511 ((d / (XCHARSET_CHARS (charset)
2512 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2513 + XCHARSET_BYTE_OFFSET (charset)) << 24)
2514 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2515 % XCHARSET_CHARS (charset)
2516 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2517 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
2518 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2519 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2521 else if (XCHARSET_CODE_OFFSET (charset) == 0)
2523 if (XCHARSET_DIMENSION (charset) == 1)
2525 if (XCHARSET_CHARS (charset) == 94)
2527 if (((d = ch - (MIN_CHAR_94
2528 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
2532 else if (XCHARSET_CHARS (charset) == 96)
2534 if (((d = ch - (MIN_CHAR_96
2535 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
2542 else if (XCHARSET_DIMENSION (charset) == 2)
2544 if (XCHARSET_CHARS (charset) == 94)
2546 if (((d = ch - (MIN_CHAR_94x94
2547 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
2550 return (((d / 94) + 33) << 8) | (d % 94 + 33);
2552 else if (XCHARSET_CHARS (charset) == 96)
2554 if (((d = ch - (MIN_CHAR_96x96
2555 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
2558 return (((d / 96) + 32) << 8) | (d % 96 + 32);
2564 if (EQ (charset, Vcharset_mojikyo_2022_1)
2565 && (MIN_CHAR_MOJIKYO < ch) && (ch < MIN_CHAR_MOJIKYO + 94 * 60 * 94))
2567 int m = ch - MIN_CHAR_MOJIKYO - 1;
2568 int byte1 = m / (94 * 60) + 33;
2569 int byte2 = (m % (94 * 60)) / 94;
2570 int byte3 = m % 94 + 33;
2576 return (byte1 << 16) | (byte2 << 8) | byte3;
2582 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
2584 if (c <= MAX_CHAR_BASIC_LATIN)
2586 *charset = Vcharset_ascii;
2591 *charset = Vcharset_control_1;
2596 *charset = Vcharset_latin_iso8859_1;
2600 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
2602 *charset = Vcharset_hebrew_iso8859_8;
2603 return c - MIN_CHAR_HEBREW + 0x20;
2606 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
2608 *charset = Vcharset_thai_tis620;
2609 return c - MIN_CHAR_THAI + 0x20;
2612 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
2613 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
2615 return list2 (Vcharset_katakana_jisx0201,
2616 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
2619 else if (c <= MAX_CHAR_BMP)
2621 *charset = Vcharset_ucs_bmp;
2624 else if (c < MIN_CHAR_DAIKANWA)
2626 *charset = Vcharset_ucs;
2629 else if (c <= MAX_CHAR_DAIKANWA)
2631 *charset = Vcharset_ideograph_daikanwa;
2632 return c - MIN_CHAR_DAIKANWA;
2634 else if (c <= MAX_CHAR_MOJIKYO_0)
2636 *charset = Vcharset_mojikyo;
2637 return c - MIN_CHAR_MOJIKYO_0;
2639 else if (c < MIN_CHAR_94)
2641 *charset = Vcharset_ucs;
2644 else if (c <= MAX_CHAR_94)
2646 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
2647 ((c - MIN_CHAR_94) / 94) + '0',
2648 CHARSET_LEFT_TO_RIGHT);
2649 if (!NILP (*charset))
2650 return ((c - MIN_CHAR_94) % 94) + 33;
2653 *charset = Vcharset_ucs;
2657 else if (c <= MAX_CHAR_96)
2659 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
2660 ((c - MIN_CHAR_96) / 96) + '0',
2661 CHARSET_LEFT_TO_RIGHT);
2662 if (!NILP (*charset))
2663 return ((c - MIN_CHAR_96) % 96) + 32;
2666 *charset = Vcharset_ucs;
2670 else if (c <= MAX_CHAR_94x94)
2673 = CHARSET_BY_ATTRIBUTES (94, 2,
2674 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
2675 CHARSET_LEFT_TO_RIGHT);
2676 if (!NILP (*charset))
2677 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
2678 | (((c - MIN_CHAR_94x94) % 94) + 33);
2681 *charset = Vcharset_ucs;
2685 else if (c <= MAX_CHAR_96x96)
2688 = CHARSET_BY_ATTRIBUTES (96, 2,
2689 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
2690 CHARSET_LEFT_TO_RIGHT);
2691 if (!NILP (*charset))
2692 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
2693 | (((c - MIN_CHAR_96x96) % 96) + 32);
2696 *charset = Vcharset_ucs;
2700 else if (c < MIN_CHAR_MOJIKYO)
2702 *charset = Vcharset_ucs;
2705 else if (c <= MAX_CHAR_MOJIKYO)
2707 *charset = Vcharset_mojikyo;
2708 return c - MIN_CHAR_MOJIKYO;
2712 *charset = Vcharset_ucs;
2717 Lisp_Object Vdefault_coded_charset_priority_list;
2721 /************************************************************************/
2722 /* Basic charset Lisp functions */
2723 /************************************************************************/
2725 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
2726 Return non-nil if OBJECT is a charset.
2730 return CHARSETP (object) ? Qt : Qnil;
2733 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
2734 Retrieve the charset of the given name.
2735 If CHARSET-OR-NAME is a charset object, it is simply returned.
2736 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
2737 nil is returned. Otherwise the associated charset object is returned.
2741 if (CHARSETP (charset_or_name))
2742 return charset_or_name;
2744 CHECK_SYMBOL (charset_or_name);
2745 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
2748 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
2749 Retrieve the charset of the given name.
2750 Same as `find-charset' except an error is signalled if there is no such
2751 charset instead of returning nil.
2755 Lisp_Object charset = Ffind_charset (name);
2758 signal_simple_error ("No such charset", name);
2762 /* We store the charsets in hash tables with the names as the key and the
2763 actual charset object as the value. Occasionally we need to use them
2764 in a list format. These routines provide us with that. */
2765 struct charset_list_closure
2767 Lisp_Object *charset_list;
2771 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
2772 void *charset_list_closure)
2774 /* This function can GC */
2775 struct charset_list_closure *chcl =
2776 (struct charset_list_closure*) charset_list_closure;
2777 Lisp_Object *charset_list = chcl->charset_list;
2779 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
2783 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
2784 Return a list of the names of all defined charsets.
2788 Lisp_Object charset_list = Qnil;
2789 struct gcpro gcpro1;
2790 struct charset_list_closure charset_list_closure;
2792 GCPRO1 (charset_list);
2793 charset_list_closure.charset_list = &charset_list;
2794 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
2795 &charset_list_closure);
2798 return charset_list;
2801 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
2802 Return the name of charset CHARSET.
2806 return XCHARSET_NAME (Fget_charset (charset));
2809 /* #### SJT Should generic properties be allowed? */
2810 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
2811 Define a new character set.
2812 This function is for use with Mule support.
2813 NAME is a symbol, the name by which the character set is normally referred.
2814 DOC-STRING is a string describing the character set.
2815 PROPS is a property list, describing the specific nature of the
2816 character set. Recognized properties are:
2818 'short-name Short version of the charset name (ex: Latin-1)
2819 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
2820 'registry A regular expression matching the font registry field for
2822 'dimension Number of octets used to index a character in this charset.
2823 Either 1 or 2. Defaults to 1.
2824 'columns Number of columns used to display a character in this charset.
2825 Only used in TTY mode. (Under X, the actual width of a
2826 character can be derived from the font used to display the
2827 characters.) If unspecified, defaults to the dimension
2828 (this is almost always the correct value).
2829 'chars Number of characters in each dimension (94 or 96).
2830 Defaults to 94. Note that if the dimension is 2, the
2831 character set thus described is 94x94 or 96x96.
2832 'final Final byte of ISO 2022 escape sequence. Must be
2833 supplied. Each combination of (DIMENSION, CHARS) defines a
2834 separate namespace for final bytes. Note that ISO
2835 2022 restricts the final byte to the range
2836 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
2837 dimension == 2. Note also that final bytes in the range
2838 0x30 - 0x3F are reserved for user-defined (not official)
2840 'graphic 0 (use left half of font on output) or 1 (use right half
2841 of font on output). Defaults to 0. For example, for
2842 a font whose registry is ISO8859-1, the left half
2843 (octets 0x20 - 0x7F) is the `ascii' character set, while
2844 the right half (octets 0xA0 - 0xFF) is the `latin-1'
2845 character set. With 'graphic set to 0, the octets
2846 will have their high bit cleared; with it set to 1,
2847 the octets will have their high bit set.
2848 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
2850 'ccl-program A compiled CCL program used to convert a character in
2851 this charset into an index into the font. This is in
2852 addition to the 'graphic property. The CCL program
2853 is passed the octets of the character, with the high
2854 bit cleared and set depending upon whether the value
2855 of the 'graphic property is 0 or 1.
2857 (name, doc_string, props))
2859 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
2860 int direction = CHARSET_LEFT_TO_RIGHT;
2861 Lisp_Object registry = Qnil;
2862 Lisp_Object charset;
2863 Lisp_Object ccl_program = Qnil;
2864 Lisp_Object short_name = Qnil, long_name = Qnil;
2865 int byte_offset = -1;
2867 CHECK_SYMBOL (name);
2868 if (!NILP (doc_string))
2869 CHECK_STRING (doc_string);
2871 charset = Ffind_charset (name);
2872 if (!NILP (charset))
2873 signal_simple_error ("Cannot redefine existing charset", name);
2876 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
2878 if (EQ (keyword, Qshort_name))
2880 CHECK_STRING (value);
2884 if (EQ (keyword, Qlong_name))
2886 CHECK_STRING (value);
2890 else if (EQ (keyword, Qdimension))
2893 dimension = XINT (value);
2894 if (dimension < 1 || dimension > 2)
2895 signal_simple_error ("Invalid value for 'dimension", value);
2898 else if (EQ (keyword, Qchars))
2901 chars = XINT (value);
2902 if (chars != 94 && chars != 96)
2903 signal_simple_error ("Invalid value for 'chars", value);
2906 else if (EQ (keyword, Qcolumns))
2909 columns = XINT (value);
2910 if (columns != 1 && columns != 2)
2911 signal_simple_error ("Invalid value for 'columns", value);
2914 else if (EQ (keyword, Qgraphic))
2917 graphic = XINT (value);
2919 if (graphic < 0 || graphic > 2)
2921 if (graphic < 0 || graphic > 1)
2923 signal_simple_error ("Invalid value for 'graphic", value);
2926 else if (EQ (keyword, Qregistry))
2928 CHECK_STRING (value);
2932 else if (EQ (keyword, Qdirection))
2934 if (EQ (value, Ql2r))
2935 direction = CHARSET_LEFT_TO_RIGHT;
2936 else if (EQ (value, Qr2l))
2937 direction = CHARSET_RIGHT_TO_LEFT;
2939 signal_simple_error ("Invalid value for 'direction", value);
2942 else if (EQ (keyword, Qfinal))
2944 CHECK_CHAR_COERCE_INT (value);
2945 final = XCHAR (value);
2946 if (final < '0' || final > '~')
2947 signal_simple_error ("Invalid value for 'final", value);
2950 else if (EQ (keyword, Qccl_program))
2952 struct ccl_program test_ccl;
2954 if (setup_ccl_program (&test_ccl, value) < 0)
2955 signal_simple_error ("Invalid value for 'ccl-program", value);
2956 ccl_program = value;
2960 signal_simple_error ("Unrecognized property", keyword);
2965 error ("'final must be specified");
2966 if (dimension == 2 && final > 0x5F)
2968 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2971 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2972 CHARSET_LEFT_TO_RIGHT)) ||
2973 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2974 CHARSET_RIGHT_TO_LEFT)))
2976 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2978 id = get_unallocated_leading_byte (dimension);
2980 if (NILP (doc_string))
2981 doc_string = build_string ("");
2983 if (NILP (registry))
2984 registry = build_string ("");
2986 if (NILP (short_name))
2987 XSETSTRING (short_name, XSYMBOL (name)->name);
2989 if (NILP (long_name))
2990 long_name = doc_string;
2993 columns = dimension;
2995 if (byte_offset < 0)
2999 else if (chars == 96)
3005 charset = make_charset (id, name, chars, dimension, columns, graphic,
3006 final, direction, short_name, long_name,
3007 doc_string, registry,
3008 Qnil, 0, 0, 0, byte_offset);
3009 if (!NILP (ccl_program))
3010 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
3014 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
3016 Make a charset equivalent to CHARSET but which goes in the opposite direction.
3017 NEW-NAME is the name of the new charset. Return the new charset.
3019 (charset, new_name))
3021 Lisp_Object new_charset = Qnil;
3022 int id, chars, dimension, columns, graphic, final;
3024 Lisp_Object registry, doc_string, short_name, long_name;
3027 charset = Fget_charset (charset);
3028 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
3029 signal_simple_error ("Charset already has reverse-direction charset",
3032 CHECK_SYMBOL (new_name);
3033 if (!NILP (Ffind_charset (new_name)))
3034 signal_simple_error ("Cannot redefine existing charset", new_name);
3036 cs = XCHARSET (charset);
3038 chars = CHARSET_CHARS (cs);
3039 dimension = CHARSET_DIMENSION (cs);
3040 columns = CHARSET_COLUMNS (cs);
3041 id = get_unallocated_leading_byte (dimension);
3043 graphic = CHARSET_GRAPHIC (cs);
3044 final = CHARSET_FINAL (cs);
3045 direction = CHARSET_RIGHT_TO_LEFT;
3046 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
3047 direction = CHARSET_LEFT_TO_RIGHT;
3048 doc_string = CHARSET_DOC_STRING (cs);
3049 short_name = CHARSET_SHORT_NAME (cs);
3050 long_name = CHARSET_LONG_NAME (cs);
3051 registry = CHARSET_REGISTRY (cs);
3053 new_charset = make_charset (id, new_name, chars, dimension, columns,
3054 graphic, final, direction, short_name, long_name,
3055 doc_string, registry,
3057 CHARSET_DECODING_TABLE(cs),
3058 CHARSET_UCS_MIN(cs),
3059 CHARSET_UCS_MAX(cs),
3060 CHARSET_CODE_OFFSET(cs),
3061 CHARSET_BYTE_OFFSET(cs)
3067 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
3068 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
3073 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
3074 Define symbol ALIAS as an alias for CHARSET.
3078 CHECK_SYMBOL (alias);
3079 charset = Fget_charset (charset);
3080 return Fputhash (alias, charset, Vcharset_hash_table);
3083 /* #### Reverse direction charsets not yet implemented. */
3085 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
3087 Return the reverse-direction charset parallel to CHARSET, if any.
3088 This is the charset with the same properties (in particular, the same
3089 dimension, number of characters per dimension, and final byte) as
3090 CHARSET but whose characters are displayed in the opposite direction.
3094 charset = Fget_charset (charset);
3095 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
3099 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
3100 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
3101 If DIRECTION is omitted, both directions will be checked (left-to-right
3102 will be returned if character sets exist for both directions).
3104 (dimension, chars, final, direction))
3106 int dm, ch, fi, di = -1;
3107 Lisp_Object obj = Qnil;
3109 CHECK_INT (dimension);
3110 dm = XINT (dimension);
3111 if (dm < 1 || dm > 2)
3112 signal_simple_error ("Invalid value for DIMENSION", dimension);
3116 if (ch != 94 && ch != 96)
3117 signal_simple_error ("Invalid value for CHARS", chars);
3119 CHECK_CHAR_COERCE_INT (final);
3121 if (fi < '0' || fi > '~')
3122 signal_simple_error ("Invalid value for FINAL", final);
3124 if (EQ (direction, Ql2r))
3125 di = CHARSET_LEFT_TO_RIGHT;
3126 else if (EQ (direction, Qr2l))
3127 di = CHARSET_RIGHT_TO_LEFT;
3128 else if (!NILP (direction))
3129 signal_simple_error ("Invalid value for DIRECTION", direction);
3131 if (dm == 2 && fi > 0x5F)
3133 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
3137 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
3139 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
3142 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
3145 return XCHARSET_NAME (obj);
3149 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
3150 Return short name of CHARSET.
3154 return XCHARSET_SHORT_NAME (Fget_charset (charset));
3157 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
3158 Return long name of CHARSET.
3162 return XCHARSET_LONG_NAME (Fget_charset (charset));
3165 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
3166 Return description of CHARSET.
3170 return XCHARSET_DOC_STRING (Fget_charset (charset));
3173 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
3174 Return dimension of CHARSET.
3178 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
3181 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
3182 Return property PROP of CHARSET, a charset object or symbol naming a charset.
3183 Recognized properties are those listed in `make-charset', as well as
3184 'name and 'doc-string.
3190 charset = Fget_charset (charset);
3191 cs = XCHARSET (charset);
3193 CHECK_SYMBOL (prop);
3194 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
3195 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
3196 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
3197 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
3198 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
3199 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
3200 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
3201 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
3202 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
3203 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
3204 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
3205 if (EQ (prop, Qdirection))
3206 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
3207 if (EQ (prop, Qreverse_direction_charset))
3209 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
3210 /* #### Is this translation OK? If so, error checking sufficient? */
3211 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
3213 signal_simple_error ("Unrecognized charset property name", prop);
3214 return Qnil; /* not reached */
3217 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
3218 Return charset identification number of CHARSET.
3222 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
3225 /* #### We need to figure out which properties we really want to
3228 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
3229 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
3231 (charset, ccl_program))
3233 struct ccl_program test_ccl;
3235 charset = Fget_charset (charset);
3236 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
3237 signal_simple_error ("Invalid ccl-program", ccl_program);
3238 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
3243 invalidate_charset_font_caches (Lisp_Object charset)
3245 /* Invalidate font cache entries for charset on all devices. */
3246 Lisp_Object devcons, concons, hash_table;
3247 DEVICE_LOOP_NO_BREAK (devcons, concons)
3249 struct device *d = XDEVICE (XCAR (devcons));
3250 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
3251 if (!UNBOUNDP (hash_table))
3252 Fclrhash (hash_table);
3256 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
3257 Set the 'registry property of CHARSET to REGISTRY.
3259 (charset, registry))
3261 charset = Fget_charset (charset);
3262 CHECK_STRING (registry);
3263 XCHARSET_REGISTRY (charset) = registry;
3264 invalidate_charset_font_caches (charset);
3265 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
3270 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
3271 Return mapping-table of CHARSET.
3275 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
3278 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
3279 Set mapping-table of CHARSET to TABLE.
3283 struct Lisp_Charset *cs;
3287 charset = Fget_charset (charset);
3288 cs = XCHARSET (charset);
3292 if (VECTORP (CHARSET_DECODING_TABLE(cs)))
3293 make_vector_newer (CHARSET_DECODING_TABLE(cs));
3294 CHARSET_DECODING_TABLE(cs) = Qnil;
3297 else if (VECTORP (table))
3299 int ccs_len = CHARSET_BYTE_SIZE (cs);
3300 int ret = decoding_table_check_elements (table,
3301 CHARSET_DIMENSION (cs),
3306 signal_simple_error ("Too big table", table);
3308 signal_simple_error ("Invalid element is found", table);
3310 signal_simple_error ("Something wrong", table);
3312 CHARSET_DECODING_TABLE(cs) = Qnil;
3315 signal_error (Qwrong_type_argument,
3316 list2 (build_translated_string ("vector-or-nil-p"),
3319 byte_offset = CHARSET_BYTE_OFFSET (cs);
3320 switch (CHARSET_DIMENSION (cs))
3323 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3325 Lisp_Object c = XVECTOR_DATA(table)[i];
3328 put_char_ccs_code_point (c, charset,
3329 make_int (i + byte_offset));
3333 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3335 Lisp_Object v = XVECTOR_DATA(table)[i];
3341 for (j = 0; j < XVECTOR_LENGTH (v); j++)
3343 Lisp_Object c = XVECTOR_DATA(v)[j];
3346 put_char_ccs_code_point
3348 make_int ( ( (i + byte_offset) << 8 )
3354 put_char_ccs_code_point (v, charset,
3355 make_int (i + byte_offset));
3364 /************************************************************************/
3365 /* Lisp primitives for working with characters */
3366 /************************************************************************/
3369 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
3370 Make a character from CHARSET and code-point CODE.
3376 charset = Fget_charset (charset);
3379 if (XCHARSET_GRAPHIC (charset) == 1)
3381 c = DECODE_CHAR (charset, c);
3382 return c >= 0 ? make_char (c) : Qnil;
3385 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
3386 Make a builtin character from CHARSET and code-point CODE.
3392 charset = Fget_charset (charset);
3394 if (EQ (charset, Vcharset_latin_viscii))
3396 Lisp_Object chr = Fdecode_char (charset, code);
3402 (ret = Fget_char_attribute (chr,
3403 Vcharset_latin_viscii_lower,
3406 charset = Vcharset_latin_viscii_lower;
3410 (ret = Fget_char_attribute (chr,
3411 Vcharset_latin_viscii_upper,
3414 charset = Vcharset_latin_viscii_upper;
3421 if (XCHARSET_GRAPHIC (charset) == 1)
3424 c = decode_builtin_char (charset, c);
3425 return c >= 0 ? make_char (c) : Fdecode_char (charset, code);
3429 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
3430 Make a character from CHARSET and octets ARG1 and ARG2.
3431 ARG2 is required only for characters from two-dimensional charsets.
3432 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
3433 character s with caron.
3435 (charset, arg1, arg2))
3439 int lowlim, highlim;
3441 charset = Fget_charset (charset);
3442 cs = XCHARSET (charset);
3444 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
3445 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
3447 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
3449 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
3450 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
3453 /* It is useful (and safe, according to Olivier Galibert) to strip
3454 the 8th bit off ARG1 and ARG2 because it allows programmers to
3455 write (make-char 'latin-iso8859-2 CODE) where code is the actual
3456 Latin 2 code of the character. */
3464 if (a1 < lowlim || a1 > highlim)
3465 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
3467 if (CHARSET_DIMENSION (cs) == 1)
3471 ("Charset is of dimension one; second octet must be nil", arg2);
3472 return make_char (MAKE_CHAR (charset, a1, 0));
3481 a2 = XINT (arg2) & 0x7f;
3483 if (a2 < lowlim || a2 > highlim)
3484 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
3486 return make_char (MAKE_CHAR (charset, a1, a2));
3489 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
3490 Return the character set of CHARACTER.
3494 CHECK_CHAR_COERCE_INT (character);
3496 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
3499 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
3500 Return the octet numbered N (should be 0 or 1) of CHARACTER.
3501 N defaults to 0 if omitted.
3505 Lisp_Object charset;
3508 CHECK_CHAR_COERCE_INT (character);
3510 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
3512 if (NILP (n) || EQ (n, Qzero))
3513 return make_int (octet0);
3514 else if (EQ (n, make_int (1)))
3515 return make_int (octet1);
3517 signal_simple_error ("Octet number must be 0 or 1", n);
3520 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
3521 Return list of charset and one or two position-codes of CHARACTER.
3525 /* This function can GC */
3526 struct gcpro gcpro1, gcpro2;
3527 Lisp_Object charset = Qnil;
3528 Lisp_Object rc = Qnil;
3536 GCPRO2 (charset, rc);
3537 CHECK_CHAR_COERCE_INT (character);
3540 code_point = ENCODE_CHAR (XCHAR (character), charset);
3541 dimension = XCHARSET_DIMENSION (charset);
3542 while (dimension > 0)
3544 rc = Fcons (make_int (code_point & 255), rc);
3548 rc = Fcons (XCHARSET_NAME (charset), rc);
3550 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
3552 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
3554 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
3558 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
3567 #ifdef ENABLE_COMPOSITE_CHARS
3568 /************************************************************************/
3569 /* composite character functions */
3570 /************************************************************************/
3573 lookup_composite_char (Bufbyte *str, int len)
3575 Lisp_Object lispstr = make_string (str, len);
3576 Lisp_Object ch = Fgethash (lispstr,
3577 Vcomposite_char_string2char_hash_table,
3583 if (composite_char_row_next >= 128)
3584 signal_simple_error ("No more composite chars available", lispstr);
3585 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
3586 composite_char_col_next);
3587 Fputhash (make_char (emch), lispstr,
3588 Vcomposite_char_char2string_hash_table);
3589 Fputhash (lispstr, make_char (emch),
3590 Vcomposite_char_string2char_hash_table);
3591 composite_char_col_next++;
3592 if (composite_char_col_next >= 128)
3594 composite_char_col_next = 32;
3595 composite_char_row_next++;
3604 composite_char_string (Emchar ch)
3606 Lisp_Object str = Fgethash (make_char (ch),
3607 Vcomposite_char_char2string_hash_table,
3609 assert (!UNBOUNDP (str));
3613 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
3614 Convert a string into a single composite character.
3615 The character is the result of overstriking all the characters in
3620 CHECK_STRING (string);
3621 return make_char (lookup_composite_char (XSTRING_DATA (string),
3622 XSTRING_LENGTH (string)));
3625 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
3626 Return a string of the characters comprising a composite character.
3634 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
3635 signal_simple_error ("Must be composite char", ch);
3636 return composite_char_string (emch);
3638 #endif /* ENABLE_COMPOSITE_CHARS */
3641 /************************************************************************/
3642 /* initialization */
3643 /************************************************************************/
3646 syms_of_mule_charset (void)
3649 INIT_LRECORD_IMPLEMENTATION (uint8_byte_table);
3650 INIT_LRECORD_IMPLEMENTATION (uint16_byte_table);
3651 INIT_LRECORD_IMPLEMENTATION (byte_table);
3652 INIT_LRECORD_IMPLEMENTATION (char_id_table);
3654 INIT_LRECORD_IMPLEMENTATION (charset);
3656 DEFSUBR (Fcharsetp);
3657 DEFSUBR (Ffind_charset);
3658 DEFSUBR (Fget_charset);
3659 DEFSUBR (Fcharset_list);
3660 DEFSUBR (Fcharset_name);
3661 DEFSUBR (Fmake_charset);
3662 DEFSUBR (Fmake_reverse_direction_charset);
3663 /* DEFSUBR (Freverse_direction_charset); */
3664 DEFSUBR (Fdefine_charset_alias);
3665 DEFSUBR (Fcharset_from_attributes);
3666 DEFSUBR (Fcharset_short_name);
3667 DEFSUBR (Fcharset_long_name);
3668 DEFSUBR (Fcharset_description);
3669 DEFSUBR (Fcharset_dimension);
3670 DEFSUBR (Fcharset_property);
3671 DEFSUBR (Fcharset_id);
3672 DEFSUBR (Fset_charset_ccl_program);
3673 DEFSUBR (Fset_charset_registry);
3675 DEFSUBR (Fchar_attribute_list);
3676 DEFSUBR (Ffind_char_attribute_table);
3677 DEFSUBR (Fchar_attribute_alist);
3678 DEFSUBR (Fget_char_attribute);
3679 DEFSUBR (Fput_char_attribute);
3680 DEFSUBR (Fremove_char_attribute);
3681 DEFSUBR (Fmap_char_attribute);
3682 DEFSUBR (Fdefine_char);
3683 DEFSUBR (Ffind_char);
3684 DEFSUBR (Fchar_variants);
3685 DEFSUBR (Fget_composite_char);
3686 DEFSUBR (Fcharset_mapping_table);
3687 DEFSUBR (Fset_charset_mapping_table);
3691 DEFSUBR (Fdecode_char);
3692 DEFSUBR (Fdecode_builtin_char);
3694 DEFSUBR (Fmake_char);
3695 DEFSUBR (Fchar_charset);
3696 DEFSUBR (Fchar_octet);
3697 DEFSUBR (Fsplit_char);
3699 #ifdef ENABLE_COMPOSITE_CHARS
3700 DEFSUBR (Fmake_composite_char);
3701 DEFSUBR (Fcomposite_char_string);
3704 defsymbol (&Qcharsetp, "charsetp");
3705 defsymbol (&Qregistry, "registry");
3706 defsymbol (&Qfinal, "final");
3707 defsymbol (&Qgraphic, "graphic");
3708 defsymbol (&Qdirection, "direction");
3709 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
3710 defsymbol (&Qshort_name, "short-name");
3711 defsymbol (&Qlong_name, "long-name");
3713 defsymbol (&Ql2r, "l2r");
3714 defsymbol (&Qr2l, "r2l");
3716 /* Charsets, compatible with FSF 20.3
3717 Naming convention is Script-Charset[-Edition] */
3718 defsymbol (&Qascii, "ascii");
3719 defsymbol (&Qcontrol_1, "control-1");
3720 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
3721 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
3722 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
3723 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
3724 defsymbol (&Qthai_tis620, "thai-tis620");
3725 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
3726 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
3727 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
3728 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
3729 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
3730 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
3731 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
3732 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
3733 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
3734 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
3735 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
3736 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
3737 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
3738 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
3739 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
3740 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
3742 defsymbol (&Qto_ucs, "=>ucs");
3743 defsymbol (&Q_ucs, "->ucs");
3744 defsymbol (&Q_decomposition, "->decomposition");
3745 defsymbol (&Qcompat, "compat");
3746 defsymbol (&Qisolated, "isolated");
3747 defsymbol (&Qinitial, "initial");
3748 defsymbol (&Qmedial, "medial");
3749 defsymbol (&Qfinal, "final");
3750 defsymbol (&Qvertical, "vertical");
3751 defsymbol (&QnoBreak, "noBreak");
3752 defsymbol (&Qfraction, "fraction");
3753 defsymbol (&Qsuper, "super");
3754 defsymbol (&Qsub, "sub");
3755 defsymbol (&Qcircle, "circle");
3756 defsymbol (&Qsquare, "square");
3757 defsymbol (&Qwide, "wide");
3758 defsymbol (&Qnarrow, "narrow");
3759 defsymbol (&Qsmall, "small");
3760 defsymbol (&Qfont, "font");
3761 defsymbol (&Qucs, "ucs");
3762 defsymbol (&Qucs_bmp, "ucs-bmp");
3763 defsymbol (&Qucs_cns, "ucs-cns");
3764 defsymbol (&Qucs_big5, "ucs-big5");
3765 defsymbol (&Qlatin_viscii, "latin-viscii");
3766 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
3767 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
3768 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
3769 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
3770 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
3771 defsymbol (&Qideograph_gt, "ideograph-gt");
3772 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
3773 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
3774 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
3775 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
3776 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
3777 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
3778 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
3779 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
3780 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
3781 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
3782 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
3783 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
3784 defsymbol (&Qchinese_big5, "chinese-big5");
3785 defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp");
3786 defsymbol (&Qmojikyo, "mojikyo");
3787 defsymbol (&Qmojikyo_2022_1, "mojikyo-2022-1");
3788 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
3789 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
3790 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
3791 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
3792 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
3793 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
3794 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
3795 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
3796 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
3797 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
3798 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
3799 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
3800 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
3801 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
3802 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
3803 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
3804 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
3805 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
3806 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
3807 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
3808 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
3809 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
3811 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
3812 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
3814 defsymbol (&Qcomposite, "composite");
3818 vars_of_mule_charset (void)
3825 chlook = xnew (struct charset_lookup);
3826 dumpstruct (&chlook, &charset_lookup_description);
3828 /* Table of charsets indexed by leading byte. */
3829 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
3830 chlook->charset_by_leading_byte[i] = Qnil;
3833 /* Table of charsets indexed by type/final-byte. */
3834 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3835 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3836 chlook->charset_by_attributes[i][j] = Qnil;
3838 /* Table of charsets indexed by type/final-byte/direction. */
3839 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3840 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3841 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
3842 chlook->charset_by_attributes[i][j][k] = Qnil;
3846 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
3848 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
3849 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
3853 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3854 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
3855 Leading-code of private TYPE9N charset of column-width 1.
3857 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3861 Vutf_2000_version = build_string("0.17 (Hōryūji)");
3862 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
3863 Version number of UTF-2000.
3866 staticpro (&Vcharacter_composition_table);
3867 Vcharacter_composition_table = make_char_id_table (Qnil);
3869 staticpro (&Vcharacter_variant_table);
3870 Vcharacter_variant_table = make_char_id_table (Qnil);
3872 Vdefault_coded_charset_priority_list = Qnil;
3873 DEFVAR_LISP ("default-coded-charset-priority-list",
3874 &Vdefault_coded_charset_priority_list /*
3875 Default order of preferred coded-character-sets.
3881 complex_vars_of_mule_charset (void)
3883 staticpro (&Vcharset_hash_table);
3884 Vcharset_hash_table =
3885 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3887 /* Predefined character sets. We store them into variables for
3891 staticpro (&Vchar_attribute_hash_table);
3892 Vchar_attribute_hash_table
3893 = make_lisp_hash_table (16, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3895 staticpro (&Vcharset_ucs);
3897 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
3898 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3899 build_string ("UCS"),
3900 build_string ("UCS"),
3901 build_string ("ISO/IEC 10646"),
3903 Qnil, 0, 0xFFFFFFF, 0, 0);
3904 staticpro (&Vcharset_ucs_bmp);
3906 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
3907 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3908 build_string ("BMP"),
3909 build_string ("BMP"),
3910 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
3911 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
3912 Qnil, 0, 0xFFFF, 0, 0);
3913 staticpro (&Vcharset_ucs_cns);
3915 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
3916 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3917 build_string ("UCS for CNS"),
3918 build_string ("UCS for CNS 11643"),
3919 build_string ("ISO/IEC 10646 for CNS 11643"),
3922 staticpro (&Vcharset_ucs_big5);
3924 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
3925 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3926 build_string ("UCS for Big5"),
3927 build_string ("UCS for Big5"),
3928 build_string ("ISO/IEC 10646 for Big5"),
3932 # define MIN_CHAR_THAI 0
3933 # define MAX_CHAR_THAI 0
3934 /* # define MIN_CHAR_HEBREW 0 */
3935 /* # define MAX_CHAR_HEBREW 0 */
3936 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3937 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
3939 staticpro (&Vcharset_ascii);
3941 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3942 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3943 build_string ("ASCII"),
3944 build_string ("ASCII)"),
3945 build_string ("ASCII (ISO646 IRV)"),
3946 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3947 Qnil, 0, 0x7F, 0, 0);
3948 staticpro (&Vcharset_control_1);
3949 Vcharset_control_1 =
3950 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3951 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3952 build_string ("C1"),
3953 build_string ("Control characters"),
3954 build_string ("Control characters 128-191"),
3956 Qnil, 0x80, 0x9F, 0, 0);
3957 staticpro (&Vcharset_latin_iso8859_1);
3958 Vcharset_latin_iso8859_1 =
3959 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3960 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3961 build_string ("Latin-1"),
3962 build_string ("ISO8859-1 (Latin-1)"),
3963 build_string ("ISO8859-1 (Latin-1)"),
3964 build_string ("iso8859-1"),
3965 Qnil, 0xA0, 0xFF, 0, 32);
3966 staticpro (&Vcharset_latin_iso8859_2);
3967 Vcharset_latin_iso8859_2 =
3968 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3969 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3970 build_string ("Latin-2"),
3971 build_string ("ISO8859-2 (Latin-2)"),
3972 build_string ("ISO8859-2 (Latin-2)"),
3973 build_string ("iso8859-2"),
3975 staticpro (&Vcharset_latin_iso8859_3);
3976 Vcharset_latin_iso8859_3 =
3977 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3978 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3979 build_string ("Latin-3"),
3980 build_string ("ISO8859-3 (Latin-3)"),
3981 build_string ("ISO8859-3 (Latin-3)"),
3982 build_string ("iso8859-3"),
3984 staticpro (&Vcharset_latin_iso8859_4);
3985 Vcharset_latin_iso8859_4 =
3986 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3987 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3988 build_string ("Latin-4"),
3989 build_string ("ISO8859-4 (Latin-4)"),
3990 build_string ("ISO8859-4 (Latin-4)"),
3991 build_string ("iso8859-4"),
3993 staticpro (&Vcharset_thai_tis620);
3994 Vcharset_thai_tis620 =
3995 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3996 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3997 build_string ("TIS620"),
3998 build_string ("TIS620 (Thai)"),
3999 build_string ("TIS620.2529 (Thai)"),
4000 build_string ("tis620"),
4001 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
4002 staticpro (&Vcharset_greek_iso8859_7);
4003 Vcharset_greek_iso8859_7 =
4004 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
4005 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
4006 build_string ("ISO8859-7"),
4007 build_string ("ISO8859-7 (Greek)"),
4008 build_string ("ISO8859-7 (Greek)"),
4009 build_string ("iso8859-7"),
4011 staticpro (&Vcharset_arabic_iso8859_6);
4012 Vcharset_arabic_iso8859_6 =
4013 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
4014 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
4015 build_string ("ISO8859-6"),
4016 build_string ("ISO8859-6 (Arabic)"),
4017 build_string ("ISO8859-6 (Arabic)"),
4018 build_string ("iso8859-6"),
4020 staticpro (&Vcharset_hebrew_iso8859_8);
4021 Vcharset_hebrew_iso8859_8 =
4022 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
4023 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
4024 build_string ("ISO8859-8"),
4025 build_string ("ISO8859-8 (Hebrew)"),
4026 build_string ("ISO8859-8 (Hebrew)"),
4027 build_string ("iso8859-8"),
4029 0 /* MIN_CHAR_HEBREW */,
4030 0 /* MAX_CHAR_HEBREW */, 0, 32);
4031 staticpro (&Vcharset_katakana_jisx0201);
4032 Vcharset_katakana_jisx0201 =
4033 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
4034 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
4035 build_string ("JISX0201 Kana"),
4036 build_string ("JISX0201.1976 (Japanese Kana)"),
4037 build_string ("JISX0201.1976 Japanese Kana"),
4038 build_string ("jisx0201\\.1976"),
4040 staticpro (&Vcharset_latin_jisx0201);
4041 Vcharset_latin_jisx0201 =
4042 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
4043 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
4044 build_string ("JISX0201 Roman"),
4045 build_string ("JISX0201.1976 (Japanese Roman)"),
4046 build_string ("JISX0201.1976 Japanese Roman"),
4047 build_string ("jisx0201\\.1976"),
4049 staticpro (&Vcharset_cyrillic_iso8859_5);
4050 Vcharset_cyrillic_iso8859_5 =
4051 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
4052 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
4053 build_string ("ISO8859-5"),
4054 build_string ("ISO8859-5 (Cyrillic)"),
4055 build_string ("ISO8859-5 (Cyrillic)"),
4056 build_string ("iso8859-5"),
4058 staticpro (&Vcharset_latin_iso8859_9);
4059 Vcharset_latin_iso8859_9 =
4060 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
4061 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
4062 build_string ("Latin-5"),
4063 build_string ("ISO8859-9 (Latin-5)"),
4064 build_string ("ISO8859-9 (Latin-5)"),
4065 build_string ("iso8859-9"),
4067 staticpro (&Vcharset_japanese_jisx0208_1978);
4068 Vcharset_japanese_jisx0208_1978 =
4069 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
4070 Qjapanese_jisx0208_1978, 94, 2,
4071 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
4072 build_string ("JIS X0208:1978"),
4073 build_string ("JIS X0208:1978 (Japanese)"),
4075 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
4076 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
4078 staticpro (&Vcharset_chinese_gb2312);
4079 Vcharset_chinese_gb2312 =
4080 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
4081 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
4082 build_string ("GB2312"),
4083 build_string ("GB2312)"),
4084 build_string ("GB2312 Chinese simplified"),
4085 build_string ("gb2312"),
4087 staticpro (&Vcharset_chinese_gb12345);
4088 Vcharset_chinese_gb12345 =
4089 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
4090 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4091 build_string ("G1"),
4092 build_string ("GB 12345)"),
4093 build_string ("GB 12345-1990"),
4094 build_string ("GB12345\\(\\.1990\\)?-0"),
4096 staticpro (&Vcharset_japanese_jisx0208);
4097 Vcharset_japanese_jisx0208 =
4098 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
4099 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
4100 build_string ("JISX0208"),
4101 build_string ("JIS X0208:1983 (Japanese)"),
4102 build_string ("JIS X0208:1983 Japanese Kanji"),
4103 build_string ("jisx0208\\.1983"),
4106 staticpro (&Vcharset_japanese_jisx0208_1990);
4107 Vcharset_japanese_jisx0208_1990 =
4108 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
4109 Qjapanese_jisx0208_1990, 94, 2,
4110 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4111 build_string ("JISX0208-1990"),
4112 build_string ("JIS X0208:1990 (Japanese)"),
4113 build_string ("JIS X0208:1990 Japanese Kanji"),
4114 build_string ("jisx0208\\.1990"),
4116 MIN_CHAR_JIS_X0208_1990,
4117 MAX_CHAR_JIS_X0208_1990, 0, 33);
4119 staticpro (&Vcharset_korean_ksc5601);
4120 Vcharset_korean_ksc5601 =
4121 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
4122 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
4123 build_string ("KSC5601"),
4124 build_string ("KSC5601 (Korean"),
4125 build_string ("KSC5601 Korean Hangul and Hanja"),
4126 build_string ("ksc5601"),
4128 staticpro (&Vcharset_japanese_jisx0212);
4129 Vcharset_japanese_jisx0212 =
4130 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
4131 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
4132 build_string ("JISX0212"),
4133 build_string ("JISX0212 (Japanese)"),
4134 build_string ("JISX0212 Japanese Supplement"),
4135 build_string ("jisx0212"),
4138 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
4139 staticpro (&Vcharset_chinese_cns11643_1);
4140 Vcharset_chinese_cns11643_1 =
4141 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
4142 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
4143 build_string ("CNS11643-1"),
4144 build_string ("CNS11643-1 (Chinese traditional)"),
4146 ("CNS 11643 Plane 1 Chinese traditional"),
4147 build_string (CHINESE_CNS_PLANE_RE("1")),
4149 staticpro (&Vcharset_chinese_cns11643_2);
4150 Vcharset_chinese_cns11643_2 =
4151 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
4152 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
4153 build_string ("CNS11643-2"),
4154 build_string ("CNS11643-2 (Chinese traditional)"),
4156 ("CNS 11643 Plane 2 Chinese traditional"),
4157 build_string (CHINESE_CNS_PLANE_RE("2")),
4160 staticpro (&Vcharset_latin_tcvn5712);
4161 Vcharset_latin_tcvn5712 =
4162 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
4163 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
4164 build_string ("TCVN 5712"),
4165 build_string ("TCVN 5712 (VSCII-2)"),
4166 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
4167 build_string ("tcvn5712\\(\\.1993\\)?-1"),
4169 staticpro (&Vcharset_latin_viscii_lower);
4170 Vcharset_latin_viscii_lower =
4171 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
4172 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
4173 build_string ("VISCII lower"),
4174 build_string ("VISCII lower (Vietnamese)"),
4175 build_string ("VISCII lower (Vietnamese)"),
4176 build_string ("MULEVISCII-LOWER"),
4178 staticpro (&Vcharset_latin_viscii_upper);
4179 Vcharset_latin_viscii_upper =
4180 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
4181 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
4182 build_string ("VISCII upper"),
4183 build_string ("VISCII upper (Vietnamese)"),
4184 build_string ("VISCII upper (Vietnamese)"),
4185 build_string ("MULEVISCII-UPPER"),
4187 staticpro (&Vcharset_latin_viscii);
4188 Vcharset_latin_viscii =
4189 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
4190 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
4191 build_string ("VISCII"),
4192 build_string ("VISCII 1.1 (Vietnamese)"),
4193 build_string ("VISCII 1.1 (Vietnamese)"),
4194 build_string ("VISCII1\\.1"),
4196 staticpro (&Vcharset_chinese_big5);
4197 Vcharset_chinese_big5 =
4198 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
4199 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4200 build_string ("Big5"),
4201 build_string ("Big5"),
4202 build_string ("Big5 Chinese traditional"),
4203 build_string ("big5"),
4205 staticpro (&Vcharset_chinese_big5_cdp);
4206 Vcharset_chinese_big5_cdp =
4207 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
4208 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4209 build_string ("Big5-CDP"),
4210 build_string ("Big5 + CDP extension"),
4211 build_string ("Big5 with CDP extension"),
4212 build_string ("big5\\.cdp-0"),
4214 staticpro (&Vcharset_ideograph_gt);
4215 Vcharset_ideograph_gt =
4216 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
4217 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4218 build_string ("GT"),
4219 build_string ("GT"),
4220 build_string ("GT"),
4222 Qnil, MIN_CHAR_GT, MAX_CHAR_GT, 0, 0);
4223 #define DEF_GT_PJ(n) \
4224 staticpro (&Vcharset_ideograph_gt_pj_##n); \
4225 Vcharset_ideograph_gt_pj_##n = \
4226 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
4227 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
4228 build_string ("GT-PJ-"#n), \
4229 build_string ("GT (pseudo JIS encoding) part "#n), \
4230 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
4232 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
4246 staticpro (&Vcharset_ideograph_daikanwa);
4247 Vcharset_ideograph_daikanwa =
4248 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
4249 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4250 build_string ("Daikanwa"),
4251 build_string ("Morohashi's Daikanwa"),
4252 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
4253 build_string ("Daikanwa"),
4254 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
4255 staticpro (&Vcharset_mojikyo);
4257 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
4258 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4259 build_string ("Mojikyo"),
4260 build_string ("Mojikyo"),
4261 build_string ("Konjaku-Mojikyo"),
4263 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
4264 staticpro (&Vcharset_mojikyo_2022_1);
4265 Vcharset_mojikyo_2022_1 =
4266 make_charset (LEADING_BYTE_MOJIKYO_2022_1, Qmojikyo_2022_1, 94, 3,
4267 2, 2, ':', CHARSET_LEFT_TO_RIGHT,
4268 build_string ("Mojikyo-2022-1"),
4269 build_string ("Mojikyo ISO-2022 Part 1"),
4270 build_string ("Konjaku-Mojikyo for ISO/IEC 2022 Part 1"),
4274 #define DEF_MOJIKYO_PJ(n) \
4275 staticpro (&Vcharset_mojikyo_pj_##n); \
4276 Vcharset_mojikyo_pj_##n = \
4277 make_charset (LEADING_BYTE_MOJIKYO_PJ_##n, Qmojikyo_pj_##n, 94, 2, \
4278 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
4279 build_string ("Mojikyo-PJ-"#n), \
4280 build_string ("Mojikyo (pseudo JIS encoding) part "#n), \
4282 ("Konjaku-Mojikyo (pseudo JIS encoding) part "#n), \
4284 ("\\(MojikyoPJ-"#n "\\|jisx0208\\.Mojikyo-"#n "\\)$"), \
4296 DEF_MOJIKYO_PJ (10);
4297 DEF_MOJIKYO_PJ (11);
4298 DEF_MOJIKYO_PJ (12);
4299 DEF_MOJIKYO_PJ (13);
4300 DEF_MOJIKYO_PJ (14);
4301 DEF_MOJIKYO_PJ (15);
4302 DEF_MOJIKYO_PJ (16);
4303 DEF_MOJIKYO_PJ (17);
4304 DEF_MOJIKYO_PJ (18);
4305 DEF_MOJIKYO_PJ (19);
4306 DEF_MOJIKYO_PJ (20);
4307 DEF_MOJIKYO_PJ (21);
4309 staticpro (&Vcharset_ethiopic_ucs);
4310 Vcharset_ethiopic_ucs =
4311 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
4312 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4313 build_string ("Ethiopic (UCS)"),
4314 build_string ("Ethiopic (UCS)"),
4315 build_string ("Ethiopic of UCS"),
4316 build_string ("Ethiopic-Unicode"),
4317 Qnil, 0x1200, 0x137F, 0x1200, 0);
4319 staticpro (&Vcharset_chinese_big5_1);
4320 Vcharset_chinese_big5_1 =
4321 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
4322 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
4323 build_string ("Big5"),
4324 build_string ("Big5 (Level-1)"),
4326 ("Big5 Level-1 Chinese traditional"),
4327 build_string ("big5"),
4329 staticpro (&Vcharset_chinese_big5_2);
4330 Vcharset_chinese_big5_2 =
4331 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
4332 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
4333 build_string ("Big5"),
4334 build_string ("Big5 (Level-2)"),
4336 ("Big5 Level-2 Chinese traditional"),
4337 build_string ("big5"),
4340 #ifdef ENABLE_COMPOSITE_CHARS
4341 /* #### For simplicity, we put composite chars into a 96x96 charset.
4342 This is going to lead to problems because you can run out of
4343 room, esp. as we don't yet recycle numbers. */
4344 staticpro (&Vcharset_composite);
4345 Vcharset_composite =
4346 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
4347 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4348 build_string ("Composite"),
4349 build_string ("Composite characters"),
4350 build_string ("Composite characters"),
4353 /* #### not dumped properly */
4354 composite_char_row_next = 32;
4355 composite_char_col_next = 32;
4357 Vcomposite_char_string2char_hash_table =
4358 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
4359 Vcomposite_char_char2string_hash_table =
4360 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
4361 staticpro (&Vcomposite_char_string2char_hash_table);
4362 staticpro (&Vcomposite_char_char2string_hash_table);
4363 #endif /* ENABLE_COMPOSITE_CHARS */