1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_cns;
70 Lisp_Object Vcharset_ucs_jis;
71 Lisp_Object Vcharset_ucs_big5;
72 Lisp_Object Vcharset_latin_viscii;
73 Lisp_Object Vcharset_latin_tcvn5712;
74 Lisp_Object Vcharset_latin_viscii_lower;
75 Lisp_Object Vcharset_latin_viscii_upper;
76 Lisp_Object Vcharset_chinese_big5;
77 Lisp_Object Vcharset_chinese_big5_cdp;
78 Lisp_Object Vcharset_ideograph_gt;
79 Lisp_Object Vcharset_ideograph_gt_pj_1;
80 Lisp_Object Vcharset_ideograph_gt_pj_2;
81 Lisp_Object Vcharset_ideograph_gt_pj_3;
82 Lisp_Object Vcharset_ideograph_gt_pj_4;
83 Lisp_Object Vcharset_ideograph_gt_pj_5;
84 Lisp_Object Vcharset_ideograph_gt_pj_6;
85 Lisp_Object Vcharset_ideograph_gt_pj_7;
86 Lisp_Object Vcharset_ideograph_gt_pj_8;
87 Lisp_Object Vcharset_ideograph_gt_pj_9;
88 Lisp_Object Vcharset_ideograph_gt_pj_10;
89 Lisp_Object Vcharset_ideograph_gt_pj_11;
90 Lisp_Object Vcharset_ideograph_daikanwa;
91 Lisp_Object Vcharset_mojikyo;
92 Lisp_Object Vcharset_mojikyo_2022_1;
93 Lisp_Object Vcharset_mojikyo_pj_1;
94 Lisp_Object Vcharset_mojikyo_pj_2;
95 Lisp_Object Vcharset_mojikyo_pj_3;
96 Lisp_Object Vcharset_mojikyo_pj_4;
97 Lisp_Object Vcharset_mojikyo_pj_5;
98 Lisp_Object Vcharset_mojikyo_pj_6;
99 Lisp_Object Vcharset_mojikyo_pj_7;
100 Lisp_Object Vcharset_mojikyo_pj_8;
101 Lisp_Object Vcharset_mojikyo_pj_9;
102 Lisp_Object Vcharset_mojikyo_pj_10;
103 Lisp_Object Vcharset_mojikyo_pj_11;
104 Lisp_Object Vcharset_mojikyo_pj_12;
105 Lisp_Object Vcharset_mojikyo_pj_13;
106 Lisp_Object Vcharset_mojikyo_pj_14;
107 Lisp_Object Vcharset_mojikyo_pj_15;
108 Lisp_Object Vcharset_mojikyo_pj_16;
109 Lisp_Object Vcharset_mojikyo_pj_17;
110 Lisp_Object Vcharset_mojikyo_pj_18;
111 Lisp_Object Vcharset_mojikyo_pj_19;
112 Lisp_Object Vcharset_mojikyo_pj_20;
113 Lisp_Object Vcharset_mojikyo_pj_21;
114 Lisp_Object Vcharset_ethiopic_ucs;
116 Lisp_Object Vcharset_chinese_big5_1;
117 Lisp_Object Vcharset_chinese_big5_2;
119 #ifdef ENABLE_COMPOSITE_CHARS
120 Lisp_Object Vcharset_composite;
122 /* Hash tables for composite chars. One maps string representing
123 composed chars to their equivalent chars; one goes the
125 Lisp_Object Vcomposite_char_char2string_hash_table;
126 Lisp_Object Vcomposite_char_string2char_hash_table;
128 static int composite_char_row_next;
129 static int composite_char_col_next;
131 #endif /* ENABLE_COMPOSITE_CHARS */
133 struct charset_lookup *chlook;
135 static const struct lrecord_description charset_lookup_description_1[] = {
136 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
145 static const struct struct_description charset_lookup_description = {
146 sizeof (struct charset_lookup),
147 charset_lookup_description_1
151 /* Table of number of bytes in the string representation of a character
152 indexed by the first byte of that representation.
154 rep_bytes_by_first_byte(c) is more efficient than the equivalent
155 canonical computation:
157 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
159 const Bytecount rep_bytes_by_first_byte[0xA0] =
160 { /* 0x00 - 0x7f are for straight ASCII */
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
165 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
166 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
167 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
168 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
169 /* 0x80 - 0x8f are for Dimension-1 official charsets */
171 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
173 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
175 /* 0x90 - 0x9d are for Dimension-2 official charsets */
176 /* 0x9e is for Dimension-1 private charsets */
177 /* 0x9f is for Dimension-2 private charsets */
178 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
184 #define BT_UINT8_MIN 0
185 #define BT_UINT8_MAX (UCHAR_MAX - 3)
186 #define BT_UINT8_t (UCHAR_MAX - 2)
187 #define BT_UINT8_nil (UCHAR_MAX - 1)
188 #define BT_UINT8_unbound UCHAR_MAX
190 INLINE_HEADER int INT_UINT8_P (Lisp_Object obj);
191 INLINE_HEADER int UINT8_VALUE_P (Lisp_Object obj);
192 INLINE_HEADER unsigned char UINT8_ENCODE (Lisp_Object obj);
193 INLINE_HEADER Lisp_Object UINT8_DECODE (unsigned char n);
194 INLINE_HEADER unsigned short UINT8_TO_UINT16 (unsigned char n);
197 INT_UINT8_P (Lisp_Object obj)
201 int num = XINT (obj);
203 return (BT_UINT8_MIN <= num) && (num <= BT_UINT8_MAX);
210 UINT8_VALUE_P (Lisp_Object obj)
212 return EQ (obj, Qunbound)
213 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT8_P (obj);
216 INLINE_HEADER unsigned char
217 UINT8_ENCODE (Lisp_Object obj)
219 if (EQ (obj, Qunbound))
220 return BT_UINT8_unbound;
221 else if (EQ (obj, Qnil))
223 else if (EQ (obj, Qt))
229 INLINE_HEADER Lisp_Object
230 UINT8_DECODE (unsigned char n)
232 if (n == BT_UINT8_unbound)
234 else if (n == BT_UINT8_nil)
236 else if (n == BT_UINT8_t)
243 mark_uint8_byte_table (Lisp_Object obj)
249 print_uint8_byte_table (Lisp_Object obj,
250 Lisp_Object printcharfun, int escapeflag)
252 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
254 struct gcpro gcpro1, gcpro2;
255 GCPRO2 (obj, printcharfun);
257 write_c_string ("\n#<uint8-byte-table", printcharfun);
258 for (i = 0; i < 256; i++)
260 unsigned char n = bte->property[i];
262 write_c_string ("\n ", printcharfun);
263 write_c_string (" ", printcharfun);
264 if (n == BT_UINT8_unbound)
265 write_c_string ("void", printcharfun);
266 else if (n == BT_UINT8_nil)
267 write_c_string ("nil", printcharfun);
268 else if (n == BT_UINT8_t)
269 write_c_string ("t", printcharfun);
274 sprintf (buf, "%hd", n);
275 write_c_string (buf, printcharfun);
279 write_c_string (">", printcharfun);
283 uint8_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
285 Lisp_Uint8_Byte_Table *te1 = XUINT8_BYTE_TABLE (obj1);
286 Lisp_Uint8_Byte_Table *te2 = XUINT8_BYTE_TABLE (obj2);
289 for (i = 0; i < 256; i++)
290 if (te1->property[i] != te2->property[i])
296 uint8_byte_table_hash (Lisp_Object obj, int depth)
298 Lisp_Uint8_Byte_Table *te = XUINT8_BYTE_TABLE (obj);
302 for (i = 0; i < 256; i++)
303 hash = HASH2 (hash, te->property[i]);
307 DEFINE_LRECORD_IMPLEMENTATION ("uint8-byte-table", uint8_byte_table,
308 mark_uint8_byte_table,
309 print_uint8_byte_table,
310 0, uint8_byte_table_equal,
311 uint8_byte_table_hash,
312 0 /* uint8_byte_table_description */,
313 Lisp_Uint8_Byte_Table);
316 make_uint8_byte_table (unsigned char initval)
320 Lisp_Uint8_Byte_Table *cte;
322 cte = alloc_lcrecord_type (Lisp_Uint8_Byte_Table,
323 &lrecord_uint8_byte_table);
325 for (i = 0; i < 256; i++)
326 cte->property[i] = initval;
328 XSETUINT8_BYTE_TABLE (obj, cte);
333 uint8_byte_table_same_value_p (Lisp_Object obj)
335 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
336 unsigned char v0 = bte->property[0];
339 for (i = 1; i < 256; i++)
341 if (bte->property[i] != v0)
348 map_over_uint8_byte_table (Lisp_Uint8_Byte_Table *ct,
349 int (*fn) (Emchar c, Lisp_Object val, void *arg),
350 void *arg, Emchar ofs, int place)
353 int unit = 1 << (8 * place);
357 for (i = 0, retval = 0; i < 256 && retval == 0; i++)
359 if (ct->property[i] != BT_UINT8_unbound)
362 for (; c < c1 && retval == 0; c++)
363 retval = (fn) (c, UINT8_DECODE (ct->property[i]), arg);
371 #define BT_UINT16_MIN 0
372 #define BT_UINT16_MAX (USHRT_MAX - 3)
373 #define BT_UINT16_t (USHRT_MAX - 2)
374 #define BT_UINT16_nil (USHRT_MAX - 1)
375 #define BT_UINT16_unbound USHRT_MAX
377 INLINE_HEADER int INT_UINT16_P (Lisp_Object obj);
378 INLINE_HEADER int UINT16_VALUE_P (Lisp_Object obj);
379 INLINE_HEADER unsigned short UINT16_ENCODE (Lisp_Object obj);
380 INLINE_HEADER Lisp_Object UINT16_DECODE (unsigned short us);
383 INT_UINT16_P (Lisp_Object obj)
387 int num = XINT (obj);
389 return (BT_UINT16_MIN <= num) && (num <= BT_UINT16_MAX);
396 UINT16_VALUE_P (Lisp_Object obj)
398 return EQ (obj, Qunbound)
399 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT16_P (obj);
402 INLINE_HEADER unsigned short
403 UINT16_ENCODE (Lisp_Object obj)
405 if (EQ (obj, Qunbound))
406 return BT_UINT16_unbound;
407 else if (EQ (obj, Qnil))
408 return BT_UINT16_nil;
409 else if (EQ (obj, Qt))
415 INLINE_HEADER Lisp_Object
416 UINT16_DECODE (unsigned short n)
418 if (n == BT_UINT16_unbound)
420 else if (n == BT_UINT16_nil)
422 else if (n == BT_UINT16_t)
428 INLINE_HEADER unsigned short
429 UINT8_TO_UINT16 (unsigned char n)
431 if (n == BT_UINT8_unbound)
432 return BT_UINT16_unbound;
433 else if (n == BT_UINT8_nil)
434 return BT_UINT16_nil;
435 else if (n == BT_UINT8_t)
442 mark_uint16_byte_table (Lisp_Object obj)
448 print_uint16_byte_table (Lisp_Object obj,
449 Lisp_Object printcharfun, int escapeflag)
451 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
453 struct gcpro gcpro1, gcpro2;
454 GCPRO2 (obj, printcharfun);
456 write_c_string ("\n#<uint16-byte-table", printcharfun);
457 for (i = 0; i < 256; i++)
459 unsigned short n = bte->property[i];
461 write_c_string ("\n ", printcharfun);
462 write_c_string (" ", printcharfun);
463 if (n == BT_UINT16_unbound)
464 write_c_string ("void", printcharfun);
465 else if (n == BT_UINT16_nil)
466 write_c_string ("nil", printcharfun);
467 else if (n == BT_UINT16_t)
468 write_c_string ("t", printcharfun);
473 sprintf (buf, "%hd", n);
474 write_c_string (buf, printcharfun);
478 write_c_string (">", printcharfun);
482 uint16_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
484 Lisp_Uint16_Byte_Table *te1 = XUINT16_BYTE_TABLE (obj1);
485 Lisp_Uint16_Byte_Table *te2 = XUINT16_BYTE_TABLE (obj2);
488 for (i = 0; i < 256; i++)
489 if (te1->property[i] != te2->property[i])
495 uint16_byte_table_hash (Lisp_Object obj, int depth)
497 Lisp_Uint16_Byte_Table *te = XUINT16_BYTE_TABLE (obj);
501 for (i = 0; i < 256; i++)
502 hash = HASH2 (hash, te->property[i]);
506 DEFINE_LRECORD_IMPLEMENTATION ("uint16-byte-table", uint16_byte_table,
507 mark_uint16_byte_table,
508 print_uint16_byte_table,
509 0, uint16_byte_table_equal,
510 uint16_byte_table_hash,
511 0 /* uint16_byte_table_description */,
512 Lisp_Uint16_Byte_Table);
515 make_uint16_byte_table (unsigned short initval)
519 Lisp_Uint16_Byte_Table *cte;
521 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
522 &lrecord_uint16_byte_table);
524 for (i = 0; i < 256; i++)
525 cte->property[i] = initval;
527 XSETUINT16_BYTE_TABLE (obj, cte);
532 expand_uint8_byte_table_to_uint16 (Lisp_Object table)
536 Lisp_Uint8_Byte_Table* bte = XUINT8_BYTE_TABLE(table);
537 Lisp_Uint16_Byte_Table* cte;
539 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
540 &lrecord_uint16_byte_table);
541 for (i = 0; i < 256; i++)
543 cte->property[i] = UINT8_TO_UINT16 (bte->property[i]);
545 XSETUINT16_BYTE_TABLE (obj, cte);
550 uint16_byte_table_same_value_p (Lisp_Object obj)
552 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
553 unsigned short v0 = bte->property[0];
556 for (i = 1; i < 256; i++)
558 if (bte->property[i] != v0)
565 map_over_uint16_byte_table (Lisp_Uint16_Byte_Table *ct,
566 int (*fn) (Emchar c, Lisp_Object val, void *arg),
567 void *arg, Emchar ofs, int place)
570 int unit = 1 << (8 * place);
574 for (i = 0, retval = 0; i < 256 && retval == 0; i++)
576 if (ct->property[i] != BT_UINT16_unbound)
579 for (; c < c1 && retval == 0; c++)
580 retval = (fn) (c, UINT16_DECODE (ct->property[i]), arg);
590 mark_byte_table (Lisp_Object obj)
592 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
595 for (i = 0; i < 256; i++)
597 mark_object (cte->property[i]);
603 print_byte_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
605 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
607 struct gcpro gcpro1, gcpro2;
608 GCPRO2 (obj, printcharfun);
610 write_c_string ("\n#<byte-table", printcharfun);
611 for (i = 0; i < 256; i++)
613 Lisp_Object elt = bte->property[i];
615 write_c_string ("\n ", printcharfun);
616 write_c_string (" ", printcharfun);
617 if (EQ (elt, Qunbound))
618 write_c_string ("void", printcharfun);
620 print_internal (elt, printcharfun, escapeflag);
623 write_c_string (">", printcharfun);
627 byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
629 Lisp_Byte_Table *cte1 = XBYTE_TABLE (obj1);
630 Lisp_Byte_Table *cte2 = XBYTE_TABLE (obj2);
633 for (i = 0; i < 256; i++)
634 if (BYTE_TABLE_P (cte1->property[i]))
636 if (BYTE_TABLE_P (cte2->property[i]))
638 if (!byte_table_equal (cte1->property[i],
639 cte2->property[i], depth + 1))
646 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
652 byte_table_hash (Lisp_Object obj, int depth)
654 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
656 return internal_array_hash (cte->property, 256, depth);
659 static const struct lrecord_description byte_table_description[] = {
660 { XD_LISP_OBJECT_ARRAY, offsetof(Lisp_Byte_Table, property), 256 },
664 DEFINE_LRECORD_IMPLEMENTATION ("byte-table", byte_table,
669 byte_table_description,
673 make_byte_table (Lisp_Object initval)
677 Lisp_Byte_Table *cte;
679 cte = alloc_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
681 for (i = 0; i < 256; i++)
682 cte->property[i] = initval;
684 XSETBYTE_TABLE (obj, cte);
689 byte_table_same_value_p (Lisp_Object obj)
691 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
692 Lisp_Object v0 = bte->property[0];
695 for (i = 1; i < 256; i++)
697 if (!internal_equal (bte->property[i], v0, 0))
704 map_over_byte_table (Lisp_Byte_Table *ct,
705 int (*fn) (Emchar c, Lisp_Object val, void *arg),
706 void *arg, Emchar ofs, int place)
710 int unit = 1 << (8 * place);
713 for (i = 0, retval = 0; i < 256 && retval == 0; i++)
716 if (UINT8_BYTE_TABLE_P (v))
719 = map_over_uint8_byte_table (XUINT8_BYTE_TABLE(v),
720 fn, arg, c, place - 1);
723 else if (UINT16_BYTE_TABLE_P (v))
726 = map_over_uint16_byte_table (XUINT16_BYTE_TABLE(v),
727 fn, arg, c, place - 1);
730 else if (BYTE_TABLE_P (v))
732 retval = map_over_byte_table (XBYTE_TABLE(v),
733 fn, arg, c, place - 1);
736 else if (!UNBOUNDP (v))
738 Emchar c1 = c + unit;
740 for (; c < c1 && retval == 0; c++)
741 retval = (fn) (c, v, arg);
750 Lisp_Object get_byte_table (Lisp_Object table, unsigned char idx);
751 Lisp_Object put_byte_table (Lisp_Object table, unsigned char idx,
755 get_byte_table (Lisp_Object table, unsigned char idx)
757 if (UINT8_BYTE_TABLE_P (table))
758 return UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[idx]);
759 else if (UINT16_BYTE_TABLE_P (table))
760 return UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[idx]);
761 else if (BYTE_TABLE_P (table))
762 return XBYTE_TABLE(table)->property[idx];
768 put_byte_table (Lisp_Object table, unsigned char idx, Lisp_Object value)
770 if (UINT8_BYTE_TABLE_P (table))
772 if (UINT8_VALUE_P (value))
774 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
775 if (!UINT8_BYTE_TABLE_P (value) &&
776 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
777 && uint8_byte_table_same_value_p (table))
782 else if (UINT16_VALUE_P (value))
784 Lisp_Object new = expand_uint8_byte_table_to_uint16 (table);
786 XUINT16_BYTE_TABLE(new)->property[idx] = UINT16_ENCODE (value);
791 Lisp_Object new = make_byte_table (Qnil);
794 for (i = 0; i < 256; i++)
796 XBYTE_TABLE(new)->property[i]
797 = UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[i]);
799 XBYTE_TABLE(new)->property[idx] = value;
803 else if (UINT16_BYTE_TABLE_P (table))
805 if (UINT16_VALUE_P (value))
807 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
808 if (!UINT8_BYTE_TABLE_P (value) &&
809 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
810 && uint16_byte_table_same_value_p (table))
817 Lisp_Object new = make_byte_table (Qnil);
820 for (i = 0; i < 256; i++)
822 XBYTE_TABLE(new)->property[i]
823 = UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[i]);
825 XBYTE_TABLE(new)->property[idx] = value;
829 else if (BYTE_TABLE_P (table))
831 XBYTE_TABLE(table)->property[idx] = value;
832 if (!UINT8_BYTE_TABLE_P (value) &&
833 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
834 && byte_table_same_value_p (table))
839 else if (!internal_equal (table, value, 0))
841 if (UINT8_VALUE_P (table) && UINT8_VALUE_P (value))
843 table = make_uint8_byte_table (UINT8_ENCODE (table));
844 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
846 else if (UINT16_VALUE_P (table) && UINT16_VALUE_P (value))
848 table = make_uint16_byte_table (UINT16_ENCODE (table));
849 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
853 table = make_byte_table (table);
854 XBYTE_TABLE(table)->property[idx] = value;
861 mark_char_id_table (Lisp_Object obj)
863 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
869 print_char_id_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
871 Lisp_Object table = XCHAR_ID_TABLE (obj)->table;
873 struct gcpro gcpro1, gcpro2;
874 GCPRO2 (obj, printcharfun);
876 write_c_string ("#<char-id-table ", printcharfun);
877 for (i = 0; i < 256; i++)
879 Lisp_Object elt = get_byte_table (table, i);
880 if (i != 0) write_c_string ("\n ", printcharfun);
881 if (EQ (elt, Qunbound))
882 write_c_string ("void", printcharfun);
884 print_internal (elt, printcharfun, escapeflag);
887 write_c_string (">", printcharfun);
891 char_id_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
893 Lisp_Object table1 = XCHAR_ID_TABLE (obj1)->table;
894 Lisp_Object table2 = XCHAR_ID_TABLE (obj2)->table;
897 for (i = 0; i < 256; i++)
899 if (!internal_equal (get_byte_table (table1, i),
900 get_byte_table (table2, i), 0))
907 char_id_table_hash (Lisp_Object obj, int depth)
909 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
911 return char_id_table_hash (cte->table, depth + 1);
914 static const struct lrecord_description char_id_table_description[] = {
915 { XD_LISP_OBJECT, offsetof(Lisp_Char_ID_Table, table) },
919 DEFINE_LRECORD_IMPLEMENTATION ("char-id-table", char_id_table,
922 0, char_id_table_equal,
924 char_id_table_description,
928 make_char_id_table (Lisp_Object initval)
931 Lisp_Char_ID_Table *cte;
933 cte = alloc_lcrecord_type (Lisp_Char_ID_Table, &lrecord_char_id_table);
935 cte->table = make_byte_table (initval);
937 XSETCHAR_ID_TABLE (obj, cte);
943 get_char_id_table (Emchar ch, Lisp_Object table)
945 unsigned int code = ch;
952 (XCHAR_ID_TABLE (table)->table,
953 (unsigned char)(code >> 24)),
954 (unsigned char) (code >> 16)),
955 (unsigned char) (code >> 8)),
956 (unsigned char) code);
959 void put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table);
961 put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table)
963 unsigned int code = ch;
964 Lisp_Object table1, table2, table3, table4;
966 table1 = XCHAR_ID_TABLE (table)->table;
967 table2 = get_byte_table (table1, (unsigned char)(code >> 24));
968 table3 = get_byte_table (table2, (unsigned char)(code >> 16));
969 table4 = get_byte_table (table3, (unsigned char)(code >> 8));
971 table4 = put_byte_table (table4, (unsigned char)code, value);
972 table3 = put_byte_table (table3, (unsigned char)(code >> 8), table4);
973 table2 = put_byte_table (table2, (unsigned char)(code >> 16), table3);
974 XCHAR_ID_TABLE (table)->table
975 = put_byte_table (table1, (unsigned char)(code >> 24), table2);
978 /* Map FN (with client data ARG) in char table CT.
979 Mapping stops the first time FN returns non-zero, and that value
980 becomes the return value of map_char_id_table(). */
982 map_char_id_table (Lisp_Char_ID_Table *ct,
983 int (*fn) (Emchar c, Lisp_Object val, void *arg),
986 map_char_id_table (Lisp_Char_ID_Table *ct,
987 int (*fn) (Emchar c, Lisp_Object val, void *arg),
990 Lisp_Object v = ct->table;
992 if (UINT8_BYTE_TABLE_P (v))
993 return map_over_uint8_byte_table (XUINT8_BYTE_TABLE(v), fn, arg, 0, 3);
994 else if (UINT16_BYTE_TABLE_P (v))
995 return map_over_uint16_byte_table (XUINT16_BYTE_TABLE(v), fn, arg, 0, 3);
996 else if (BYTE_TABLE_P (v))
997 return map_over_byte_table (XBYTE_TABLE(v), fn, arg, 0, 3);
998 else if (!UNBOUNDP (v))
1002 Emchar c1 = c + unit;
1005 for (retval = 0; c < c1 && retval == 0; c++)
1006 retval = (fn) (c, v, arg);
1011 struct slow_map_char_id_table_arg
1013 Lisp_Object function;
1018 slow_map_char_id_table_fun (Emchar c, Lisp_Object val, void *arg)
1020 struct slow_map_char_id_table_arg *closure =
1021 (struct slow_map_char_id_table_arg *) arg;
1023 closure->retval = call2 (closure->function, make_char (c), val);
1024 return !NILP (closure->retval);
1028 Lisp_Object Vchar_attribute_hash_table;
1029 Lisp_Object Vcharacter_composition_table;
1030 Lisp_Object Vcharacter_variant_table;
1032 Lisp_Object Qideograph_daikanwa;
1033 Lisp_Object Q_decomposition;
1035 Lisp_Object Qto_ucs;
1037 Lisp_Object Qcompat;
1038 Lisp_Object Qisolated;
1039 Lisp_Object Qinitial;
1040 Lisp_Object Qmedial;
1042 Lisp_Object Qvertical;
1043 Lisp_Object QnoBreak;
1044 Lisp_Object Qfraction;
1047 Lisp_Object Qcircle;
1048 Lisp_Object Qsquare;
1050 Lisp_Object Qnarrow;
1054 Emchar to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg);
1056 Lisp_Object put_char_ccs_code_point (Lisp_Object character,
1057 Lisp_Object ccs, Lisp_Object value);
1058 Lisp_Object remove_char_ccs (Lisp_Object character, Lisp_Object ccs);
1061 to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
1067 else if (EQ (v, Qcompat))
1069 else if (EQ (v, Qisolated))
1071 else if (EQ (v, Qinitial))
1073 else if (EQ (v, Qmedial))
1075 else if (EQ (v, Qfinal))
1077 else if (EQ (v, Qvertical))
1079 else if (EQ (v, QnoBreak))
1081 else if (EQ (v, Qfraction))
1083 else if (EQ (v, Qsuper))
1085 else if (EQ (v, Qsub))
1087 else if (EQ (v, Qcircle))
1089 else if (EQ (v, Qsquare))
1091 else if (EQ (v, Qwide))
1093 else if (EQ (v, Qnarrow))
1095 else if (EQ (v, Qsmall))
1097 else if (EQ (v, Qfont))
1100 signal_simple_error (err_msg, err_arg);
1103 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
1104 Return character corresponding with list.
1108 Lisp_Object table = Vcharacter_composition_table;
1109 Lisp_Object rest = list;
1111 while (CONSP (rest))
1113 Lisp_Object v = Fcar (rest);
1115 Emchar c = to_char_id (v, "Invalid value for composition", list);
1117 ret = get_char_id_table (c, table);
1122 if (!CHAR_ID_TABLE_P (ret))
1127 else if (!CONSP (rest))
1129 else if (CHAR_ID_TABLE_P (ret))
1132 signal_simple_error ("Invalid table is found with", list);
1134 signal_simple_error ("Invalid value for composition", list);
1137 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
1138 Return variants of CHARACTER.
1142 CHECK_CHAR (character);
1143 return Fcopy_list (get_char_id_table (XCHAR (character),
1144 Vcharacter_variant_table));
1148 /* We store the char-attributes in hash tables with the names as the
1149 key and the actual char-id-table object as the value. Occasionally
1150 we need to use them in a list format. These routines provide us
1152 struct char_attribute_list_closure
1154 Lisp_Object *char_attribute_list;
1158 add_char_attribute_to_list_mapper (Lisp_Object key, Lisp_Object value,
1159 void *char_attribute_list_closure)
1161 /* This function can GC */
1162 struct char_attribute_list_closure *calcl
1163 = (struct char_attribute_list_closure*) char_attribute_list_closure;
1164 Lisp_Object *char_attribute_list = calcl->char_attribute_list;
1166 *char_attribute_list = Fcons (key, *char_attribute_list);
1170 DEFUN ("char-attribute-list", Fchar_attribute_list, 0, 0, 0, /*
1171 Return the list of all existing character attributes except coded-charsets.
1175 Lisp_Object char_attribute_list = Qnil;
1176 struct gcpro gcpro1;
1177 struct char_attribute_list_closure char_attribute_list_closure;
1179 GCPRO1 (char_attribute_list);
1180 char_attribute_list_closure.char_attribute_list = &char_attribute_list;
1181 elisp_maphash (add_char_attribute_to_list_mapper,
1182 Vchar_attribute_hash_table,
1183 &char_attribute_list_closure);
1185 return char_attribute_list;
1188 DEFUN ("find-char-attribute-table", Ffind_char_attribute_table, 1, 1, 0, /*
1189 Return char-id-table corresponding to ATTRIBUTE.
1193 return Fgethash (attribute, Vchar_attribute_hash_table, Qnil);
1197 /* We store the char-id-tables in hash tables with the attributes as
1198 the key and the actual char-id-table object as the value. Each
1199 char-id-table stores values of an attribute corresponding with
1200 characters. Occasionally we need to get attributes of a character
1201 in a association-list format. These routines provide us with
1203 struct char_attribute_alist_closure
1206 Lisp_Object *char_attribute_alist;
1210 add_char_attribute_alist_mapper (Lisp_Object key, Lisp_Object value,
1211 void *char_attribute_alist_closure)
1213 /* This function can GC */
1214 struct char_attribute_alist_closure *caacl =
1215 (struct char_attribute_alist_closure*) char_attribute_alist_closure;
1216 Lisp_Object ret = get_char_id_table (caacl->char_id, value);
1217 if (!UNBOUNDP (ret))
1219 Lisp_Object *char_attribute_alist = caacl->char_attribute_alist;
1220 *char_attribute_alist
1221 = Fcons (Fcons (key, ret), *char_attribute_alist);
1226 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
1227 Return the alist of attributes of CHARACTER.
1231 Lisp_Object alist = Qnil;
1234 CHECK_CHAR (character);
1236 struct gcpro gcpro1;
1237 struct char_attribute_alist_closure char_attribute_alist_closure;
1240 char_attribute_alist_closure.char_id = XCHAR (character);
1241 char_attribute_alist_closure.char_attribute_alist = &alist;
1242 elisp_maphash (add_char_attribute_alist_mapper,
1243 Vchar_attribute_hash_table,
1244 &char_attribute_alist_closure);
1248 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
1250 Lisp_Object ccs = chlook->charset_by_leading_byte[i];
1254 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1257 if ( CHAR_ID_TABLE_P (encoding_table)
1258 && INTP (cpos = get_char_id_table (XCHAR (character),
1261 alist = Fcons (Fcons (ccs, cpos), alist);
1268 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 3, 0, /*
1269 Return the value of CHARACTER's ATTRIBUTE.
1270 Return DEFAULT-VALUE if the value is not exist.
1272 (character, attribute, default_value))
1276 CHECK_CHAR (character);
1277 if (!NILP (ccs = Ffind_charset (attribute)))
1279 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1281 if (CHAR_ID_TABLE_P (encoding_table))
1282 return get_char_id_table (XCHAR (character), encoding_table);
1286 Lisp_Object table = Fgethash (attribute,
1287 Vchar_attribute_hash_table,
1289 if (!UNBOUNDP (table))
1291 Lisp_Object ret = get_char_id_table (XCHAR (character), table);
1292 if (!UNBOUNDP (ret))
1296 return default_value;
1299 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
1300 Store CHARACTER's ATTRIBUTE with VALUE.
1302 (character, attribute, value))
1306 CHECK_CHAR (character);
1307 ccs = Ffind_charset (attribute);
1310 return put_char_ccs_code_point (character, ccs, value);
1312 else if (EQ (attribute, Q_decomposition))
1317 signal_simple_error ("Invalid value for ->decomposition",
1320 if (CONSP (Fcdr (value)))
1322 Lisp_Object rest = value;
1323 Lisp_Object table = Vcharacter_composition_table;
1327 GET_EXTERNAL_LIST_LENGTH (rest, len);
1328 seq = make_vector (len, Qnil);
1330 while (CONSP (rest))
1332 Lisp_Object v = Fcar (rest);
1335 = to_char_id (v, "Invalid value for ->decomposition", value);
1338 XVECTOR_DATA(seq)[i++] = v;
1340 XVECTOR_DATA(seq)[i++] = make_char (c);
1344 put_char_id_table (c, character, table);
1349 ntable = get_char_id_table (c, table);
1350 if (!CHAR_ID_TABLE_P (ntable))
1352 ntable = make_char_id_table (Qnil);
1353 put_char_id_table (c, ntable, table);
1361 Lisp_Object v = Fcar (value);
1365 Emchar c = XINT (v);
1367 = get_char_id_table (c, Vcharacter_variant_table);
1369 if (NILP (Fmemq (v, ret)))
1371 put_char_id_table (c, Fcons (character, ret),
1372 Vcharacter_variant_table);
1375 seq = make_vector (1, v);
1379 else if (EQ (attribute, Qto_ucs) || EQ (attribute, Q_ucs))
1385 signal_simple_error ("Invalid value for ->ucs", value);
1389 ret = get_char_id_table (c, Vcharacter_variant_table);
1390 if (NILP (Fmemq (character, ret)))
1392 put_char_id_table (c, Fcons (character, ret),
1393 Vcharacter_variant_table);
1396 if (EQ (attribute, Q_ucs))
1397 attribute = Qto_ucs;
1401 Lisp_Object table = Fgethash (attribute,
1402 Vchar_attribute_hash_table,
1407 table = make_char_id_table (Qunbound);
1408 Fputhash (attribute, table, Vchar_attribute_hash_table);
1410 put_char_id_table (XCHAR (character), value, table);
1415 DEFUN ("remove-char-attribute", Fremove_char_attribute, 2, 2, 0, /*
1416 Remove CHARACTER's ATTRIBUTE.
1418 (character, attribute))
1422 CHECK_CHAR (character);
1423 ccs = Ffind_charset (attribute);
1426 return remove_char_ccs (character, ccs);
1430 Lisp_Object table = Fgethash (attribute,
1431 Vchar_attribute_hash_table,
1433 if (!UNBOUNDP (table))
1435 put_char_id_table (XCHAR (character), Qunbound, table);
1442 DEFUN ("map-char-attribute", Fmap_char_attribute, 2, 2, 0, /*
1443 Map FUNCTION over entries in ATTRIBUTE, calling it with two args,
1444 each key and value in the table.
1446 (function, attribute))
1449 Lisp_Char_ID_Table *ct;
1450 struct slow_map_char_id_table_arg slarg;
1451 struct gcpro gcpro1, gcpro2;
1453 if (!NILP (ccs = Ffind_charset (attribute)))
1455 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1457 if (CHAR_ID_TABLE_P (encoding_table))
1458 ct = XCHAR_ID_TABLE (encoding_table);
1464 Lisp_Object table = Fgethash (attribute,
1465 Vchar_attribute_hash_table,
1467 if (CHAR_ID_TABLE_P (table))
1468 ct = XCHAR_ID_TABLE (table);
1472 slarg.function = function;
1473 slarg.retval = Qnil;
1474 GCPRO2 (slarg.function, slarg.retval);
1475 map_char_id_table (ct, slow_map_char_id_table_fun, &slarg);
1478 return slarg.retval;
1481 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
1483 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
1485 /* ad-hoc method for `ascii' */
1486 if ((CHARSET_CHARS (cs) == 94) &&
1487 (CHARSET_BYTE_OFFSET (cs) != 33))
1488 return 128 - CHARSET_BYTE_OFFSET (cs);
1490 return CHARSET_CHARS (cs);
1493 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
1495 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
1497 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
1501 if (XVECTOR_LENGTH (v) > ccs_len)
1504 for (i = 0; i < XVECTOR_LENGTH (v); i++)
1506 Lisp_Object c = XVECTOR_DATA(v)[i];
1508 if (!NILP (c) && !CHARP (c))
1512 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
1524 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1527 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1537 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1538 nv = XVECTOR_DATA(v)[i];
1544 XVECTOR_DATA(v)[i] = Qnil;
1548 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1549 int code_point, Lisp_Object character);
1551 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1552 int code_point, Lisp_Object character)
1556 int ccs_len = XVECTOR_LENGTH (v);
1561 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1562 nv = XVECTOR_DATA(v)[i];
1566 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
1572 XVECTOR_DATA(v)[i] = character;
1576 put_char_ccs_code_point (Lisp_Object character,
1577 Lisp_Object ccs, Lisp_Object value)
1579 Lisp_Object encoding_table;
1581 if (!EQ (XCHARSET_NAME (ccs), Qucs)
1582 || (XCHAR (character) != XINT (value)))
1584 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
1585 int dim = XCHARSET_DIMENSION (ccs);
1586 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
1587 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
1591 { /* obsolete representation: value must be a list of bytes */
1592 Lisp_Object ret = Fcar (value);
1596 signal_simple_error ("Invalid value for coded-charset", value);
1597 code_point = XINT (ret);
1598 if (XCHARSET_GRAPHIC (ccs) == 1)
1600 rest = Fcdr (value);
1601 while (!NILP (rest))
1606 signal_simple_error ("Invalid value for coded-charset",
1610 signal_simple_error ("Invalid value for coded-charset",
1613 if (XCHARSET_GRAPHIC (ccs) == 1)
1615 code_point = (code_point << 8) | j;
1618 value = make_int (code_point);
1620 else if (INTP (value))
1622 code_point = XINT (value);
1623 if (XCHARSET_GRAPHIC (ccs) == 1)
1625 code_point &= 0x7F7F7F7F;
1626 value = make_int (code_point);
1630 signal_simple_error ("Invalid value for coded-charset", value);
1634 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
1637 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
1642 XCHARSET_DECODING_TABLE (ccs)
1643 = v = make_older_vector (ccs_len, Qnil);
1646 decoding_table_put_char (v, dim, byte_offset, code_point, character);
1648 if (NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
1650 XCHARSET_ENCODING_TABLE (ccs)
1651 = encoding_table = make_char_id_table (Qnil);
1653 put_char_id_table (XCHAR (character), value, encoding_table);
1658 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
1660 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
1661 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1663 if (VECTORP (decoding_table))
1665 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
1669 decoding_table_remove_char (decoding_table,
1670 XCHARSET_DIMENSION (ccs),
1671 XCHARSET_BYTE_OFFSET (ccs),
1675 if (CHAR_ID_TABLE_P (encoding_table))
1677 put_char_id_table (XCHAR (character), Qnil, encoding_table);
1682 EXFUN (Fmake_char, 3);
1683 EXFUN (Fdecode_char, 2);
1685 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
1686 Store character's ATTRIBUTES.
1690 Lisp_Object rest = attributes;
1691 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
1692 Lisp_Object character;
1696 while (CONSP (rest))
1698 Lisp_Object cell = Fcar (rest);
1702 signal_simple_error ("Invalid argument", attributes);
1703 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
1704 && ((XCHARSET_FINAL (ccs) != 0) ||
1705 (XCHARSET_UCS_MAX (ccs) > 0)) )
1709 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
1711 character = Fdecode_char (ccs, cell);
1712 if (!NILP (character))
1713 goto setup_attributes;
1717 if ( (!NILP (code = Fcdr (Fassq (Qto_ucs, attributes)))) ||
1718 (!NILP (code = Fcdr (Fassq (Q_ucs, attributes)))) )
1722 signal_simple_error ("Invalid argument", attributes);
1724 character = make_char (XINT (code) + 0x100000);
1725 goto setup_attributes;
1729 else if (!INTP (code))
1730 signal_simple_error ("Invalid argument", attributes);
1732 character = make_char (XINT (code));
1736 while (CONSP (rest))
1738 Lisp_Object cell = Fcar (rest);
1741 signal_simple_error ("Invalid argument", attributes);
1743 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
1749 DEFUN ("find-char", Ffind_char, 1, 1, 0, /*
1750 Retrieve the character of the given ATTRIBUTES.
1754 Lisp_Object rest = attributes;
1757 while (CONSP (rest))
1759 Lisp_Object cell = Fcar (rest);
1763 signal_simple_error ("Invalid argument", attributes);
1764 if (!NILP (ccs = Ffind_charset (Fcar (cell))))
1768 return Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
1770 return Fdecode_char (ccs, cell);
1774 if ( (!NILP (code = Fcdr (Fassq (Qto_ucs, attributes)))) ||
1775 (!NILP (code = Fcdr (Fassq (Q_ucs, attributes)))) )
1778 signal_simple_error ("Invalid argument", attributes);
1780 return make_char (XINT (code) + 0x100000);
1785 Lisp_Object Vutf_2000_version;
1789 int leading_code_private_11;
1792 Lisp_Object Qcharsetp;
1794 /* Qdoc_string, Qdimension, Qchars defined in general.c */
1795 Lisp_Object Qregistry, Qfinal, Qgraphic;
1796 Lisp_Object Qdirection;
1797 Lisp_Object Qreverse_direction_charset;
1798 Lisp_Object Qleading_byte;
1799 Lisp_Object Qshort_name, Qlong_name;
1813 Qcyrillic_iso8859_5,
1815 Qjapanese_jisx0208_1978,
1819 Qjapanese_jisx0208_1990,
1822 Qchinese_cns11643_1,
1823 Qchinese_cns11643_2,
1831 Qlatin_viscii_lower,
1832 Qlatin_viscii_upper,
1833 Qvietnamese_viscii_lower,
1834 Qvietnamese_viscii_upper,
1847 Qideograph_gt_pj_10,
1848 Qideograph_gt_pj_11,
1878 Lisp_Object Ql2r, Qr2l;
1880 Lisp_Object Vcharset_hash_table;
1882 /* Composite characters are characters constructed by overstriking two
1883 or more regular characters.
1885 1) The old Mule implementation involves storing composite characters
1886 in a buffer as a tag followed by all of the actual characters
1887 used to make up the composite character. I think this is a bad
1888 idea; it greatly complicates code that wants to handle strings
1889 one character at a time because it has to deal with the possibility
1890 of great big ungainly characters. It's much more reasonable to
1891 simply store an index into a table of composite characters.
1893 2) The current implementation only allows for 16,384 separate
1894 composite characters over the lifetime of the XEmacs process.
1895 This could become a potential problem if the user
1896 edited lots of different files that use composite characters.
1897 Due to FSF bogosity, increasing the number of allowable
1898 composite characters under Mule would decrease the number
1899 of possible faces that can exist. Mule already has shrunk
1900 this to 2048, and further shrinkage would become uncomfortable.
1901 No such problems exist in XEmacs.
1903 Composite characters could be represented as 0x80 C1 C2 C3,
1904 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1905 for slightly under 2^20 (one million) composite characters
1906 over the XEmacs process lifetime, and you only need to
1907 increase the size of a Mule character from 19 to 21 bits.
1908 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1909 85 million (slightly over 2^26) composite characters. */
1912 /************************************************************************/
1913 /* Basic Emchar functions */
1914 /************************************************************************/
1916 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1917 string in STR. Returns the number of bytes stored.
1918 Do not call this directly. Use the macro set_charptr_emchar() instead.
1922 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1928 Lisp_Object charset;
1937 else if ( c <= 0x7ff )
1939 *p++ = (c >> 6) | 0xc0;
1940 *p++ = (c & 0x3f) | 0x80;
1942 else if ( c <= 0xffff )
1944 *p++ = (c >> 12) | 0xe0;
1945 *p++ = ((c >> 6) & 0x3f) | 0x80;
1946 *p++ = (c & 0x3f) | 0x80;
1948 else if ( c <= 0x1fffff )
1950 *p++ = (c >> 18) | 0xf0;
1951 *p++ = ((c >> 12) & 0x3f) | 0x80;
1952 *p++ = ((c >> 6) & 0x3f) | 0x80;
1953 *p++ = (c & 0x3f) | 0x80;
1955 else if ( c <= 0x3ffffff )
1957 *p++ = (c >> 24) | 0xf8;
1958 *p++ = ((c >> 18) & 0x3f) | 0x80;
1959 *p++ = ((c >> 12) & 0x3f) | 0x80;
1960 *p++ = ((c >> 6) & 0x3f) | 0x80;
1961 *p++ = (c & 0x3f) | 0x80;
1965 *p++ = (c >> 30) | 0xfc;
1966 *p++ = ((c >> 24) & 0x3f) | 0x80;
1967 *p++ = ((c >> 18) & 0x3f) | 0x80;
1968 *p++ = ((c >> 12) & 0x3f) | 0x80;
1969 *p++ = ((c >> 6) & 0x3f) | 0x80;
1970 *p++ = (c & 0x3f) | 0x80;
1973 BREAKUP_CHAR (c, charset, c1, c2);
1974 lb = CHAR_LEADING_BYTE (c);
1975 if (LEADING_BYTE_PRIVATE_P (lb))
1976 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1978 if (EQ (charset, Vcharset_control_1))
1987 /* Return the first character from a Mule-encoded string in STR,
1988 assuming it's non-ASCII. Do not call this directly.
1989 Use the macro charptr_emchar() instead. */
1992 non_ascii_charptr_emchar (const Bufbyte *str)
2005 else if ( b >= 0xf8 )
2010 else if ( b >= 0xf0 )
2015 else if ( b >= 0xe0 )
2020 else if ( b >= 0xc0 )
2030 for( ; len > 0; len-- )
2033 ch = ( ch << 6 ) | ( b & 0x3f );
2037 Bufbyte i0 = *str, i1, i2 = 0;
2038 Lisp_Object charset;
2040 if (i0 == LEADING_BYTE_CONTROL_1)
2041 return (Emchar) (*++str - 0x20);
2043 if (LEADING_BYTE_PREFIX_P (i0))
2048 charset = CHARSET_BY_LEADING_BYTE (i0);
2049 if (XCHARSET_DIMENSION (charset) == 2)
2052 return MAKE_CHAR (charset, i1, i2);
2056 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
2057 Do not call this directly. Use the macro valid_char_p() instead. */
2061 non_ascii_valid_char_p (Emchar ch)
2065 /* Must have only lowest 19 bits set */
2069 f1 = CHAR_FIELD1 (ch);
2070 f2 = CHAR_FIELD2 (ch);
2071 f3 = CHAR_FIELD3 (ch);
2075 Lisp_Object charset;
2077 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
2078 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
2079 f2 > MAX_CHAR_FIELD2_PRIVATE)
2084 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
2085 f2 <= MAX_CHAR_FIELD2_PRIVATE))
2089 NOTE: This takes advantage of the fact that
2090 FIELD2_TO_OFFICIAL_LEADING_BYTE and
2091 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
2093 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
2094 if (EQ (charset, Qnil))
2096 return (XCHARSET_CHARS (charset) == 96);
2100 Lisp_Object charset;
2102 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
2103 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
2104 f1 > MAX_CHAR_FIELD1_PRIVATE)
2106 if (f2 < 0x20 || f3 < 0x20)
2109 #ifdef ENABLE_COMPOSITE_CHARS
2110 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
2112 if (UNBOUNDP (Fgethash (make_int (ch),
2113 Vcomposite_char_char2string_hash_table,
2118 #endif /* ENABLE_COMPOSITE_CHARS */
2120 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
2121 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
2124 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
2126 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
2129 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
2131 if (EQ (charset, Qnil))
2133 return (XCHARSET_CHARS (charset) == 96);
2139 /************************************************************************/
2140 /* Basic string functions */
2141 /************************************************************************/
2143 /* Copy the character pointed to by SRC into DST. Do not call this
2144 directly. Use the macro charptr_copy_char() instead.
2145 Return the number of bytes copied. */
2148 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
2150 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
2152 for (i = bytes; i; i--, dst++, src++)
2158 /************************************************************************/
2159 /* streams of Emchars */
2160 /************************************************************************/
2162 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
2163 The functions below are not meant to be called directly; use
2164 the macros in insdel.h. */
2167 Lstream_get_emchar_1 (Lstream *stream, int ch)
2169 Bufbyte str[MAX_EMCHAR_LEN];
2170 Bufbyte *strptr = str;
2173 str[0] = (Bufbyte) ch;
2175 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
2177 int c = Lstream_getc (stream);
2178 bufpos_checking_assert (c >= 0);
2179 *++strptr = (Bufbyte) c;
2181 return charptr_emchar (str);
2185 Lstream_fput_emchar (Lstream *stream, Emchar ch)
2187 Bufbyte str[MAX_EMCHAR_LEN];
2188 Bytecount len = set_charptr_emchar (str, ch);
2189 return Lstream_write (stream, str, len);
2193 Lstream_funget_emchar (Lstream *stream, Emchar ch)
2195 Bufbyte str[MAX_EMCHAR_LEN];
2196 Bytecount len = set_charptr_emchar (str, ch);
2197 Lstream_unread (stream, str, len);
2201 /************************************************************************/
2202 /* charset object */
2203 /************************************************************************/
2206 mark_charset (Lisp_Object obj)
2208 Lisp_Charset *cs = XCHARSET (obj);
2210 mark_object (cs->short_name);
2211 mark_object (cs->long_name);
2212 mark_object (cs->doc_string);
2213 mark_object (cs->registry);
2214 mark_object (cs->ccl_program);
2216 mark_object (cs->encoding_table);
2217 /* mark_object (cs->decoding_table); */
2223 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
2225 Lisp_Charset *cs = XCHARSET (obj);
2229 error ("printing unreadable object #<charset %s 0x%x>",
2230 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
2233 write_c_string ("#<charset ", printcharfun);
2234 print_internal (CHARSET_NAME (cs), printcharfun, 0);
2235 write_c_string (" ", printcharfun);
2236 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
2237 write_c_string (" ", printcharfun);
2238 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
2239 write_c_string (" ", printcharfun);
2240 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
2241 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
2243 CHARSET_DIMENSION (cs),
2244 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
2245 CHARSET_COLUMNS (cs),
2246 CHARSET_GRAPHIC (cs),
2247 CHARSET_FINAL (cs));
2248 write_c_string (buf, printcharfun);
2249 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
2250 sprintf (buf, " 0x%x>", cs->header.uid);
2251 write_c_string (buf, printcharfun);
2254 static const struct lrecord_description charset_description[] = {
2255 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
2256 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
2257 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
2258 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
2259 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
2260 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
2261 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
2263 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
2264 { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
2269 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
2270 mark_charset, print_charset, 0, 0, 0,
2271 charset_description,
2274 /* Make a new charset. */
2275 /* #### SJT Should generic properties be allowed? */
2277 make_charset (Charset_ID id, Lisp_Object name,
2278 unsigned short chars, unsigned char dimension,
2279 unsigned char columns, unsigned char graphic,
2280 Bufbyte final, unsigned char direction, Lisp_Object short_name,
2281 Lisp_Object long_name, Lisp_Object doc,
2283 Lisp_Object decoding_table,
2284 Emchar ucs_min, Emchar ucs_max,
2285 Emchar code_offset, unsigned char byte_offset)
2288 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
2292 XSETCHARSET (obj, cs);
2294 CHARSET_ID (cs) = id;
2295 CHARSET_NAME (cs) = name;
2296 CHARSET_SHORT_NAME (cs) = short_name;
2297 CHARSET_LONG_NAME (cs) = long_name;
2298 CHARSET_CHARS (cs) = chars;
2299 CHARSET_DIMENSION (cs) = dimension;
2300 CHARSET_DIRECTION (cs) = direction;
2301 CHARSET_COLUMNS (cs) = columns;
2302 CHARSET_GRAPHIC (cs) = graphic;
2303 CHARSET_FINAL (cs) = final;
2304 CHARSET_DOC_STRING (cs) = doc;
2305 CHARSET_REGISTRY (cs) = reg;
2306 CHARSET_CCL_PROGRAM (cs) = Qnil;
2307 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
2309 CHARSET_DECODING_TABLE(cs) = Qnil;
2310 CHARSET_ENCODING_TABLE(cs) = Qnil;
2311 CHARSET_UCS_MIN(cs) = ucs_min;
2312 CHARSET_UCS_MAX(cs) = ucs_max;
2313 CHARSET_CODE_OFFSET(cs) = code_offset;
2314 CHARSET_BYTE_OFFSET(cs) = byte_offset;
2318 if (id == LEADING_BYTE_ASCII)
2319 CHARSET_REP_BYTES (cs) = 1;
2321 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
2323 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
2328 /* some charsets do not have final characters. This includes
2329 ASCII, Control-1, Composite, and the two faux private
2331 unsigned char iso2022_type
2332 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
2334 if (code_offset == 0)
2336 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
2337 chlook->charset_by_attributes[iso2022_type][final] = obj;
2341 (chlook->charset_by_attributes[iso2022_type][final][direction]));
2342 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
2346 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
2347 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
2349 /* Some charsets are "faux" and don't have names or really exist at
2350 all except in the leading-byte table. */
2352 Fputhash (name, obj, Vcharset_hash_table);
2357 get_unallocated_leading_byte (int dimension)
2362 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
2365 lb = chlook->next_allocated_leading_byte++;
2369 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
2372 lb = chlook->next_allocated_1_byte_leading_byte++;
2376 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
2379 lb = chlook->next_allocated_2_byte_leading_byte++;
2385 ("No more character sets free for this dimension",
2386 make_int (dimension));
2392 /* Number of Big5 characters which have the same code in 1st byte. */
2394 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
2397 decode_builtin_char (Lisp_Object charset, int code_point)
2401 if (EQ (charset, Vcharset_chinese_big5))
2403 int c1 = code_point >> 8;
2404 int c2 = code_point & 0xFF;
2407 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
2409 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
2410 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
2412 I = (c1 - 0xA1) * BIG5_SAME_ROW
2413 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
2417 charset = Vcharset_chinese_big5_1;
2421 charset = Vcharset_chinese_big5_2;
2422 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
2424 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
2427 if ((final = XCHARSET_FINAL (charset)) >= '0')
2429 if (XCHARSET_DIMENSION (charset) == 1)
2431 switch (XCHARSET_CHARS (charset))
2435 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
2438 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
2446 switch (XCHARSET_CHARS (charset))
2449 return MIN_CHAR_94x94
2450 + (final - '0') * 94 * 94
2451 + (((code_point >> 8) & 0x7F) - 33) * 94
2452 + ((code_point & 0x7F) - 33);
2454 return MIN_CHAR_96x96
2455 + (final - '0') * 96 * 96
2456 + (((code_point >> 8) & 0x7F) - 32) * 96
2457 + ((code_point & 0x7F) - 32);
2464 else if (XCHARSET_UCS_MAX (charset))
2467 = (XCHARSET_DIMENSION (charset) == 1
2469 code_point - XCHARSET_BYTE_OFFSET (charset)
2471 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
2472 * XCHARSET_CHARS (charset)
2473 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
2474 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
2475 if ((cid < XCHARSET_UCS_MIN (charset))
2476 || (XCHARSET_UCS_MAX (charset) < cid))
2485 range_charset_code_point (Lisp_Object charset, Emchar ch)
2489 if ((XCHARSET_UCS_MIN (charset) <= ch)
2490 && (ch <= XCHARSET_UCS_MAX (charset)))
2492 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
2494 if (XCHARSET_CHARS (charset) == 256)
2496 else if (XCHARSET_DIMENSION (charset) == 1)
2497 return d + XCHARSET_BYTE_OFFSET (charset);
2498 else if (XCHARSET_DIMENSION (charset) == 2)
2500 ((d / XCHARSET_CHARS (charset)
2501 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2502 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2503 else if (XCHARSET_DIMENSION (charset) == 3)
2505 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2506 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2507 | ((d / XCHARSET_CHARS (charset)
2508 % XCHARSET_CHARS (charset)
2509 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2510 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2511 else /* if (XCHARSET_DIMENSION (charset) == 4) */
2513 ((d / (XCHARSET_CHARS (charset)
2514 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2515 + XCHARSET_BYTE_OFFSET (charset)) << 24)
2516 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2517 % XCHARSET_CHARS (charset)
2518 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2519 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
2520 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2521 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2523 else if (XCHARSET_CODE_OFFSET (charset) == 0)
2525 if (XCHARSET_DIMENSION (charset) == 1)
2527 if (XCHARSET_CHARS (charset) == 94)
2529 if (((d = ch - (MIN_CHAR_94
2530 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
2534 else if (XCHARSET_CHARS (charset) == 96)
2536 if (((d = ch - (MIN_CHAR_96
2537 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
2544 else if (XCHARSET_DIMENSION (charset) == 2)
2546 if (XCHARSET_CHARS (charset) == 94)
2548 if (((d = ch - (MIN_CHAR_94x94
2549 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
2552 return (((d / 94) + 33) << 8) | (d % 94 + 33);
2554 else if (XCHARSET_CHARS (charset) == 96)
2556 if (((d = ch - (MIN_CHAR_96x96
2557 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
2560 return (((d / 96) + 32) << 8) | (d % 96 + 32);
2566 if (EQ (charset, Vcharset_mojikyo_2022_1)
2567 && (MIN_CHAR_MOJIKYO < ch) && (ch < MIN_CHAR_MOJIKYO + 94 * 60 * 94))
2569 int m = ch - MIN_CHAR_MOJIKYO - 1;
2570 int byte1 = m / (94 * 60) + 33;
2571 int byte2 = (m % (94 * 60)) / 94;
2572 int byte3 = m % 94 + 33;
2578 return (byte1 << 16) | (byte2 << 8) | byte3;
2584 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
2586 if (c <= MAX_CHAR_BASIC_LATIN)
2588 *charset = Vcharset_ascii;
2593 *charset = Vcharset_control_1;
2598 *charset = Vcharset_latin_iso8859_1;
2602 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
2604 *charset = Vcharset_hebrew_iso8859_8;
2605 return c - MIN_CHAR_HEBREW + 0x20;
2608 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
2610 *charset = Vcharset_thai_tis620;
2611 return c - MIN_CHAR_THAI + 0x20;
2614 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
2615 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
2617 return list2 (Vcharset_katakana_jisx0201,
2618 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
2621 else if (c <= MAX_CHAR_BMP)
2623 *charset = Vcharset_ucs_bmp;
2626 else if (c < MIN_CHAR_DAIKANWA)
2628 *charset = Vcharset_ucs;
2631 else if (c <= MAX_CHAR_DAIKANWA)
2633 *charset = Vcharset_ideograph_daikanwa;
2634 return c - MIN_CHAR_DAIKANWA;
2636 else if (c <= MAX_CHAR_MOJIKYO_0)
2638 *charset = Vcharset_mojikyo;
2639 return c - MIN_CHAR_MOJIKYO_0;
2641 else if (c < MIN_CHAR_94)
2643 *charset = Vcharset_ucs;
2646 else if (c <= MAX_CHAR_94)
2648 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
2649 ((c - MIN_CHAR_94) / 94) + '0',
2650 CHARSET_LEFT_TO_RIGHT);
2651 if (!NILP (*charset))
2652 return ((c - MIN_CHAR_94) % 94) + 33;
2655 *charset = Vcharset_ucs;
2659 else if (c <= MAX_CHAR_96)
2661 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
2662 ((c - MIN_CHAR_96) / 96) + '0',
2663 CHARSET_LEFT_TO_RIGHT);
2664 if (!NILP (*charset))
2665 return ((c - MIN_CHAR_96) % 96) + 32;
2668 *charset = Vcharset_ucs;
2672 else if (c <= MAX_CHAR_94x94)
2675 = CHARSET_BY_ATTRIBUTES (94, 2,
2676 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
2677 CHARSET_LEFT_TO_RIGHT);
2678 if (!NILP (*charset))
2679 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
2680 | (((c - MIN_CHAR_94x94) % 94) + 33);
2683 *charset = Vcharset_ucs;
2687 else if (c <= MAX_CHAR_96x96)
2690 = CHARSET_BY_ATTRIBUTES (96, 2,
2691 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
2692 CHARSET_LEFT_TO_RIGHT);
2693 if (!NILP (*charset))
2694 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
2695 | (((c - MIN_CHAR_96x96) % 96) + 32);
2698 *charset = Vcharset_ucs;
2702 else if (c < MIN_CHAR_MOJIKYO)
2704 *charset = Vcharset_ucs;
2707 else if (c <= MAX_CHAR_MOJIKYO)
2709 *charset = Vcharset_mojikyo;
2710 return c - MIN_CHAR_MOJIKYO;
2714 *charset = Vcharset_ucs;
2719 Lisp_Object Vdefault_coded_charset_priority_list;
2723 /************************************************************************/
2724 /* Basic charset Lisp functions */
2725 /************************************************************************/
2727 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
2728 Return non-nil if OBJECT is a charset.
2732 return CHARSETP (object) ? Qt : Qnil;
2735 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
2736 Retrieve the charset of the given name.
2737 If CHARSET-OR-NAME is a charset object, it is simply returned.
2738 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
2739 nil is returned. Otherwise the associated charset object is returned.
2743 if (CHARSETP (charset_or_name))
2744 return charset_or_name;
2746 CHECK_SYMBOL (charset_or_name);
2747 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
2750 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
2751 Retrieve the charset of the given name.
2752 Same as `find-charset' except an error is signalled if there is no such
2753 charset instead of returning nil.
2757 Lisp_Object charset = Ffind_charset (name);
2760 signal_simple_error ("No such charset", name);
2764 /* We store the charsets in hash tables with the names as the key and the
2765 actual charset object as the value. Occasionally we need to use them
2766 in a list format. These routines provide us with that. */
2767 struct charset_list_closure
2769 Lisp_Object *charset_list;
2773 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
2774 void *charset_list_closure)
2776 /* This function can GC */
2777 struct charset_list_closure *chcl =
2778 (struct charset_list_closure*) charset_list_closure;
2779 Lisp_Object *charset_list = chcl->charset_list;
2781 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
2785 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
2786 Return a list of the names of all defined charsets.
2790 Lisp_Object charset_list = Qnil;
2791 struct gcpro gcpro1;
2792 struct charset_list_closure charset_list_closure;
2794 GCPRO1 (charset_list);
2795 charset_list_closure.charset_list = &charset_list;
2796 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
2797 &charset_list_closure);
2800 return charset_list;
2803 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
2804 Return the name of charset CHARSET.
2808 return XCHARSET_NAME (Fget_charset (charset));
2811 /* #### SJT Should generic properties be allowed? */
2812 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
2813 Define a new character set.
2814 This function is for use with Mule support.
2815 NAME is a symbol, the name by which the character set is normally referred.
2816 DOC-STRING is a string describing the character set.
2817 PROPS is a property list, describing the specific nature of the
2818 character set. Recognized properties are:
2820 'short-name Short version of the charset name (ex: Latin-1)
2821 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
2822 'registry A regular expression matching the font registry field for
2824 'dimension Number of octets used to index a character in this charset.
2825 Either 1 or 2. Defaults to 1.
2826 'columns Number of columns used to display a character in this charset.
2827 Only used in TTY mode. (Under X, the actual width of a
2828 character can be derived from the font used to display the
2829 characters.) If unspecified, defaults to the dimension
2830 (this is almost always the correct value).
2831 'chars Number of characters in each dimension (94 or 96).
2832 Defaults to 94. Note that if the dimension is 2, the
2833 character set thus described is 94x94 or 96x96.
2834 'final Final byte of ISO 2022 escape sequence. Must be
2835 supplied. Each combination of (DIMENSION, CHARS) defines a
2836 separate namespace for final bytes. Note that ISO
2837 2022 restricts the final byte to the range
2838 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
2839 dimension == 2. Note also that final bytes in the range
2840 0x30 - 0x3F are reserved for user-defined (not official)
2842 'graphic 0 (use left half of font on output) or 1 (use right half
2843 of font on output). Defaults to 0. For example, for
2844 a font whose registry is ISO8859-1, the left half
2845 (octets 0x20 - 0x7F) is the `ascii' character set, while
2846 the right half (octets 0xA0 - 0xFF) is the `latin-1'
2847 character set. With 'graphic set to 0, the octets
2848 will have their high bit cleared; with it set to 1,
2849 the octets will have their high bit set.
2850 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
2852 'ccl-program A compiled CCL program used to convert a character in
2853 this charset into an index into the font. This is in
2854 addition to the 'graphic property. The CCL program
2855 is passed the octets of the character, with the high
2856 bit cleared and set depending upon whether the value
2857 of the 'graphic property is 0 or 1.
2859 (name, doc_string, props))
2861 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
2862 int direction = CHARSET_LEFT_TO_RIGHT;
2863 Lisp_Object registry = Qnil;
2864 Lisp_Object charset;
2865 Lisp_Object ccl_program = Qnil;
2866 Lisp_Object short_name = Qnil, long_name = Qnil;
2867 int byte_offset = -1;
2869 CHECK_SYMBOL (name);
2870 if (!NILP (doc_string))
2871 CHECK_STRING (doc_string);
2873 charset = Ffind_charset (name);
2874 if (!NILP (charset))
2875 signal_simple_error ("Cannot redefine existing charset", name);
2878 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
2880 if (EQ (keyword, Qshort_name))
2882 CHECK_STRING (value);
2886 if (EQ (keyword, Qlong_name))
2888 CHECK_STRING (value);
2892 else if (EQ (keyword, Qdimension))
2895 dimension = XINT (value);
2896 if (dimension < 1 || dimension > 2)
2897 signal_simple_error ("Invalid value for 'dimension", value);
2900 else if (EQ (keyword, Qchars))
2903 chars = XINT (value);
2904 if (chars != 94 && chars != 96)
2905 signal_simple_error ("Invalid value for 'chars", value);
2908 else if (EQ (keyword, Qcolumns))
2911 columns = XINT (value);
2912 if (columns != 1 && columns != 2)
2913 signal_simple_error ("Invalid value for 'columns", value);
2916 else if (EQ (keyword, Qgraphic))
2919 graphic = XINT (value);
2921 if (graphic < 0 || graphic > 2)
2923 if (graphic < 0 || graphic > 1)
2925 signal_simple_error ("Invalid value for 'graphic", value);
2928 else if (EQ (keyword, Qregistry))
2930 CHECK_STRING (value);
2934 else if (EQ (keyword, Qdirection))
2936 if (EQ (value, Ql2r))
2937 direction = CHARSET_LEFT_TO_RIGHT;
2938 else if (EQ (value, Qr2l))
2939 direction = CHARSET_RIGHT_TO_LEFT;
2941 signal_simple_error ("Invalid value for 'direction", value);
2944 else if (EQ (keyword, Qfinal))
2946 CHECK_CHAR_COERCE_INT (value);
2947 final = XCHAR (value);
2948 if (final < '0' || final > '~')
2949 signal_simple_error ("Invalid value for 'final", value);
2952 else if (EQ (keyword, Qccl_program))
2954 struct ccl_program test_ccl;
2956 if (setup_ccl_program (&test_ccl, value) < 0)
2957 signal_simple_error ("Invalid value for 'ccl-program", value);
2958 ccl_program = value;
2962 signal_simple_error ("Unrecognized property", keyword);
2967 error ("'final must be specified");
2968 if (dimension == 2 && final > 0x5F)
2970 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2973 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2974 CHARSET_LEFT_TO_RIGHT)) ||
2975 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2976 CHARSET_RIGHT_TO_LEFT)))
2978 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2980 id = get_unallocated_leading_byte (dimension);
2982 if (NILP (doc_string))
2983 doc_string = build_string ("");
2985 if (NILP (registry))
2986 registry = build_string ("");
2988 if (NILP (short_name))
2989 XSETSTRING (short_name, XSYMBOL (name)->name);
2991 if (NILP (long_name))
2992 long_name = doc_string;
2995 columns = dimension;
2997 if (byte_offset < 0)
3001 else if (chars == 96)
3007 charset = make_charset (id, name, chars, dimension, columns, graphic,
3008 final, direction, short_name, long_name,
3009 doc_string, registry,
3010 Qnil, 0, 0, 0, byte_offset);
3011 if (!NILP (ccl_program))
3012 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
3016 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
3018 Make a charset equivalent to CHARSET but which goes in the opposite direction.
3019 NEW-NAME is the name of the new charset. Return the new charset.
3021 (charset, new_name))
3023 Lisp_Object new_charset = Qnil;
3024 int id, chars, dimension, columns, graphic, final;
3026 Lisp_Object registry, doc_string, short_name, long_name;
3029 charset = Fget_charset (charset);
3030 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
3031 signal_simple_error ("Charset already has reverse-direction charset",
3034 CHECK_SYMBOL (new_name);
3035 if (!NILP (Ffind_charset (new_name)))
3036 signal_simple_error ("Cannot redefine existing charset", new_name);
3038 cs = XCHARSET (charset);
3040 chars = CHARSET_CHARS (cs);
3041 dimension = CHARSET_DIMENSION (cs);
3042 columns = CHARSET_COLUMNS (cs);
3043 id = get_unallocated_leading_byte (dimension);
3045 graphic = CHARSET_GRAPHIC (cs);
3046 final = CHARSET_FINAL (cs);
3047 direction = CHARSET_RIGHT_TO_LEFT;
3048 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
3049 direction = CHARSET_LEFT_TO_RIGHT;
3050 doc_string = CHARSET_DOC_STRING (cs);
3051 short_name = CHARSET_SHORT_NAME (cs);
3052 long_name = CHARSET_LONG_NAME (cs);
3053 registry = CHARSET_REGISTRY (cs);
3055 new_charset = make_charset (id, new_name, chars, dimension, columns,
3056 graphic, final, direction, short_name, long_name,
3057 doc_string, registry,
3059 CHARSET_DECODING_TABLE(cs),
3060 CHARSET_UCS_MIN(cs),
3061 CHARSET_UCS_MAX(cs),
3062 CHARSET_CODE_OFFSET(cs),
3063 CHARSET_BYTE_OFFSET(cs)
3069 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
3070 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
3075 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
3076 Define symbol ALIAS as an alias for CHARSET.
3080 CHECK_SYMBOL (alias);
3081 charset = Fget_charset (charset);
3082 return Fputhash (alias, charset, Vcharset_hash_table);
3085 /* #### Reverse direction charsets not yet implemented. */
3087 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
3089 Return the reverse-direction charset parallel to CHARSET, if any.
3090 This is the charset with the same properties (in particular, the same
3091 dimension, number of characters per dimension, and final byte) as
3092 CHARSET but whose characters are displayed in the opposite direction.
3096 charset = Fget_charset (charset);
3097 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
3101 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
3102 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
3103 If DIRECTION is omitted, both directions will be checked (left-to-right
3104 will be returned if character sets exist for both directions).
3106 (dimension, chars, final, direction))
3108 int dm, ch, fi, di = -1;
3109 Lisp_Object obj = Qnil;
3111 CHECK_INT (dimension);
3112 dm = XINT (dimension);
3113 if (dm < 1 || dm > 2)
3114 signal_simple_error ("Invalid value for DIMENSION", dimension);
3118 if (ch != 94 && ch != 96)
3119 signal_simple_error ("Invalid value for CHARS", chars);
3121 CHECK_CHAR_COERCE_INT (final);
3123 if (fi < '0' || fi > '~')
3124 signal_simple_error ("Invalid value for FINAL", final);
3126 if (EQ (direction, Ql2r))
3127 di = CHARSET_LEFT_TO_RIGHT;
3128 else if (EQ (direction, Qr2l))
3129 di = CHARSET_RIGHT_TO_LEFT;
3130 else if (!NILP (direction))
3131 signal_simple_error ("Invalid value for DIRECTION", direction);
3133 if (dm == 2 && fi > 0x5F)
3135 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
3139 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
3141 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
3144 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
3147 return XCHARSET_NAME (obj);
3151 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
3152 Return short name of CHARSET.
3156 return XCHARSET_SHORT_NAME (Fget_charset (charset));
3159 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
3160 Return long name of CHARSET.
3164 return XCHARSET_LONG_NAME (Fget_charset (charset));
3167 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
3168 Return description of CHARSET.
3172 return XCHARSET_DOC_STRING (Fget_charset (charset));
3175 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
3176 Return dimension of CHARSET.
3180 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
3183 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
3184 Return property PROP of CHARSET, a charset object or symbol naming a charset.
3185 Recognized properties are those listed in `make-charset', as well as
3186 'name and 'doc-string.
3192 charset = Fget_charset (charset);
3193 cs = XCHARSET (charset);
3195 CHECK_SYMBOL (prop);
3196 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
3197 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
3198 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
3199 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
3200 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
3201 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
3202 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
3203 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
3204 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
3205 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
3206 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
3207 if (EQ (prop, Qdirection))
3208 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
3209 if (EQ (prop, Qreverse_direction_charset))
3211 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
3212 /* #### Is this translation OK? If so, error checking sufficient? */
3213 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
3215 signal_simple_error ("Unrecognized charset property name", prop);
3216 return Qnil; /* not reached */
3219 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
3220 Return charset identification number of CHARSET.
3224 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
3227 /* #### We need to figure out which properties we really want to
3230 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
3231 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
3233 (charset, ccl_program))
3235 struct ccl_program test_ccl;
3237 charset = Fget_charset (charset);
3238 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
3239 signal_simple_error ("Invalid ccl-program", ccl_program);
3240 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
3245 invalidate_charset_font_caches (Lisp_Object charset)
3247 /* Invalidate font cache entries for charset on all devices. */
3248 Lisp_Object devcons, concons, hash_table;
3249 DEVICE_LOOP_NO_BREAK (devcons, concons)
3251 struct device *d = XDEVICE (XCAR (devcons));
3252 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
3253 if (!UNBOUNDP (hash_table))
3254 Fclrhash (hash_table);
3258 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
3259 Set the 'registry property of CHARSET to REGISTRY.
3261 (charset, registry))
3263 charset = Fget_charset (charset);
3264 CHECK_STRING (registry);
3265 XCHARSET_REGISTRY (charset) = registry;
3266 invalidate_charset_font_caches (charset);
3267 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
3272 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
3273 Return mapping-table of CHARSET.
3277 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
3280 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
3281 Set mapping-table of CHARSET to TABLE.
3285 struct Lisp_Charset *cs;
3289 charset = Fget_charset (charset);
3290 cs = XCHARSET (charset);
3294 if (VECTORP (CHARSET_DECODING_TABLE(cs)))
3295 make_vector_newer (CHARSET_DECODING_TABLE(cs));
3296 CHARSET_DECODING_TABLE(cs) = Qnil;
3299 else if (VECTORP (table))
3301 int ccs_len = CHARSET_BYTE_SIZE (cs);
3302 int ret = decoding_table_check_elements (table,
3303 CHARSET_DIMENSION (cs),
3308 signal_simple_error ("Too big table", table);
3310 signal_simple_error ("Invalid element is found", table);
3312 signal_simple_error ("Something wrong", table);
3314 CHARSET_DECODING_TABLE(cs) = Qnil;
3317 signal_error (Qwrong_type_argument,
3318 list2 (build_translated_string ("vector-or-nil-p"),
3321 byte_offset = CHARSET_BYTE_OFFSET (cs);
3322 switch (CHARSET_DIMENSION (cs))
3325 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3327 Lisp_Object c = XVECTOR_DATA(table)[i];
3330 put_char_ccs_code_point (c, charset,
3331 make_int (i + byte_offset));
3335 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3337 Lisp_Object v = XVECTOR_DATA(table)[i];
3343 for (j = 0; j < XVECTOR_LENGTH (v); j++)
3345 Lisp_Object c = XVECTOR_DATA(v)[j];
3348 put_char_ccs_code_point
3350 make_int ( ( (i + byte_offset) << 8 )
3356 put_char_ccs_code_point (v, charset,
3357 make_int (i + byte_offset));
3366 /************************************************************************/
3367 /* Lisp primitives for working with characters */
3368 /************************************************************************/
3371 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
3372 Make a character from CHARSET and code-point CODE.
3378 charset = Fget_charset (charset);
3381 if (XCHARSET_GRAPHIC (charset) == 1)
3383 c = DECODE_CHAR (charset, c);
3384 return c >= 0 ? make_char (c) : Qnil;
3387 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
3388 Make a builtin character from CHARSET and code-point CODE.
3394 charset = Fget_charset (charset);
3396 if (EQ (charset, Vcharset_latin_viscii))
3398 Lisp_Object chr = Fdecode_char (charset, code);
3404 (ret = Fget_char_attribute (chr,
3405 Vcharset_latin_viscii_lower,
3408 charset = Vcharset_latin_viscii_lower;
3412 (ret = Fget_char_attribute (chr,
3413 Vcharset_latin_viscii_upper,
3416 charset = Vcharset_latin_viscii_upper;
3423 if (XCHARSET_GRAPHIC (charset) == 1)
3426 c = decode_builtin_char (charset, c);
3427 return c >= 0 ? make_char (c) : Fdecode_char (charset, code);
3431 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
3432 Make a character from CHARSET and octets ARG1 and ARG2.
3433 ARG2 is required only for characters from two-dimensional charsets.
3434 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
3435 character s with caron.
3437 (charset, arg1, arg2))
3441 int lowlim, highlim;
3443 charset = Fget_charset (charset);
3444 cs = XCHARSET (charset);
3446 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
3447 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
3449 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
3451 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
3452 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
3455 /* It is useful (and safe, according to Olivier Galibert) to strip
3456 the 8th bit off ARG1 and ARG2 because it allows programmers to
3457 write (make-char 'latin-iso8859-2 CODE) where code is the actual
3458 Latin 2 code of the character. */
3466 if (a1 < lowlim || a1 > highlim)
3467 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
3469 if (CHARSET_DIMENSION (cs) == 1)
3473 ("Charset is of dimension one; second octet must be nil", arg2);
3474 return make_char (MAKE_CHAR (charset, a1, 0));
3483 a2 = XINT (arg2) & 0x7f;
3485 if (a2 < lowlim || a2 > highlim)
3486 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
3488 return make_char (MAKE_CHAR (charset, a1, a2));
3491 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
3492 Return the character set of CHARACTER.
3496 CHECK_CHAR_COERCE_INT (character);
3498 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
3501 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
3502 Return the octet numbered N (should be 0 or 1) of CHARACTER.
3503 N defaults to 0 if omitted.
3507 Lisp_Object charset;
3510 CHECK_CHAR_COERCE_INT (character);
3512 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
3514 if (NILP (n) || EQ (n, Qzero))
3515 return make_int (octet0);
3516 else if (EQ (n, make_int (1)))
3517 return make_int (octet1);
3519 signal_simple_error ("Octet number must be 0 or 1", n);
3522 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
3523 Return list of charset and one or two position-codes of CHARACTER.
3527 /* This function can GC */
3528 struct gcpro gcpro1, gcpro2;
3529 Lisp_Object charset = Qnil;
3530 Lisp_Object rc = Qnil;
3538 GCPRO2 (charset, rc);
3539 CHECK_CHAR_COERCE_INT (character);
3542 code_point = ENCODE_CHAR (XCHAR (character), charset);
3543 dimension = XCHARSET_DIMENSION (charset);
3544 while (dimension > 0)
3546 rc = Fcons (make_int (code_point & 255), rc);
3550 rc = Fcons (XCHARSET_NAME (charset), rc);
3552 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
3554 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
3556 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
3560 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
3569 #ifdef ENABLE_COMPOSITE_CHARS
3570 /************************************************************************/
3571 /* composite character functions */
3572 /************************************************************************/
3575 lookup_composite_char (Bufbyte *str, int len)
3577 Lisp_Object lispstr = make_string (str, len);
3578 Lisp_Object ch = Fgethash (lispstr,
3579 Vcomposite_char_string2char_hash_table,
3585 if (composite_char_row_next >= 128)
3586 signal_simple_error ("No more composite chars available", lispstr);
3587 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
3588 composite_char_col_next);
3589 Fputhash (make_char (emch), lispstr,
3590 Vcomposite_char_char2string_hash_table);
3591 Fputhash (lispstr, make_char (emch),
3592 Vcomposite_char_string2char_hash_table);
3593 composite_char_col_next++;
3594 if (composite_char_col_next >= 128)
3596 composite_char_col_next = 32;
3597 composite_char_row_next++;
3606 composite_char_string (Emchar ch)
3608 Lisp_Object str = Fgethash (make_char (ch),
3609 Vcomposite_char_char2string_hash_table,
3611 assert (!UNBOUNDP (str));
3615 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
3616 Convert a string into a single composite character.
3617 The character is the result of overstriking all the characters in
3622 CHECK_STRING (string);
3623 return make_char (lookup_composite_char (XSTRING_DATA (string),
3624 XSTRING_LENGTH (string)));
3627 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
3628 Return a string of the characters comprising a composite character.
3636 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
3637 signal_simple_error ("Must be composite char", ch);
3638 return composite_char_string (emch);
3640 #endif /* ENABLE_COMPOSITE_CHARS */
3643 /************************************************************************/
3644 /* initialization */
3645 /************************************************************************/
3648 syms_of_mule_charset (void)
3651 INIT_LRECORD_IMPLEMENTATION (uint8_byte_table);
3652 INIT_LRECORD_IMPLEMENTATION (uint16_byte_table);
3653 INIT_LRECORD_IMPLEMENTATION (byte_table);
3654 INIT_LRECORD_IMPLEMENTATION (char_id_table);
3656 INIT_LRECORD_IMPLEMENTATION (charset);
3658 DEFSUBR (Fcharsetp);
3659 DEFSUBR (Ffind_charset);
3660 DEFSUBR (Fget_charset);
3661 DEFSUBR (Fcharset_list);
3662 DEFSUBR (Fcharset_name);
3663 DEFSUBR (Fmake_charset);
3664 DEFSUBR (Fmake_reverse_direction_charset);
3665 /* DEFSUBR (Freverse_direction_charset); */
3666 DEFSUBR (Fdefine_charset_alias);
3667 DEFSUBR (Fcharset_from_attributes);
3668 DEFSUBR (Fcharset_short_name);
3669 DEFSUBR (Fcharset_long_name);
3670 DEFSUBR (Fcharset_description);
3671 DEFSUBR (Fcharset_dimension);
3672 DEFSUBR (Fcharset_property);
3673 DEFSUBR (Fcharset_id);
3674 DEFSUBR (Fset_charset_ccl_program);
3675 DEFSUBR (Fset_charset_registry);
3677 DEFSUBR (Fchar_attribute_list);
3678 DEFSUBR (Ffind_char_attribute_table);
3679 DEFSUBR (Fchar_attribute_alist);
3680 DEFSUBR (Fget_char_attribute);
3681 DEFSUBR (Fput_char_attribute);
3682 DEFSUBR (Fremove_char_attribute);
3683 DEFSUBR (Fmap_char_attribute);
3684 DEFSUBR (Fdefine_char);
3685 DEFSUBR (Ffind_char);
3686 DEFSUBR (Fchar_variants);
3687 DEFSUBR (Fget_composite_char);
3688 DEFSUBR (Fcharset_mapping_table);
3689 DEFSUBR (Fset_charset_mapping_table);
3693 DEFSUBR (Fdecode_char);
3694 DEFSUBR (Fdecode_builtin_char);
3696 DEFSUBR (Fmake_char);
3697 DEFSUBR (Fchar_charset);
3698 DEFSUBR (Fchar_octet);
3699 DEFSUBR (Fsplit_char);
3701 #ifdef ENABLE_COMPOSITE_CHARS
3702 DEFSUBR (Fmake_composite_char);
3703 DEFSUBR (Fcomposite_char_string);
3706 defsymbol (&Qcharsetp, "charsetp");
3707 defsymbol (&Qregistry, "registry");
3708 defsymbol (&Qfinal, "final");
3709 defsymbol (&Qgraphic, "graphic");
3710 defsymbol (&Qdirection, "direction");
3711 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
3712 defsymbol (&Qshort_name, "short-name");
3713 defsymbol (&Qlong_name, "long-name");
3715 defsymbol (&Ql2r, "l2r");
3716 defsymbol (&Qr2l, "r2l");
3718 /* Charsets, compatible with FSF 20.3
3719 Naming convention is Script-Charset[-Edition] */
3720 defsymbol (&Qascii, "ascii");
3721 defsymbol (&Qcontrol_1, "control-1");
3722 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
3723 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
3724 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
3725 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
3726 defsymbol (&Qthai_tis620, "thai-tis620");
3727 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
3728 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
3729 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
3730 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
3731 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
3732 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
3733 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
3734 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
3735 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
3736 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
3737 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
3738 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
3739 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
3740 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
3741 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
3742 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
3744 defsymbol (&Qto_ucs, "=>ucs");
3745 defsymbol (&Q_ucs, "->ucs");
3746 defsymbol (&Q_decomposition, "->decomposition");
3747 defsymbol (&Qcompat, "compat");
3748 defsymbol (&Qisolated, "isolated");
3749 defsymbol (&Qinitial, "initial");
3750 defsymbol (&Qmedial, "medial");
3751 defsymbol (&Qfinal, "final");
3752 defsymbol (&Qvertical, "vertical");
3753 defsymbol (&QnoBreak, "noBreak");
3754 defsymbol (&Qfraction, "fraction");
3755 defsymbol (&Qsuper, "super");
3756 defsymbol (&Qsub, "sub");
3757 defsymbol (&Qcircle, "circle");
3758 defsymbol (&Qsquare, "square");
3759 defsymbol (&Qwide, "wide");
3760 defsymbol (&Qnarrow, "narrow");
3761 defsymbol (&Qsmall, "small");
3762 defsymbol (&Qfont, "font");
3763 defsymbol (&Qucs, "ucs");
3764 defsymbol (&Qucs_bmp, "ucs-bmp");
3765 defsymbol (&Qucs_cns, "ucs-cns");
3766 defsymbol (&Qucs_jis, "ucs-jis");
3767 defsymbol (&Qucs_big5, "ucs-big5");
3768 defsymbol (&Qlatin_viscii, "latin-viscii");
3769 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
3770 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
3771 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
3772 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
3773 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
3774 defsymbol (&Qideograph_gt, "ideograph-gt");
3775 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
3776 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
3777 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
3778 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
3779 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
3780 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
3781 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
3782 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
3783 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
3784 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
3785 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
3786 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
3787 defsymbol (&Qchinese_big5, "chinese-big5");
3788 defsymbol (&Qchinese_big5_cdp, "chinese-big5-cdp");
3789 defsymbol (&Qmojikyo, "mojikyo");
3790 defsymbol (&Qmojikyo_2022_1, "mojikyo-2022-1");
3791 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
3792 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
3793 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
3794 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
3795 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
3796 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
3797 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
3798 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
3799 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
3800 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
3801 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
3802 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
3803 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
3804 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
3805 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
3806 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
3807 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
3808 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
3809 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
3810 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
3811 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
3812 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
3814 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
3815 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
3817 defsymbol (&Qcomposite, "composite");
3821 vars_of_mule_charset (void)
3828 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
3829 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
3831 /* Table of charsets indexed by leading byte. */
3832 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
3833 chlook->charset_by_leading_byte[i] = Qnil;
3836 /* Table of charsets indexed by type/final-byte. */
3837 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3838 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3839 chlook->charset_by_attributes[i][j] = Qnil;
3841 /* Table of charsets indexed by type/final-byte/direction. */
3842 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3843 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3844 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
3845 chlook->charset_by_attributes[i][j][k] = Qnil;
3849 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
3851 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
3852 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
3856 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3857 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
3858 Leading-code of private TYPE9N charset of column-width 1.
3860 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3864 Vutf_2000_version = build_string("0.17 (Hōryūji)");
3865 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
3866 Version number of UTF-2000.
3869 staticpro (&Vcharacter_composition_table);
3870 Vcharacter_composition_table = make_char_id_table (Qnil);
3872 staticpro (&Vcharacter_variant_table);
3873 Vcharacter_variant_table = make_char_id_table (Qnil);
3875 Vdefault_coded_charset_priority_list = Qnil;
3876 DEFVAR_LISP ("default-coded-charset-priority-list",
3877 &Vdefault_coded_charset_priority_list /*
3878 Default order of preferred coded-character-sets.
3884 complex_vars_of_mule_charset (void)
3886 staticpro (&Vcharset_hash_table);
3887 Vcharset_hash_table =
3888 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3890 /* Predefined character sets. We store them into variables for
3894 staticpro (&Vchar_attribute_hash_table);
3895 Vchar_attribute_hash_table
3896 = make_lisp_hash_table (16, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3898 staticpro (&Vcharset_ucs);
3900 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
3901 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3902 build_string ("UCS"),
3903 build_string ("UCS"),
3904 build_string ("ISO/IEC 10646"),
3906 Qnil, 0, 0xFFFFFFF, 0, 0);
3907 staticpro (&Vcharset_ucs_bmp);
3909 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
3910 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3911 build_string ("BMP"),
3912 build_string ("BMP"),
3913 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
3914 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
3915 Qnil, 0, 0xFFFF, 0, 0);
3916 staticpro (&Vcharset_ucs_cns);
3918 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
3919 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3920 build_string ("UCS for CNS"),
3921 build_string ("UCS for CNS 11643"),
3922 build_string ("ISO/IEC 10646 for CNS 11643"),
3925 staticpro (&Vcharset_ucs_jis);
3927 make_charset (LEADING_BYTE_UCS_JIS, Qucs_jis, 256, 3,
3928 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3929 build_string ("UCS for JIS"),
3930 build_string ("UCS for JIS X 0208, 0212 and 0213"),
3931 build_string ("ISO/IEC 10646 for JIS X 0208, 0212 and 0213"),
3934 staticpro (&Vcharset_ucs_big5);
3936 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
3937 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3938 build_string ("UCS for Big5"),
3939 build_string ("UCS for Big5"),
3940 build_string ("ISO/IEC 10646 for Big5"),
3944 # define MIN_CHAR_THAI 0
3945 # define MAX_CHAR_THAI 0
3946 /* # define MIN_CHAR_HEBREW 0 */
3947 /* # define MAX_CHAR_HEBREW 0 */
3948 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3949 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
3951 staticpro (&Vcharset_ascii);
3953 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3954 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3955 build_string ("ASCII"),
3956 build_string ("ASCII)"),
3957 build_string ("ASCII (ISO646 IRV)"),
3958 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3959 Qnil, 0, 0x7F, 0, 0);
3960 staticpro (&Vcharset_control_1);
3961 Vcharset_control_1 =
3962 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3963 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3964 build_string ("C1"),
3965 build_string ("Control characters"),
3966 build_string ("Control characters 128-191"),
3968 Qnil, 0x80, 0x9F, 0, 0);
3969 staticpro (&Vcharset_latin_iso8859_1);
3970 Vcharset_latin_iso8859_1 =
3971 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3972 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3973 build_string ("Latin-1"),
3974 build_string ("ISO8859-1 (Latin-1)"),
3975 build_string ("ISO8859-1 (Latin-1)"),
3976 build_string ("iso8859-1"),
3977 Qnil, 0xA0, 0xFF, 0, 32);
3978 staticpro (&Vcharset_latin_iso8859_2);
3979 Vcharset_latin_iso8859_2 =
3980 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3981 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3982 build_string ("Latin-2"),
3983 build_string ("ISO8859-2 (Latin-2)"),
3984 build_string ("ISO8859-2 (Latin-2)"),
3985 build_string ("iso8859-2"),
3987 staticpro (&Vcharset_latin_iso8859_3);
3988 Vcharset_latin_iso8859_3 =
3989 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3990 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3991 build_string ("Latin-3"),
3992 build_string ("ISO8859-3 (Latin-3)"),
3993 build_string ("ISO8859-3 (Latin-3)"),
3994 build_string ("iso8859-3"),
3996 staticpro (&Vcharset_latin_iso8859_4);
3997 Vcharset_latin_iso8859_4 =
3998 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3999 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
4000 build_string ("Latin-4"),
4001 build_string ("ISO8859-4 (Latin-4)"),
4002 build_string ("ISO8859-4 (Latin-4)"),
4003 build_string ("iso8859-4"),
4005 staticpro (&Vcharset_thai_tis620);
4006 Vcharset_thai_tis620 =
4007 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
4008 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
4009 build_string ("TIS620"),
4010 build_string ("TIS620 (Thai)"),
4011 build_string ("TIS620.2529 (Thai)"),
4012 build_string ("tis620"),
4013 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
4014 staticpro (&Vcharset_greek_iso8859_7);
4015 Vcharset_greek_iso8859_7 =
4016 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
4017 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
4018 build_string ("ISO8859-7"),
4019 build_string ("ISO8859-7 (Greek)"),
4020 build_string ("ISO8859-7 (Greek)"),
4021 build_string ("iso8859-7"),
4023 staticpro (&Vcharset_arabic_iso8859_6);
4024 Vcharset_arabic_iso8859_6 =
4025 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
4026 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
4027 build_string ("ISO8859-6"),
4028 build_string ("ISO8859-6 (Arabic)"),
4029 build_string ("ISO8859-6 (Arabic)"),
4030 build_string ("iso8859-6"),
4032 staticpro (&Vcharset_hebrew_iso8859_8);
4033 Vcharset_hebrew_iso8859_8 =
4034 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
4035 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
4036 build_string ("ISO8859-8"),
4037 build_string ("ISO8859-8 (Hebrew)"),
4038 build_string ("ISO8859-8 (Hebrew)"),
4039 build_string ("iso8859-8"),
4041 0 /* MIN_CHAR_HEBREW */,
4042 0 /* MAX_CHAR_HEBREW */, 0, 32);
4043 staticpro (&Vcharset_katakana_jisx0201);
4044 Vcharset_katakana_jisx0201 =
4045 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
4046 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
4047 build_string ("JISX0201 Kana"),
4048 build_string ("JISX0201.1976 (Japanese Kana)"),
4049 build_string ("JISX0201.1976 Japanese Kana"),
4050 build_string ("jisx0201\\.1976"),
4052 staticpro (&Vcharset_latin_jisx0201);
4053 Vcharset_latin_jisx0201 =
4054 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
4055 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
4056 build_string ("JISX0201 Roman"),
4057 build_string ("JISX0201.1976 (Japanese Roman)"),
4058 build_string ("JISX0201.1976 Japanese Roman"),
4059 build_string ("jisx0201\\.1976"),
4061 staticpro (&Vcharset_cyrillic_iso8859_5);
4062 Vcharset_cyrillic_iso8859_5 =
4063 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
4064 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
4065 build_string ("ISO8859-5"),
4066 build_string ("ISO8859-5 (Cyrillic)"),
4067 build_string ("ISO8859-5 (Cyrillic)"),
4068 build_string ("iso8859-5"),
4070 staticpro (&Vcharset_latin_iso8859_9);
4071 Vcharset_latin_iso8859_9 =
4072 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
4073 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
4074 build_string ("Latin-5"),
4075 build_string ("ISO8859-9 (Latin-5)"),
4076 build_string ("ISO8859-9 (Latin-5)"),
4077 build_string ("iso8859-9"),
4079 staticpro (&Vcharset_japanese_jisx0208_1978);
4080 Vcharset_japanese_jisx0208_1978 =
4081 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
4082 Qjapanese_jisx0208_1978, 94, 2,
4083 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
4084 build_string ("JIS X0208:1978"),
4085 build_string ("JIS X0208:1978 (Japanese)"),
4087 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
4088 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
4090 staticpro (&Vcharset_chinese_gb2312);
4091 Vcharset_chinese_gb2312 =
4092 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
4093 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
4094 build_string ("GB2312"),
4095 build_string ("GB2312)"),
4096 build_string ("GB2312 Chinese simplified"),
4097 build_string ("gb2312"),
4099 staticpro (&Vcharset_chinese_gb12345);
4100 Vcharset_chinese_gb12345 =
4101 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
4102 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4103 build_string ("G1"),
4104 build_string ("GB 12345)"),
4105 build_string ("GB 12345-1990"),
4106 build_string ("GB12345\\(\\.1990\\)?-0"),
4108 staticpro (&Vcharset_japanese_jisx0208);
4109 Vcharset_japanese_jisx0208 =
4110 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
4111 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
4112 build_string ("JISX0208"),
4113 build_string ("JIS X0208:1983 (Japanese)"),
4114 build_string ("JIS X0208:1983 Japanese Kanji"),
4115 build_string ("jisx0208\\.1983"),
4118 staticpro (&Vcharset_japanese_jisx0208_1990);
4119 Vcharset_japanese_jisx0208_1990 =
4120 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
4121 Qjapanese_jisx0208_1990, 94, 2,
4122 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4123 build_string ("JISX0208-1990"),
4124 build_string ("JIS X0208:1990 (Japanese)"),
4125 build_string ("JIS X0208:1990 Japanese Kanji"),
4126 build_string ("jisx0208\\.1990"),
4128 MIN_CHAR_JIS_X0208_1990,
4129 MAX_CHAR_JIS_X0208_1990, 0, 33);
4131 staticpro (&Vcharset_korean_ksc5601);
4132 Vcharset_korean_ksc5601 =
4133 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
4134 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
4135 build_string ("KSC5601"),
4136 build_string ("KSC5601 (Korean"),
4137 build_string ("KSC5601 Korean Hangul and Hanja"),
4138 build_string ("ksc5601"),
4140 staticpro (&Vcharset_japanese_jisx0212);
4141 Vcharset_japanese_jisx0212 =
4142 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
4143 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
4144 build_string ("JISX0212"),
4145 build_string ("JISX0212 (Japanese)"),
4146 build_string ("JISX0212 Japanese Supplement"),
4147 build_string ("jisx0212"),
4150 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
4151 staticpro (&Vcharset_chinese_cns11643_1);
4152 Vcharset_chinese_cns11643_1 =
4153 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
4154 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
4155 build_string ("CNS11643-1"),
4156 build_string ("CNS11643-1 (Chinese traditional)"),
4158 ("CNS 11643 Plane 1 Chinese traditional"),
4159 build_string (CHINESE_CNS_PLANE_RE("1")),
4161 staticpro (&Vcharset_chinese_cns11643_2);
4162 Vcharset_chinese_cns11643_2 =
4163 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
4164 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
4165 build_string ("CNS11643-2"),
4166 build_string ("CNS11643-2 (Chinese traditional)"),
4168 ("CNS 11643 Plane 2 Chinese traditional"),
4169 build_string (CHINESE_CNS_PLANE_RE("2")),
4172 staticpro (&Vcharset_latin_tcvn5712);
4173 Vcharset_latin_tcvn5712 =
4174 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
4175 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
4176 build_string ("TCVN 5712"),
4177 build_string ("TCVN 5712 (VSCII-2)"),
4178 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
4179 build_string ("tcvn5712\\(\\.1993\\)?-1"),
4181 staticpro (&Vcharset_latin_viscii_lower);
4182 Vcharset_latin_viscii_lower =
4183 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
4184 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
4185 build_string ("VISCII lower"),
4186 build_string ("VISCII lower (Vietnamese)"),
4187 build_string ("VISCII lower (Vietnamese)"),
4188 build_string ("MULEVISCII-LOWER"),
4190 staticpro (&Vcharset_latin_viscii_upper);
4191 Vcharset_latin_viscii_upper =
4192 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
4193 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
4194 build_string ("VISCII upper"),
4195 build_string ("VISCII upper (Vietnamese)"),
4196 build_string ("VISCII upper (Vietnamese)"),
4197 build_string ("MULEVISCII-UPPER"),
4199 staticpro (&Vcharset_latin_viscii);
4200 Vcharset_latin_viscii =
4201 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
4202 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
4203 build_string ("VISCII"),
4204 build_string ("VISCII 1.1 (Vietnamese)"),
4205 build_string ("VISCII 1.1 (Vietnamese)"),
4206 build_string ("VISCII1\\.1"),
4208 staticpro (&Vcharset_chinese_big5);
4209 Vcharset_chinese_big5 =
4210 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
4211 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4212 build_string ("Big5"),
4213 build_string ("Big5"),
4214 build_string ("Big5 Chinese traditional"),
4215 build_string ("big5"),
4217 staticpro (&Vcharset_chinese_big5_cdp);
4218 Vcharset_chinese_big5_cdp =
4219 make_charset (LEADING_BYTE_CHINESE_BIG5_CDP, Qchinese_big5_cdp, 256, 2,
4220 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4221 build_string ("Big5-CDP"),
4222 build_string ("Big5 + CDP extension"),
4223 build_string ("Big5 with CDP extension"),
4224 build_string ("big5\\.cdp-0"),
4226 staticpro (&Vcharset_ideograph_gt);
4227 Vcharset_ideograph_gt =
4228 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
4229 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4230 build_string ("GT"),
4231 build_string ("GT"),
4232 build_string ("GT"),
4234 Qnil, MIN_CHAR_GT, MAX_CHAR_GT, 0, 0);
4235 #define DEF_GT_PJ(n) \
4236 staticpro (&Vcharset_ideograph_gt_pj_##n); \
4237 Vcharset_ideograph_gt_pj_##n = \
4238 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
4239 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
4240 build_string ("GT-PJ-"#n), \
4241 build_string ("GT (pseudo JIS encoding) part "#n), \
4242 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
4244 ("\\(GTpj-"#n "\\|jisx0208\\.GT-"#n "\\)$"), \
4258 staticpro (&Vcharset_ideograph_daikanwa);
4259 Vcharset_ideograph_daikanwa =
4260 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
4261 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4262 build_string ("Daikanwa"),
4263 build_string ("Morohashi's Daikanwa"),
4264 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
4265 build_string ("Daikanwa"),
4266 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
4267 staticpro (&Vcharset_mojikyo);
4269 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
4270 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4271 build_string ("Mojikyo"),
4272 build_string ("Mojikyo"),
4273 build_string ("Konjaku-Mojikyo"),
4275 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
4276 staticpro (&Vcharset_mojikyo_2022_1);
4277 Vcharset_mojikyo_2022_1 =
4278 make_charset (LEADING_BYTE_MOJIKYO_2022_1, Qmojikyo_2022_1, 94, 3,
4279 2, 2, ':', CHARSET_LEFT_TO_RIGHT,
4280 build_string ("Mojikyo-2022-1"),
4281 build_string ("Mojikyo ISO-2022 Part 1"),
4282 build_string ("Konjaku-Mojikyo for ISO/IEC 2022 Part 1"),
4286 #define DEF_MOJIKYO_PJ(n) \
4287 staticpro (&Vcharset_mojikyo_pj_##n); \
4288 Vcharset_mojikyo_pj_##n = \
4289 make_charset (LEADING_BYTE_MOJIKYO_PJ_##n, Qmojikyo_pj_##n, 94, 2, \
4290 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
4291 build_string ("Mojikyo-PJ-"#n), \
4292 build_string ("Mojikyo (pseudo JIS encoding) part "#n), \
4294 ("Konjaku-Mojikyo (pseudo JIS encoding) part "#n), \
4296 ("\\(MojikyoPJ-"#n "\\|jisx0208\\.Mojikyo-"#n "\\)$"), \
4308 DEF_MOJIKYO_PJ (10);
4309 DEF_MOJIKYO_PJ (11);
4310 DEF_MOJIKYO_PJ (12);
4311 DEF_MOJIKYO_PJ (13);
4312 DEF_MOJIKYO_PJ (14);
4313 DEF_MOJIKYO_PJ (15);
4314 DEF_MOJIKYO_PJ (16);
4315 DEF_MOJIKYO_PJ (17);
4316 DEF_MOJIKYO_PJ (18);
4317 DEF_MOJIKYO_PJ (19);
4318 DEF_MOJIKYO_PJ (20);
4319 DEF_MOJIKYO_PJ (21);
4321 staticpro (&Vcharset_ethiopic_ucs);
4322 Vcharset_ethiopic_ucs =
4323 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
4324 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4325 build_string ("Ethiopic (UCS)"),
4326 build_string ("Ethiopic (UCS)"),
4327 build_string ("Ethiopic of UCS"),
4328 build_string ("Ethiopic-Unicode"),
4329 Qnil, 0x1200, 0x137F, 0x1200, 0);
4331 staticpro (&Vcharset_chinese_big5_1);
4332 Vcharset_chinese_big5_1 =
4333 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
4334 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
4335 build_string ("Big5"),
4336 build_string ("Big5 (Level-1)"),
4338 ("Big5 Level-1 Chinese traditional"),
4339 build_string ("big5"),
4341 staticpro (&Vcharset_chinese_big5_2);
4342 Vcharset_chinese_big5_2 =
4343 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
4344 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
4345 build_string ("Big5"),
4346 build_string ("Big5 (Level-2)"),
4348 ("Big5 Level-2 Chinese traditional"),
4349 build_string ("big5"),
4352 #ifdef ENABLE_COMPOSITE_CHARS
4353 /* #### For simplicity, we put composite chars into a 96x96 charset.
4354 This is going to lead to problems because you can run out of
4355 room, esp. as we don't yet recycle numbers. */
4356 staticpro (&Vcharset_composite);
4357 Vcharset_composite =
4358 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
4359 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4360 build_string ("Composite"),
4361 build_string ("Composite characters"),
4362 build_string ("Composite characters"),
4365 /* #### not dumped properly */
4366 composite_char_row_next = 32;
4367 composite_char_col_next = 32;
4369 Vcomposite_char_string2char_hash_table =
4370 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
4371 Vcomposite_char_char2string_hash_table =
4372 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
4373 staticpro (&Vcomposite_char_string2char_hash_table);
4374 staticpro (&Vcomposite_char_char2string_hash_table);
4375 #endif /* ENABLE_COMPOSITE_CHARS */