1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 20.3. Not in FSF. */
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_cns;
70 Lisp_Object Vcharset_latin_viscii;
71 Lisp_Object Vcharset_latin_tcvn5712;
72 Lisp_Object Vcharset_latin_viscii_lower;
73 Lisp_Object Vcharset_latin_viscii_upper;
74 Lisp_Object Vcharset_chinese_big5;
75 Lisp_Object Vcharset_ideograph_gt;
76 Lisp_Object Vcharset_ideograph_gt_pj_1;
77 Lisp_Object Vcharset_ideograph_gt_pj_2;
78 Lisp_Object Vcharset_ideograph_gt_pj_3;
79 Lisp_Object Vcharset_ideograph_gt_pj_4;
80 Lisp_Object Vcharset_ideograph_gt_pj_5;
81 Lisp_Object Vcharset_ideograph_gt_pj_6;
82 Lisp_Object Vcharset_ideograph_gt_pj_7;
83 Lisp_Object Vcharset_ideograph_gt_pj_8;
84 Lisp_Object Vcharset_ideograph_gt_pj_9;
85 Lisp_Object Vcharset_ideograph_gt_pj_10;
86 Lisp_Object Vcharset_ideograph_gt_pj_11;
87 Lisp_Object Vcharset_ideograph_daikanwa;
88 Lisp_Object Vcharset_mojikyo;
89 Lisp_Object Vcharset_mojikyo_2022_1;
90 Lisp_Object Vcharset_mojikyo_pj_1;
91 Lisp_Object Vcharset_mojikyo_pj_2;
92 Lisp_Object Vcharset_mojikyo_pj_3;
93 Lisp_Object Vcharset_mojikyo_pj_4;
94 Lisp_Object Vcharset_mojikyo_pj_5;
95 Lisp_Object Vcharset_mojikyo_pj_6;
96 Lisp_Object Vcharset_mojikyo_pj_7;
97 Lisp_Object Vcharset_mojikyo_pj_8;
98 Lisp_Object Vcharset_mojikyo_pj_9;
99 Lisp_Object Vcharset_mojikyo_pj_10;
100 Lisp_Object Vcharset_mojikyo_pj_11;
101 Lisp_Object Vcharset_mojikyo_pj_12;
102 Lisp_Object Vcharset_mojikyo_pj_13;
103 Lisp_Object Vcharset_mojikyo_pj_14;
104 Lisp_Object Vcharset_mojikyo_pj_15;
105 Lisp_Object Vcharset_mojikyo_pj_16;
106 Lisp_Object Vcharset_mojikyo_pj_17;
107 Lisp_Object Vcharset_mojikyo_pj_18;
108 Lisp_Object Vcharset_mojikyo_pj_19;
109 Lisp_Object Vcharset_mojikyo_pj_20;
110 Lisp_Object Vcharset_mojikyo_pj_21;
111 Lisp_Object Vcharset_ethiopic_ucs;
113 Lisp_Object Vcharset_chinese_big5_1;
114 Lisp_Object Vcharset_chinese_big5_2;
116 #ifdef ENABLE_COMPOSITE_CHARS
117 Lisp_Object Vcharset_composite;
119 /* Hash tables for composite chars. One maps string representing
120 composed chars to their equivalent chars; one goes the
122 Lisp_Object Vcomposite_char_char2string_hash_table;
123 Lisp_Object Vcomposite_char_string2char_hash_table;
125 static int composite_char_row_next;
126 static int composite_char_col_next;
128 #endif /* ENABLE_COMPOSITE_CHARS */
130 struct charset_lookup *chlook;
132 static const struct lrecord_description charset_lookup_description_1[] = {
133 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
142 static const struct struct_description charset_lookup_description = {
143 sizeof (struct charset_lookup),
144 charset_lookup_description_1
148 /* Table of number of bytes in the string representation of a character
149 indexed by the first byte of that representation.
151 rep_bytes_by_first_byte(c) is more efficient than the equivalent
152 canonical computation:
154 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
156 const Bytecount rep_bytes_by_first_byte[0xA0] =
157 { /* 0x00 - 0x7f are for straight ASCII */
158 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
165 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
166 /* 0x80 - 0x8f are for Dimension-1 official charsets */
168 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
170 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
172 /* 0x90 - 0x9d are for Dimension-2 official charsets */
173 /* 0x9e is for Dimension-1 private charsets */
174 /* 0x9f is for Dimension-2 private charsets */
175 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
181 #define BT_UINT8_MIN 0
182 #define BT_UINT8_MAX (UCHAR_MAX - 3)
183 #define BT_UINT8_t (UCHAR_MAX - 2)
184 #define BT_UINT8_nil (UCHAR_MAX - 1)
185 #define BT_UINT8_unbound UCHAR_MAX
187 INLINE_HEADER int INT_UINT8_P (Lisp_Object obj);
188 INLINE_HEADER int UINT8_VALUE_P (Lisp_Object obj);
189 INLINE_HEADER unsigned char UINT8_ENCODE (Lisp_Object obj);
190 INLINE_HEADER Lisp_Object UINT8_DECODE (unsigned char n);
191 INLINE_HEADER unsigned short UINT8_TO_UINT16 (unsigned char n);
194 INT_UINT8_P (Lisp_Object obj)
198 int num = XINT (obj);
200 return (BT_UINT8_MIN <= num) && (num <= BT_UINT8_MAX);
207 UINT8_VALUE_P (Lisp_Object obj)
209 return EQ (obj, Qunbound)
210 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT8_P (obj);
213 INLINE_HEADER unsigned char
214 UINT8_ENCODE (Lisp_Object obj)
216 if (EQ (obj, Qunbound))
217 return BT_UINT8_unbound;
218 else if (EQ (obj, Qnil))
220 else if (EQ (obj, Qt))
226 INLINE_HEADER Lisp_Object
227 UINT8_DECODE (unsigned char n)
229 if (n == BT_UINT8_unbound)
231 else if (n == BT_UINT8_nil)
233 else if (n == BT_UINT8_t)
240 mark_uint8_byte_table (Lisp_Object obj)
246 print_uint8_byte_table (Lisp_Object obj,
247 Lisp_Object printcharfun, int escapeflag)
249 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
251 struct gcpro gcpro1, gcpro2;
252 GCPRO2 (obj, printcharfun);
254 write_c_string ("\n#<uint8-byte-table", printcharfun);
255 for (i = 0; i < 256; i++)
257 unsigned char n = bte->property[i];
259 write_c_string ("\n ", printcharfun);
260 write_c_string (" ", printcharfun);
261 if (n == BT_UINT8_unbound)
262 write_c_string ("void", printcharfun);
263 else if (n == BT_UINT8_nil)
264 write_c_string ("nil", printcharfun);
265 else if (n == BT_UINT8_t)
266 write_c_string ("t", printcharfun);
271 sprintf (buf, "%hd", n);
272 write_c_string (buf, printcharfun);
276 write_c_string (">", printcharfun);
280 uint8_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
282 Lisp_Uint8_Byte_Table *te1 = XUINT8_BYTE_TABLE (obj1);
283 Lisp_Uint8_Byte_Table *te2 = XUINT8_BYTE_TABLE (obj2);
286 for (i = 0; i < 256; i++)
287 if (te1->property[i] != te2->property[i])
293 uint8_byte_table_hash (Lisp_Object obj, int depth)
295 Lisp_Uint8_Byte_Table *te = XUINT8_BYTE_TABLE (obj);
299 for (i = 0; i < 256; i++)
300 hash = HASH2 (hash, te->property[i]);
304 DEFINE_LRECORD_IMPLEMENTATION ("uint8-byte-table", uint8_byte_table,
305 mark_uint8_byte_table,
306 print_uint8_byte_table,
307 0, uint8_byte_table_equal,
308 uint8_byte_table_hash,
309 0 /* uint8_byte_table_description */,
310 Lisp_Uint8_Byte_Table);
313 make_uint8_byte_table (unsigned char initval)
317 Lisp_Uint8_Byte_Table *cte;
319 cte = alloc_lcrecord_type (Lisp_Uint8_Byte_Table,
320 &lrecord_uint8_byte_table);
322 for (i = 0; i < 256; i++)
323 cte->property[i] = initval;
325 XSETUINT8_BYTE_TABLE (obj, cte);
330 uint8_byte_table_same_value_p (Lisp_Object obj)
332 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
333 unsigned char v0 = bte->property[0];
336 for (i = 1; i < 256; i++)
338 if (bte->property[i] != v0)
345 #define BT_UINT16_MIN 0
346 #define BT_UINT16_MAX (USHRT_MAX - 3)
347 #define BT_UINT16_t (USHRT_MAX - 2)
348 #define BT_UINT16_nil (USHRT_MAX - 1)
349 #define BT_UINT16_unbound USHRT_MAX
351 INLINE_HEADER int INT_UINT16_P (Lisp_Object obj);
352 INLINE_HEADER int UINT16_VALUE_P (Lisp_Object obj);
353 INLINE_HEADER unsigned short UINT16_ENCODE (Lisp_Object obj);
354 INLINE_HEADER Lisp_Object UINT16_DECODE (unsigned short us);
357 INT_UINT16_P (Lisp_Object obj)
361 int num = XINT (obj);
363 return (BT_UINT16_MIN <= num) && (num <= BT_UINT16_MAX);
370 UINT16_VALUE_P (Lisp_Object obj)
372 return EQ (obj, Qunbound)
373 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT16_P (obj);
376 INLINE_HEADER unsigned short
377 UINT16_ENCODE (Lisp_Object obj)
379 if (EQ (obj, Qunbound))
380 return BT_UINT16_unbound;
381 else if (EQ (obj, Qnil))
382 return BT_UINT16_nil;
383 else if (EQ (obj, Qt))
389 INLINE_HEADER Lisp_Object
390 UINT16_DECODE (unsigned short n)
392 if (n == BT_UINT16_unbound)
394 else if (n == BT_UINT16_nil)
396 else if (n == BT_UINT16_t)
402 INLINE_HEADER unsigned short
403 UINT8_TO_UINT16 (unsigned char n)
405 if (n == BT_UINT8_unbound)
406 return BT_UINT16_unbound;
407 else if (n == BT_UINT8_nil)
408 return BT_UINT16_nil;
409 else if (n == BT_UINT8_t)
416 mark_uint16_byte_table (Lisp_Object obj)
422 print_uint16_byte_table (Lisp_Object obj,
423 Lisp_Object printcharfun, int escapeflag)
425 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
427 struct gcpro gcpro1, gcpro2;
428 GCPRO2 (obj, printcharfun);
430 write_c_string ("\n#<uint16-byte-table", printcharfun);
431 for (i = 0; i < 256; i++)
433 unsigned short n = bte->property[i];
435 write_c_string ("\n ", printcharfun);
436 write_c_string (" ", printcharfun);
437 if (n == BT_UINT16_unbound)
438 write_c_string ("void", printcharfun);
439 else if (n == BT_UINT16_nil)
440 write_c_string ("nil", printcharfun);
441 else if (n == BT_UINT16_t)
442 write_c_string ("t", printcharfun);
447 sprintf (buf, "%hd", n);
448 write_c_string (buf, printcharfun);
452 write_c_string (">", printcharfun);
456 uint16_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
458 Lisp_Uint16_Byte_Table *te1 = XUINT16_BYTE_TABLE (obj1);
459 Lisp_Uint16_Byte_Table *te2 = XUINT16_BYTE_TABLE (obj2);
462 for (i = 0; i < 256; i++)
463 if (te1->property[i] != te2->property[i])
469 uint16_byte_table_hash (Lisp_Object obj, int depth)
471 Lisp_Uint16_Byte_Table *te = XUINT16_BYTE_TABLE (obj);
475 for (i = 0; i < 256; i++)
476 hash = HASH2 (hash, te->property[i]);
480 DEFINE_LRECORD_IMPLEMENTATION ("uint16-byte-table", uint16_byte_table,
481 mark_uint16_byte_table,
482 print_uint16_byte_table,
483 0, uint16_byte_table_equal,
484 uint16_byte_table_hash,
485 0 /* uint16_byte_table_description */,
486 Lisp_Uint16_Byte_Table);
489 make_uint16_byte_table (unsigned short initval)
493 Lisp_Uint16_Byte_Table *cte;
495 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
496 &lrecord_uint16_byte_table);
498 for (i = 0; i < 256; i++)
499 cte->property[i] = initval;
501 XSETUINT16_BYTE_TABLE (obj, cte);
506 expand_uint8_byte_table_to_uint16 (Lisp_Object table)
510 Lisp_Uint8_Byte_Table* bte = XUINT8_BYTE_TABLE(table);
511 Lisp_Uint16_Byte_Table* cte;
513 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
514 &lrecord_uint16_byte_table);
515 for (i = 0; i < 256; i++)
517 cte->property[i] = UINT8_TO_UINT16 (bte->property[i]);
519 XSETUINT16_BYTE_TABLE (obj, cte);
524 uint16_byte_table_same_value_p (Lisp_Object obj)
526 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
527 unsigned short v0 = bte->property[0];
530 for (i = 1; i < 256; i++)
532 if (bte->property[i] != v0)
540 mark_byte_table (Lisp_Object obj)
542 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
545 for (i = 0; i < 256; i++)
547 mark_object (cte->property[i]);
553 print_byte_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
555 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
557 struct gcpro gcpro1, gcpro2;
558 GCPRO2 (obj, printcharfun);
560 write_c_string ("\n#<byte-table", printcharfun);
561 for (i = 0; i < 256; i++)
563 Lisp_Object elt = bte->property[i];
565 write_c_string ("\n ", printcharfun);
566 write_c_string (" ", printcharfun);
567 if (EQ (elt, Qunbound))
568 write_c_string ("void", printcharfun);
570 print_internal (elt, printcharfun, escapeflag);
573 write_c_string (">", printcharfun);
577 byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
579 Lisp_Byte_Table *cte1 = XBYTE_TABLE (obj1);
580 Lisp_Byte_Table *cte2 = XBYTE_TABLE (obj2);
583 for (i = 0; i < 256; i++)
584 if (BYTE_TABLE_P (cte1->property[i]))
586 if (BYTE_TABLE_P (cte2->property[i]))
588 if (!byte_table_equal (cte1->property[i],
589 cte2->property[i], depth + 1))
596 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
602 byte_table_hash (Lisp_Object obj, int depth)
604 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
606 return internal_array_hash (cte->property, 256, depth);
609 static const struct lrecord_description byte_table_description[] = {
610 { XD_LISP_OBJECT_ARRAY, offsetof(Lisp_Byte_Table, property), 256 },
614 DEFINE_LRECORD_IMPLEMENTATION ("byte-table", byte_table,
619 byte_table_description,
623 make_byte_table (Lisp_Object initval)
627 Lisp_Byte_Table *cte;
629 cte = alloc_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
631 for (i = 0; i < 256; i++)
632 cte->property[i] = initval;
634 XSETBYTE_TABLE (obj, cte);
639 byte_table_same_value_p (Lisp_Object obj)
641 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
642 Lisp_Object v0 = bte->property[0];
645 for (i = 1; i < 256; i++)
647 if (!internal_equal (bte->property[i], v0, 0))
654 Lisp_Object get_byte_table (Lisp_Object table, unsigned char idx);
655 Lisp_Object put_byte_table (Lisp_Object table, unsigned char idx,
659 get_byte_table (Lisp_Object table, unsigned char idx)
661 if (UINT8_BYTE_TABLE_P (table))
662 return UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[idx]);
663 else if (UINT16_BYTE_TABLE_P (table))
664 return UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[idx]);
665 else if (BYTE_TABLE_P (table))
666 return XBYTE_TABLE(table)->property[idx];
672 put_byte_table (Lisp_Object table, unsigned char idx, Lisp_Object value)
674 if (UINT8_BYTE_TABLE_P (table))
676 if (UINT8_VALUE_P (value))
678 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
679 if (!UINT8_BYTE_TABLE_P (value) &&
680 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
681 && uint8_byte_table_same_value_p (table))
686 else if (UINT16_VALUE_P (value))
688 Lisp_Object new = expand_uint8_byte_table_to_uint16 (table);
690 XUINT16_BYTE_TABLE(new)->property[idx] = UINT16_ENCODE (value);
695 Lisp_Object new = make_byte_table (Qnil);
698 for (i = 0; i < 256; i++)
700 XBYTE_TABLE(new)->property[i]
701 = UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[i]);
703 XBYTE_TABLE(new)->property[idx] = value;
707 else if (UINT16_BYTE_TABLE_P (table))
709 if (UINT16_VALUE_P (value))
711 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
712 if (!UINT8_BYTE_TABLE_P (value) &&
713 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
714 && uint16_byte_table_same_value_p (table))
721 Lisp_Object new = make_byte_table (Qnil);
724 for (i = 0; i < 256; i++)
726 XBYTE_TABLE(new)->property[i]
727 = UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[i]);
729 XBYTE_TABLE(new)->property[idx] = value;
733 else if (BYTE_TABLE_P (table))
735 XBYTE_TABLE(table)->property[idx] = value;
736 if (!UINT8_BYTE_TABLE_P (value) &&
737 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
738 && byte_table_same_value_p (table))
743 else if (!internal_equal (table, value, 0))
745 if (UINT8_VALUE_P (table) && UINT8_VALUE_P (value))
747 table = make_uint8_byte_table (UINT8_ENCODE (table));
748 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
750 else if (UINT16_VALUE_P (table) && UINT16_VALUE_P (value))
752 table = make_uint16_byte_table (UINT16_ENCODE (table));
753 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
757 table = make_byte_table (table);
758 XBYTE_TABLE(table)->property[idx] = value;
765 mark_char_id_table (Lisp_Object obj)
767 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
773 print_char_id_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
775 Lisp_Object table = XCHAR_ID_TABLE (obj)->table;
777 struct gcpro gcpro1, gcpro2;
778 GCPRO2 (obj, printcharfun);
780 write_c_string ("#<char-id-table ", printcharfun);
781 for (i = 0; i < 256; i++)
783 Lisp_Object elt = get_byte_table (table, i);
784 if (i != 0) write_c_string ("\n ", printcharfun);
785 if (EQ (elt, Qunbound))
786 write_c_string ("void", printcharfun);
788 print_internal (elt, printcharfun, escapeflag);
791 write_c_string (">", printcharfun);
795 char_id_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
797 Lisp_Object table1 = XCHAR_ID_TABLE (obj1)->table;
798 Lisp_Object table2 = XCHAR_ID_TABLE (obj2)->table;
801 for (i = 0; i < 256; i++)
803 if (!internal_equal (get_byte_table (table1, i),
804 get_byte_table (table2, i), 0))
811 char_id_table_hash (Lisp_Object obj, int depth)
813 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
815 return char_id_table_hash (cte->table, depth + 1);
818 static const struct lrecord_description char_id_table_description[] = {
819 { XD_LISP_OBJECT, offsetof(Lisp_Char_ID_Table, table) },
823 DEFINE_LRECORD_IMPLEMENTATION ("char-id-table", char_id_table,
826 0, char_id_table_equal,
828 char_id_table_description,
832 make_char_id_table (Lisp_Object initval)
835 Lisp_Char_ID_Table *cte;
837 cte = alloc_lcrecord_type (Lisp_Char_ID_Table, &lrecord_char_id_table);
839 cte->table = make_byte_table (initval);
841 XSETCHAR_ID_TABLE (obj, cte);
847 get_char_id_table (Emchar ch, Lisp_Object table)
849 unsigned int code = ch;
856 (XCHAR_ID_TABLE (table)->table,
857 (unsigned char)(code >> 24)),
858 (unsigned char) (code >> 16)),
859 (unsigned char) (code >> 8)),
860 (unsigned char) code);
863 void put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table);
865 put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table)
867 unsigned int code = ch;
868 Lisp_Object table1, table2, table3, table4;
870 table1 = XCHAR_ID_TABLE (table)->table;
871 table2 = get_byte_table (table1, (unsigned char)(code >> 24));
872 table3 = get_byte_table (table2, (unsigned char)(code >> 16));
873 table4 = get_byte_table (table3, (unsigned char)(code >> 8));
875 table4 = put_byte_table (table4, (unsigned char)code, value);
876 table3 = put_byte_table (table3, (unsigned char)(code >> 8), table4);
877 table2 = put_byte_table (table2, (unsigned char)(code >> 16), table3);
878 XCHAR_ID_TABLE (table)->table
879 = put_byte_table (table1, (unsigned char)(code >> 24), table2);
883 Lisp_Object Vchar_attribute_hash_table;
884 Lisp_Object Vcharacter_composition_table;
885 Lisp_Object Vcharacter_variant_table;
887 Lisp_Object Qideograph_daikanwa;
888 Lisp_Object Q_decomposition;
892 Lisp_Object Qisolated;
893 Lisp_Object Qinitial;
896 Lisp_Object Qvertical;
897 Lisp_Object QnoBreak;
898 Lisp_Object Qfraction;
908 Emchar to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg);
910 Lisp_Object put_char_ccs_code_point (Lisp_Object character,
911 Lisp_Object ccs, Lisp_Object value);
912 Lisp_Object remove_char_ccs (Lisp_Object character, Lisp_Object ccs);
915 to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
921 else if (EQ (v, Qcompat))
923 else if (EQ (v, Qisolated))
925 else if (EQ (v, Qinitial))
927 else if (EQ (v, Qmedial))
929 else if (EQ (v, Qfinal))
931 else if (EQ (v, Qvertical))
933 else if (EQ (v, QnoBreak))
935 else if (EQ (v, Qfraction))
937 else if (EQ (v, Qsuper))
939 else if (EQ (v, Qsub))
941 else if (EQ (v, Qcircle))
943 else if (EQ (v, Qsquare))
945 else if (EQ (v, Qwide))
947 else if (EQ (v, Qnarrow))
949 else if (EQ (v, Qsmall))
951 else if (EQ (v, Qfont))
954 signal_simple_error (err_msg, err_arg);
957 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
958 Return character corresponding with list.
962 Lisp_Object table = Vcharacter_composition_table;
963 Lisp_Object rest = list;
967 Lisp_Object v = Fcar (rest);
969 Emchar c = to_char_id (v, "Invalid value for composition", list);
971 ret = get_char_id_table (c, table);
976 if (!CHAR_ID_TABLE_P (ret))
981 else if (!CONSP (rest))
983 else if (CHAR_ID_TABLE_P (ret))
986 signal_simple_error ("Invalid table is found with", list);
988 signal_simple_error ("Invalid value for composition", list);
991 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
992 Return variants of CHARACTER.
996 CHECK_CHAR (character);
997 return Fcopy_list (get_char_id_table (XCHAR (character),
998 Vcharacter_variant_table));
1002 /* We store the char-attributes in hash tables with the names as the
1003 key and the actual char-id-table object as the value. Occasionally
1004 we need to use them in a list format. These routines provide us
1006 struct char_attribute_list_closure
1008 Lisp_Object *char_attribute_list;
1012 add_char_attribute_to_list_mapper (Lisp_Object key, Lisp_Object value,
1013 void *char_attribute_list_closure)
1015 /* This function can GC */
1016 struct char_attribute_list_closure *calcl
1017 = (struct char_attribute_list_closure*) char_attribute_list_closure;
1018 Lisp_Object *char_attribute_list = calcl->char_attribute_list;
1020 *char_attribute_list = Fcons (key, *char_attribute_list);
1024 DEFUN ("char-attribute-list", Fchar_attribute_list, 0, 0, 0, /*
1025 Return the list of all existing character attributes except coded-charsets.
1029 Lisp_Object char_attribute_list = Qnil;
1030 struct gcpro gcpro1;
1031 struct char_attribute_list_closure char_attribute_list_closure;
1033 GCPRO1 (char_attribute_list);
1034 char_attribute_list_closure.char_attribute_list = &char_attribute_list;
1035 elisp_maphash (add_char_attribute_to_list_mapper,
1036 Vchar_attribute_hash_table,
1037 &char_attribute_list_closure);
1039 return char_attribute_list;
1042 DEFUN ("find-char-attribute-table", Ffind_char_attribute_table, 1, 1, 0, /*
1043 Return char-id-table corresponding to ATTRIBUTE.
1047 return Fgethash (attribute, Vchar_attribute_hash_table, Qnil);
1051 /* We store the char-id-tables in hash tables with the attributes as
1052 the key and the actual char-id-table object as the value. Each
1053 char-id-table stores values of an attribute corresponding with
1054 characters. Occasionally we need to get attributes of a character
1055 in a association-list format. These routines provide us with
1057 struct char_attribute_alist_closure
1060 Lisp_Object *char_attribute_alist;
1064 add_char_attribute_alist_mapper (Lisp_Object key, Lisp_Object value,
1065 void *char_attribute_alist_closure)
1067 /* This function can GC */
1068 struct char_attribute_alist_closure *caacl =
1069 (struct char_attribute_alist_closure*) char_attribute_alist_closure;
1070 Lisp_Object ret = get_char_id_table (caacl->char_id, value);
1071 if (!UNBOUNDP (ret))
1073 Lisp_Object *char_attribute_alist = caacl->char_attribute_alist;
1074 *char_attribute_alist
1075 = Fcons (Fcons (key, ret), *char_attribute_alist);
1080 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
1081 Return the alist of attributes of CHARACTER.
1085 Lisp_Object alist = Qnil;
1088 CHECK_CHAR (character);
1090 struct gcpro gcpro1;
1091 struct char_attribute_alist_closure char_attribute_alist_closure;
1094 char_attribute_alist_closure.char_id = XCHAR (character);
1095 char_attribute_alist_closure.char_attribute_alist = &alist;
1096 elisp_maphash (add_char_attribute_alist_mapper,
1097 Vchar_attribute_hash_table,
1098 &char_attribute_alist_closure);
1102 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
1104 Lisp_Object ccs = chlook->charset_by_leading_byte[i];
1108 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1111 if ( CHAR_ID_TABLE_P (encoding_table)
1112 && INTP (cpos = get_char_id_table (XCHAR (character),
1115 alist = Fcons (Fcons (ccs, cpos), alist);
1122 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 3, 0, /*
1123 Return the value of CHARACTER's ATTRIBUTE.
1124 Return DEFAULT-VALUE if the value is not exist.
1126 (character, attribute, default_value))
1130 CHECK_CHAR (character);
1131 if (!NILP (ccs = Ffind_charset (attribute)))
1133 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1135 if (CHAR_ID_TABLE_P (encoding_table))
1136 return get_char_id_table (XCHAR (character), encoding_table);
1140 Lisp_Object table = Fgethash (attribute,
1141 Vchar_attribute_hash_table,
1143 if (!UNBOUNDP (table))
1145 Lisp_Object ret = get_char_id_table (XCHAR (character), table);
1146 if (!UNBOUNDP (ret))
1150 return default_value;
1153 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
1154 Store CHARACTER's ATTRIBUTE with VALUE.
1156 (character, attribute, value))
1160 CHECK_CHAR (character);
1161 ccs = Ffind_charset (attribute);
1164 return put_char_ccs_code_point (character, ccs, value);
1166 else if (EQ (attribute, Q_decomposition))
1171 signal_simple_error ("Invalid value for ->decomposition",
1174 if (CONSP (Fcdr (value)))
1176 Lisp_Object rest = value;
1177 Lisp_Object table = Vcharacter_composition_table;
1181 GET_EXTERNAL_LIST_LENGTH (rest, len);
1182 seq = make_vector (len, Qnil);
1184 while (CONSP (rest))
1186 Lisp_Object v = Fcar (rest);
1189 = to_char_id (v, "Invalid value for ->decomposition", value);
1192 XVECTOR_DATA(seq)[i++] = v;
1194 XVECTOR_DATA(seq)[i++] = make_char (c);
1198 put_char_id_table (c, character, table);
1203 ntable = get_char_id_table (c, table);
1204 if (!CHAR_ID_TABLE_P (ntable))
1206 ntable = make_char_id_table (Qnil);
1207 put_char_id_table (c, ntable, table);
1215 Lisp_Object v = Fcar (value);
1219 Emchar c = XINT (v);
1221 = get_char_id_table (c, Vcharacter_variant_table);
1223 if (NILP (Fmemq (v, ret)))
1225 put_char_id_table (c, Fcons (character, ret),
1226 Vcharacter_variant_table);
1229 seq = make_vector (1, v);
1233 else if (EQ (attribute, Q_ucs))
1239 signal_simple_error ("Invalid value for ->ucs", value);
1243 ret = get_char_id_table (c, Vcharacter_variant_table);
1244 if (NILP (Fmemq (character, ret)))
1246 put_char_id_table (c, Fcons (character, ret),
1247 Vcharacter_variant_table);
1251 Lisp_Object table = Fgethash (attribute,
1252 Vchar_attribute_hash_table,
1257 table = make_char_id_table (Qunbound);
1258 Fputhash (attribute, table, Vchar_attribute_hash_table);
1260 put_char_id_table (XCHAR (character), value, table);
1265 DEFUN ("remove-char-attribute", Fremove_char_attribute, 2, 2, 0, /*
1266 Remove CHARACTER's ATTRIBUTE.
1268 (character, attribute))
1272 CHECK_CHAR (character);
1273 ccs = Ffind_charset (attribute);
1276 return remove_char_ccs (character, ccs);
1280 Lisp_Object table = Fgethash (attribute,
1281 Vchar_attribute_hash_table,
1283 if (!UNBOUNDP (table))
1285 put_char_id_table (XCHAR (character), Qunbound, table);
1292 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
1294 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
1296 /* ad-hoc method for `ascii' */
1297 if ((CHARSET_CHARS (cs) == 94) &&
1298 (CHARSET_BYTE_OFFSET (cs) != 33))
1299 return 128 - CHARSET_BYTE_OFFSET (cs);
1301 return CHARSET_CHARS (cs);
1304 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
1306 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
1308 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
1312 if (XVECTOR_LENGTH (v) > ccs_len)
1315 for (i = 0; i < XVECTOR_LENGTH (v); i++)
1317 Lisp_Object c = XVECTOR_DATA(v)[i];
1319 if (!NILP (c) && !CHARP (c))
1323 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
1335 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1338 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1348 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1349 nv = XVECTOR_DATA(v)[i];
1355 XVECTOR_DATA(v)[i] = Qnil;
1359 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1360 int code_point, Lisp_Object character);
1362 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1363 int code_point, Lisp_Object character)
1367 int ccs_len = XVECTOR_LENGTH (v);
1372 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1373 nv = XVECTOR_DATA(v)[i];
1377 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
1383 XVECTOR_DATA(v)[i] = character;
1387 put_char_ccs_code_point (Lisp_Object character,
1388 Lisp_Object ccs, Lisp_Object value)
1390 Lisp_Object encoding_table;
1392 if (!EQ (XCHARSET_NAME (ccs), Qucs)
1393 || (XCHAR (character) != XINT (value)))
1395 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
1396 int dim = XCHARSET_DIMENSION (ccs);
1397 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
1398 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
1402 { /* obsolete representation: value must be a list of bytes */
1403 Lisp_Object ret = Fcar (value);
1407 signal_simple_error ("Invalid value for coded-charset", value);
1408 code_point = XINT (ret);
1409 if (XCHARSET_GRAPHIC (ccs) == 1)
1411 rest = Fcdr (value);
1412 while (!NILP (rest))
1417 signal_simple_error ("Invalid value for coded-charset",
1421 signal_simple_error ("Invalid value for coded-charset",
1424 if (XCHARSET_GRAPHIC (ccs) == 1)
1426 code_point = (code_point << 8) | j;
1429 value = make_int (code_point);
1431 else if (INTP (value))
1433 code_point = XINT (value);
1434 if (XCHARSET_GRAPHIC (ccs) == 1)
1436 code_point &= 0x7F7F7F7F;
1437 value = make_int (code_point);
1441 signal_simple_error ("Invalid value for coded-charset", value);
1445 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
1448 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
1453 XCHARSET_DECODING_TABLE (ccs)
1454 = v = make_older_vector (ccs_len, Qnil);
1457 decoding_table_put_char (v, dim, byte_offset, code_point, character);
1459 if (NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
1461 XCHARSET_ENCODING_TABLE (ccs)
1462 = encoding_table = make_char_id_table (Qnil);
1464 put_char_id_table (XCHAR (character), value, encoding_table);
1469 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
1471 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
1472 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1474 if (VECTORP (decoding_table))
1476 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
1480 decoding_table_remove_char (decoding_table,
1481 XCHARSET_DIMENSION (ccs),
1482 XCHARSET_BYTE_OFFSET (ccs),
1486 if (CHAR_ID_TABLE_P (encoding_table))
1488 put_char_id_table (XCHAR (character), Qnil, encoding_table);
1493 EXFUN (Fmake_char, 3);
1494 EXFUN (Fdecode_char, 2);
1496 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
1497 Store character's ATTRIBUTES.
1501 Lisp_Object rest = attributes;
1502 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
1503 Lisp_Object character;
1507 while (CONSP (rest))
1509 Lisp_Object cell = Fcar (rest);
1513 signal_simple_error ("Invalid argument", attributes);
1514 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
1515 && ((XCHARSET_FINAL (ccs) != 0) ||
1516 (XCHARSET_UCS_MAX (ccs) > 0)) )
1520 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
1522 character = Fdecode_char (ccs, cell);
1523 if (!NILP (character))
1524 goto setup_attributes;
1528 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
1531 signal_simple_error ("Invalid argument", attributes);
1533 character = make_char (XINT (code) + 0x100000);
1534 goto setup_attributes;
1538 else if (!INTP (code))
1539 signal_simple_error ("Invalid argument", attributes);
1541 character = make_char (XINT (code));
1545 while (CONSP (rest))
1547 Lisp_Object cell = Fcar (rest);
1550 signal_simple_error ("Invalid argument", attributes);
1552 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
1558 Lisp_Object Vutf_2000_version;
1562 int leading_code_private_11;
1565 Lisp_Object Qcharsetp;
1567 /* Qdoc_string, Qdimension, Qchars defined in general.c */
1568 Lisp_Object Qregistry, Qfinal, Qgraphic;
1569 Lisp_Object Qdirection;
1570 Lisp_Object Qreverse_direction_charset;
1571 Lisp_Object Qleading_byte;
1572 Lisp_Object Qshort_name, Qlong_name;
1586 Qcyrillic_iso8859_5,
1588 Qjapanese_jisx0208_1978,
1592 Qjapanese_jisx0208_1990,
1595 Qchinese_cns11643_1,
1596 Qchinese_cns11643_2,
1602 Qlatin_viscii_lower,
1603 Qlatin_viscii_upper,
1604 Qvietnamese_viscii_lower,
1605 Qvietnamese_viscii_upper,
1617 Qideograph_gt_pj_10,
1618 Qideograph_gt_pj_11,
1648 Lisp_Object Ql2r, Qr2l;
1650 Lisp_Object Vcharset_hash_table;
1652 /* Composite characters are characters constructed by overstriking two
1653 or more regular characters.
1655 1) The old Mule implementation involves storing composite characters
1656 in a buffer as a tag followed by all of the actual characters
1657 used to make up the composite character. I think this is a bad
1658 idea; it greatly complicates code that wants to handle strings
1659 one character at a time because it has to deal with the possibility
1660 of great big ungainly characters. It's much more reasonable to
1661 simply store an index into a table of composite characters.
1663 2) The current implementation only allows for 16,384 separate
1664 composite characters over the lifetime of the XEmacs process.
1665 This could become a potential problem if the user
1666 edited lots of different files that use composite characters.
1667 Due to FSF bogosity, increasing the number of allowable
1668 composite characters under Mule would decrease the number
1669 of possible faces that can exist. Mule already has shrunk
1670 this to 2048, and further shrinkage would become uncomfortable.
1671 No such problems exist in XEmacs.
1673 Composite characters could be represented as 0x80 C1 C2 C3,
1674 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1675 for slightly under 2^20 (one million) composite characters
1676 over the XEmacs process lifetime, and you only need to
1677 increase the size of a Mule character from 19 to 21 bits.
1678 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1679 85 million (slightly over 2^26) composite characters. */
1682 /************************************************************************/
1683 /* Basic Emchar functions */
1684 /************************************************************************/
1686 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1687 string in STR. Returns the number of bytes stored.
1688 Do not call this directly. Use the macro set_charptr_emchar() instead.
1692 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1698 Lisp_Object charset;
1707 else if ( c <= 0x7ff )
1709 *p++ = (c >> 6) | 0xc0;
1710 *p++ = (c & 0x3f) | 0x80;
1712 else if ( c <= 0xffff )
1714 *p++ = (c >> 12) | 0xe0;
1715 *p++ = ((c >> 6) & 0x3f) | 0x80;
1716 *p++ = (c & 0x3f) | 0x80;
1718 else if ( c <= 0x1fffff )
1720 *p++ = (c >> 18) | 0xf0;
1721 *p++ = ((c >> 12) & 0x3f) | 0x80;
1722 *p++ = ((c >> 6) & 0x3f) | 0x80;
1723 *p++ = (c & 0x3f) | 0x80;
1725 else if ( c <= 0x3ffffff )
1727 *p++ = (c >> 24) | 0xf8;
1728 *p++ = ((c >> 18) & 0x3f) | 0x80;
1729 *p++ = ((c >> 12) & 0x3f) | 0x80;
1730 *p++ = ((c >> 6) & 0x3f) | 0x80;
1731 *p++ = (c & 0x3f) | 0x80;
1735 *p++ = (c >> 30) | 0xfc;
1736 *p++ = ((c >> 24) & 0x3f) | 0x80;
1737 *p++ = ((c >> 18) & 0x3f) | 0x80;
1738 *p++ = ((c >> 12) & 0x3f) | 0x80;
1739 *p++ = ((c >> 6) & 0x3f) | 0x80;
1740 *p++ = (c & 0x3f) | 0x80;
1743 BREAKUP_CHAR (c, charset, c1, c2);
1744 lb = CHAR_LEADING_BYTE (c);
1745 if (LEADING_BYTE_PRIVATE_P (lb))
1746 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1748 if (EQ (charset, Vcharset_control_1))
1757 /* Return the first character from a Mule-encoded string in STR,
1758 assuming it's non-ASCII. Do not call this directly.
1759 Use the macro charptr_emchar() instead. */
1762 non_ascii_charptr_emchar (const Bufbyte *str)
1775 else if ( b >= 0xf8 )
1780 else if ( b >= 0xf0 )
1785 else if ( b >= 0xe0 )
1790 else if ( b >= 0xc0 )
1800 for( ; len > 0; len-- )
1803 ch = ( ch << 6 ) | ( b & 0x3f );
1807 Bufbyte i0 = *str, i1, i2 = 0;
1808 Lisp_Object charset;
1810 if (i0 == LEADING_BYTE_CONTROL_1)
1811 return (Emchar) (*++str - 0x20);
1813 if (LEADING_BYTE_PREFIX_P (i0))
1818 charset = CHARSET_BY_LEADING_BYTE (i0);
1819 if (XCHARSET_DIMENSION (charset) == 2)
1822 return MAKE_CHAR (charset, i1, i2);
1826 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1827 Do not call this directly. Use the macro valid_char_p() instead. */
1831 non_ascii_valid_char_p (Emchar ch)
1835 /* Must have only lowest 19 bits set */
1839 f1 = CHAR_FIELD1 (ch);
1840 f2 = CHAR_FIELD2 (ch);
1841 f3 = CHAR_FIELD3 (ch);
1845 Lisp_Object charset;
1847 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1848 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1849 f2 > MAX_CHAR_FIELD2_PRIVATE)
1854 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1855 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1859 NOTE: This takes advantage of the fact that
1860 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1861 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1863 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1864 if (EQ (charset, Qnil))
1866 return (XCHARSET_CHARS (charset) == 96);
1870 Lisp_Object charset;
1872 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1873 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1874 f1 > MAX_CHAR_FIELD1_PRIVATE)
1876 if (f2 < 0x20 || f3 < 0x20)
1879 #ifdef ENABLE_COMPOSITE_CHARS
1880 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1882 if (UNBOUNDP (Fgethash (make_int (ch),
1883 Vcomposite_char_char2string_hash_table,
1888 #endif /* ENABLE_COMPOSITE_CHARS */
1890 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1891 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1894 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1896 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1899 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1901 if (EQ (charset, Qnil))
1903 return (XCHARSET_CHARS (charset) == 96);
1909 /************************************************************************/
1910 /* Basic string functions */
1911 /************************************************************************/
1913 /* Copy the character pointed to by SRC into DST. Do not call this
1914 directly. Use the macro charptr_copy_char() instead.
1915 Return the number of bytes copied. */
1918 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
1920 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
1922 for (i = bytes; i; i--, dst++, src++)
1928 /************************************************************************/
1929 /* streams of Emchars */
1930 /************************************************************************/
1932 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1933 The functions below are not meant to be called directly; use
1934 the macros in insdel.h. */
1937 Lstream_get_emchar_1 (Lstream *stream, int ch)
1939 Bufbyte str[MAX_EMCHAR_LEN];
1940 Bufbyte *strptr = str;
1943 str[0] = (Bufbyte) ch;
1945 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
1947 int c = Lstream_getc (stream);
1948 bufpos_checking_assert (c >= 0);
1949 *++strptr = (Bufbyte) c;
1951 return charptr_emchar (str);
1955 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1957 Bufbyte str[MAX_EMCHAR_LEN];
1958 Bytecount len = set_charptr_emchar (str, ch);
1959 return Lstream_write (stream, str, len);
1963 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1965 Bufbyte str[MAX_EMCHAR_LEN];
1966 Bytecount len = set_charptr_emchar (str, ch);
1967 Lstream_unread (stream, str, len);
1971 /************************************************************************/
1972 /* charset object */
1973 /************************************************************************/
1976 mark_charset (Lisp_Object obj)
1978 Lisp_Charset *cs = XCHARSET (obj);
1980 mark_object (cs->short_name);
1981 mark_object (cs->long_name);
1982 mark_object (cs->doc_string);
1983 mark_object (cs->registry);
1984 mark_object (cs->ccl_program);
1986 mark_object (cs->encoding_table);
1987 /* mark_object (cs->decoding_table); */
1993 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1995 Lisp_Charset *cs = XCHARSET (obj);
1999 error ("printing unreadable object #<charset %s 0x%x>",
2000 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
2003 write_c_string ("#<charset ", printcharfun);
2004 print_internal (CHARSET_NAME (cs), printcharfun, 0);
2005 write_c_string (" ", printcharfun);
2006 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
2007 write_c_string (" ", printcharfun);
2008 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
2009 write_c_string (" ", printcharfun);
2010 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
2011 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
2013 CHARSET_DIMENSION (cs),
2014 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
2015 CHARSET_COLUMNS (cs),
2016 CHARSET_GRAPHIC (cs),
2017 CHARSET_FINAL (cs));
2018 write_c_string (buf, printcharfun);
2019 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
2020 sprintf (buf, " 0x%x>", cs->header.uid);
2021 write_c_string (buf, printcharfun);
2024 static const struct lrecord_description charset_description[] = {
2025 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
2026 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
2027 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
2028 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
2029 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
2030 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
2031 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
2033 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
2034 { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
2039 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
2040 mark_charset, print_charset, 0, 0, 0,
2041 charset_description,
2044 /* Make a new charset. */
2045 /* #### SJT Should generic properties be allowed? */
2047 make_charset (Charset_ID id, Lisp_Object name,
2048 unsigned short chars, unsigned char dimension,
2049 unsigned char columns, unsigned char graphic,
2050 Bufbyte final, unsigned char direction, Lisp_Object short_name,
2051 Lisp_Object long_name, Lisp_Object doc,
2053 Lisp_Object decoding_table,
2054 Emchar ucs_min, Emchar ucs_max,
2055 Emchar code_offset, unsigned char byte_offset)
2058 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
2062 XSETCHARSET (obj, cs);
2064 CHARSET_ID (cs) = id;
2065 CHARSET_NAME (cs) = name;
2066 CHARSET_SHORT_NAME (cs) = short_name;
2067 CHARSET_LONG_NAME (cs) = long_name;
2068 CHARSET_CHARS (cs) = chars;
2069 CHARSET_DIMENSION (cs) = dimension;
2070 CHARSET_DIRECTION (cs) = direction;
2071 CHARSET_COLUMNS (cs) = columns;
2072 CHARSET_GRAPHIC (cs) = graphic;
2073 CHARSET_FINAL (cs) = final;
2074 CHARSET_DOC_STRING (cs) = doc;
2075 CHARSET_REGISTRY (cs) = reg;
2076 CHARSET_CCL_PROGRAM (cs) = Qnil;
2077 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
2079 CHARSET_DECODING_TABLE(cs) = Qnil;
2080 CHARSET_ENCODING_TABLE(cs) = Qnil;
2081 CHARSET_UCS_MIN(cs) = ucs_min;
2082 CHARSET_UCS_MAX(cs) = ucs_max;
2083 CHARSET_CODE_OFFSET(cs) = code_offset;
2084 CHARSET_BYTE_OFFSET(cs) = byte_offset;
2088 if (id == LEADING_BYTE_ASCII)
2089 CHARSET_REP_BYTES (cs) = 1;
2091 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
2093 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
2098 /* some charsets do not have final characters. This includes
2099 ASCII, Control-1, Composite, and the two faux private
2101 unsigned char iso2022_type
2102 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
2104 if (code_offset == 0)
2106 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
2107 chlook->charset_by_attributes[iso2022_type][final] = obj;
2111 (chlook->charset_by_attributes[iso2022_type][final][direction]));
2112 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
2116 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
2117 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
2119 /* Some charsets are "faux" and don't have names or really exist at
2120 all except in the leading-byte table. */
2122 Fputhash (name, obj, Vcharset_hash_table);
2127 get_unallocated_leading_byte (int dimension)
2132 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
2135 lb = chlook->next_allocated_leading_byte++;
2139 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
2142 lb = chlook->next_allocated_1_byte_leading_byte++;
2146 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
2149 lb = chlook->next_allocated_2_byte_leading_byte++;
2155 ("No more character sets free for this dimension",
2156 make_int (dimension));
2162 /* Number of Big5 characters which have the same code in 1st byte. */
2164 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
2167 decode_builtin_char (Lisp_Object charset, int code_point)
2171 if (EQ (charset, Vcharset_chinese_big5))
2173 int c1 = code_point >> 8;
2174 int c2 = code_point & 0xFF;
2177 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
2179 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
2180 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
2182 I = (c1 - 0xA1) * BIG5_SAME_ROW
2183 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
2187 charset = Vcharset_chinese_big5_1;
2191 charset = Vcharset_chinese_big5_2;
2192 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
2194 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
2197 if ((final = XCHARSET_FINAL (charset)) >= '0')
2199 if (XCHARSET_DIMENSION (charset) == 1)
2201 switch (XCHARSET_CHARS (charset))
2205 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
2208 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
2216 switch (XCHARSET_CHARS (charset))
2219 return MIN_CHAR_94x94
2220 + (final - '0') * 94 * 94
2221 + (((code_point >> 8) & 0x7F) - 33) * 94
2222 + ((code_point & 0x7F) - 33);
2224 return MIN_CHAR_96x96
2225 + (final - '0') * 96 * 96
2226 + (((code_point >> 8) & 0x7F) - 32) * 96
2227 + ((code_point & 0x7F) - 32);
2234 else if (XCHARSET_UCS_MAX (charset))
2237 = (XCHARSET_DIMENSION (charset) == 1
2239 code_point - XCHARSET_BYTE_OFFSET (charset)
2241 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
2242 * XCHARSET_CHARS (charset)
2243 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
2244 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
2245 if ((cid < XCHARSET_UCS_MIN (charset))
2246 || (XCHARSET_UCS_MAX (charset) < cid))
2255 range_charset_code_point (Lisp_Object charset, Emchar ch)
2259 if ((XCHARSET_UCS_MIN (charset) <= ch)
2260 && (ch <= XCHARSET_UCS_MAX (charset)))
2262 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
2264 if (XCHARSET_CHARS (charset) == 256)
2266 else if (XCHARSET_DIMENSION (charset) == 1)
2267 return d + XCHARSET_BYTE_OFFSET (charset);
2268 else if (XCHARSET_DIMENSION (charset) == 2)
2270 ((d / XCHARSET_CHARS (charset)
2271 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2272 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2273 else if (XCHARSET_DIMENSION (charset) == 3)
2275 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2276 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2277 | ((d / XCHARSET_CHARS (charset)
2278 % XCHARSET_CHARS (charset)
2279 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2280 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2281 else /* if (XCHARSET_DIMENSION (charset) == 4) */
2283 ((d / (XCHARSET_CHARS (charset)
2284 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2285 + XCHARSET_BYTE_OFFSET (charset)) << 24)
2286 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2287 % XCHARSET_CHARS (charset)
2288 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2289 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
2290 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2291 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2293 else if (XCHARSET_CODE_OFFSET (charset) == 0)
2295 if (XCHARSET_DIMENSION (charset) == 1)
2297 if (XCHARSET_CHARS (charset) == 94)
2299 if (((d = ch - (MIN_CHAR_94
2300 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
2304 else if (XCHARSET_CHARS (charset) == 96)
2306 if (((d = ch - (MIN_CHAR_96
2307 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
2314 else if (XCHARSET_DIMENSION (charset) == 2)
2316 if (XCHARSET_CHARS (charset) == 94)
2318 if (((d = ch - (MIN_CHAR_94x94
2319 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
2322 return (((d / 94) + 33) << 8) | (d % 94 + 33);
2324 else if (XCHARSET_CHARS (charset) == 96)
2326 if (((d = ch - (MIN_CHAR_96x96
2327 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
2330 return (((d / 96) + 32) << 8) | (d % 96 + 32);
2336 if (EQ (charset, Vcharset_mojikyo_2022_1)
2337 && (MIN_CHAR_MOJIKYO < ch) && (ch < MIN_CHAR_MOJIKYO + 94 * 60 * 94))
2339 int m = ch - MIN_CHAR_MOJIKYO - 1;
2340 int byte1 = m / (94 * 60) + 33;
2341 int byte2 = (m % (94 * 60)) / 94;
2342 int byte3 = m % 94 + 33;
2348 return (byte1 << 16) | (byte2 << 8) | byte3;
2354 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
2356 if (c <= MAX_CHAR_BASIC_LATIN)
2358 *charset = Vcharset_ascii;
2363 *charset = Vcharset_control_1;
2368 *charset = Vcharset_latin_iso8859_1;
2372 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
2374 *charset = Vcharset_greek_iso8859_7;
2375 return c - MIN_CHAR_GREEK + 0x20;
2377 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
2379 *charset = Vcharset_cyrillic_iso8859_5;
2380 return c - MIN_CHAR_CYRILLIC + 0x20;
2383 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
2385 *charset = Vcharset_hebrew_iso8859_8;
2386 return c - MIN_CHAR_HEBREW + 0x20;
2388 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
2390 *charset = Vcharset_thai_tis620;
2391 return c - MIN_CHAR_THAI + 0x20;
2394 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
2395 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
2397 return list2 (Vcharset_katakana_jisx0201,
2398 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
2401 else if (c <= MAX_CHAR_BMP)
2403 *charset = Vcharset_ucs_bmp;
2406 else if (c < MIN_CHAR_DAIKANWA)
2408 *charset = Vcharset_ucs;
2411 else if (c <= MAX_CHAR_DAIKANWA)
2413 *charset = Vcharset_ideograph_daikanwa;
2414 return c - MIN_CHAR_DAIKANWA;
2416 else if (c <= MAX_CHAR_MOJIKYO_0)
2418 *charset = Vcharset_mojikyo;
2419 return c - MIN_CHAR_MOJIKYO_0;
2421 else if (c < MIN_CHAR_94)
2423 *charset = Vcharset_ucs;
2426 else if (c <= MAX_CHAR_94)
2428 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
2429 ((c - MIN_CHAR_94) / 94) + '0',
2430 CHARSET_LEFT_TO_RIGHT);
2431 if (!NILP (*charset))
2432 return ((c - MIN_CHAR_94) % 94) + 33;
2435 *charset = Vcharset_ucs;
2439 else if (c <= MAX_CHAR_96)
2441 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
2442 ((c - MIN_CHAR_96) / 96) + '0',
2443 CHARSET_LEFT_TO_RIGHT);
2444 if (!NILP (*charset))
2445 return ((c - MIN_CHAR_96) % 96) + 32;
2448 *charset = Vcharset_ucs;
2452 else if (c <= MAX_CHAR_94x94)
2455 = CHARSET_BY_ATTRIBUTES (94, 2,
2456 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
2457 CHARSET_LEFT_TO_RIGHT);
2458 if (!NILP (*charset))
2459 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
2460 | (((c - MIN_CHAR_94x94) % 94) + 33);
2463 *charset = Vcharset_ucs;
2467 else if (c <= MAX_CHAR_96x96)
2470 = CHARSET_BY_ATTRIBUTES (96, 2,
2471 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
2472 CHARSET_LEFT_TO_RIGHT);
2473 if (!NILP (*charset))
2474 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
2475 | (((c - MIN_CHAR_96x96) % 96) + 32);
2478 *charset = Vcharset_ucs;
2482 else if (c < MIN_CHAR_MOJIKYO)
2484 *charset = Vcharset_ucs;
2487 else if (c <= MAX_CHAR_MOJIKYO)
2489 *charset = Vcharset_mojikyo;
2490 return c - MIN_CHAR_MOJIKYO;
2494 *charset = Vcharset_ucs;
2499 Lisp_Object Vdefault_coded_charset_priority_list;
2503 /************************************************************************/
2504 /* Basic charset Lisp functions */
2505 /************************************************************************/
2507 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
2508 Return non-nil if OBJECT is a charset.
2512 return CHARSETP (object) ? Qt : Qnil;
2515 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
2516 Retrieve the charset of the given name.
2517 If CHARSET-OR-NAME is a charset object, it is simply returned.
2518 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
2519 nil is returned. Otherwise the associated charset object is returned.
2523 if (CHARSETP (charset_or_name))
2524 return charset_or_name;
2526 CHECK_SYMBOL (charset_or_name);
2527 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
2530 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
2531 Retrieve the charset of the given name.
2532 Same as `find-charset' except an error is signalled if there is no such
2533 charset instead of returning nil.
2537 Lisp_Object charset = Ffind_charset (name);
2540 signal_simple_error ("No such charset", name);
2544 /* We store the charsets in hash tables with the names as the key and the
2545 actual charset object as the value. Occasionally we need to use them
2546 in a list format. These routines provide us with that. */
2547 struct charset_list_closure
2549 Lisp_Object *charset_list;
2553 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
2554 void *charset_list_closure)
2556 /* This function can GC */
2557 struct charset_list_closure *chcl =
2558 (struct charset_list_closure*) charset_list_closure;
2559 Lisp_Object *charset_list = chcl->charset_list;
2561 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
2565 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
2566 Return a list of the names of all defined charsets.
2570 Lisp_Object charset_list = Qnil;
2571 struct gcpro gcpro1;
2572 struct charset_list_closure charset_list_closure;
2574 GCPRO1 (charset_list);
2575 charset_list_closure.charset_list = &charset_list;
2576 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
2577 &charset_list_closure);
2580 return charset_list;
2583 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
2584 Return the name of charset CHARSET.
2588 return XCHARSET_NAME (Fget_charset (charset));
2591 /* #### SJT Should generic properties be allowed? */
2592 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
2593 Define a new character set.
2594 This function is for use with Mule support.
2595 NAME is a symbol, the name by which the character set is normally referred.
2596 DOC-STRING is a string describing the character set.
2597 PROPS is a property list, describing the specific nature of the
2598 character set. Recognized properties are:
2600 'short-name Short version of the charset name (ex: Latin-1)
2601 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
2602 'registry A regular expression matching the font registry field for
2604 'dimension Number of octets used to index a character in this charset.
2605 Either 1 or 2. Defaults to 1.
2606 'columns Number of columns used to display a character in this charset.
2607 Only used in TTY mode. (Under X, the actual width of a
2608 character can be derived from the font used to display the
2609 characters.) If unspecified, defaults to the dimension
2610 (this is almost always the correct value).
2611 'chars Number of characters in each dimension (94 or 96).
2612 Defaults to 94. Note that if the dimension is 2, the
2613 character set thus described is 94x94 or 96x96.
2614 'final Final byte of ISO 2022 escape sequence. Must be
2615 supplied. Each combination of (DIMENSION, CHARS) defines a
2616 separate namespace for final bytes. Note that ISO
2617 2022 restricts the final byte to the range
2618 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
2619 dimension == 2. Note also that final bytes in the range
2620 0x30 - 0x3F are reserved for user-defined (not official)
2622 'graphic 0 (use left half of font on output) or 1 (use right half
2623 of font on output). Defaults to 0. For example, for
2624 a font whose registry is ISO8859-1, the left half
2625 (octets 0x20 - 0x7F) is the `ascii' character set, while
2626 the right half (octets 0xA0 - 0xFF) is the `latin-1'
2627 character set. With 'graphic set to 0, the octets
2628 will have their high bit cleared; with it set to 1,
2629 the octets will have their high bit set.
2630 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
2632 'ccl-program A compiled CCL program used to convert a character in
2633 this charset into an index into the font. This is in
2634 addition to the 'graphic property. The CCL program
2635 is passed the octets of the character, with the high
2636 bit cleared and set depending upon whether the value
2637 of the 'graphic property is 0 or 1.
2639 (name, doc_string, props))
2641 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
2642 int direction = CHARSET_LEFT_TO_RIGHT;
2643 Lisp_Object registry = Qnil;
2644 Lisp_Object charset;
2645 Lisp_Object ccl_program = Qnil;
2646 Lisp_Object short_name = Qnil, long_name = Qnil;
2647 int byte_offset = -1;
2649 CHECK_SYMBOL (name);
2650 if (!NILP (doc_string))
2651 CHECK_STRING (doc_string);
2653 charset = Ffind_charset (name);
2654 if (!NILP (charset))
2655 signal_simple_error ("Cannot redefine existing charset", name);
2658 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
2660 if (EQ (keyword, Qshort_name))
2662 CHECK_STRING (value);
2666 if (EQ (keyword, Qlong_name))
2668 CHECK_STRING (value);
2672 else if (EQ (keyword, Qdimension))
2675 dimension = XINT (value);
2676 if (dimension < 1 || dimension > 2)
2677 signal_simple_error ("Invalid value for 'dimension", value);
2680 else if (EQ (keyword, Qchars))
2683 chars = XINT (value);
2684 if (chars != 94 && chars != 96)
2685 signal_simple_error ("Invalid value for 'chars", value);
2688 else if (EQ (keyword, Qcolumns))
2691 columns = XINT (value);
2692 if (columns != 1 && columns != 2)
2693 signal_simple_error ("Invalid value for 'columns", value);
2696 else if (EQ (keyword, Qgraphic))
2699 graphic = XINT (value);
2701 if (graphic < 0 || graphic > 2)
2703 if (graphic < 0 || graphic > 1)
2705 signal_simple_error ("Invalid value for 'graphic", value);
2708 else if (EQ (keyword, Qregistry))
2710 CHECK_STRING (value);
2714 else if (EQ (keyword, Qdirection))
2716 if (EQ (value, Ql2r))
2717 direction = CHARSET_LEFT_TO_RIGHT;
2718 else if (EQ (value, Qr2l))
2719 direction = CHARSET_RIGHT_TO_LEFT;
2721 signal_simple_error ("Invalid value for 'direction", value);
2724 else if (EQ (keyword, Qfinal))
2726 CHECK_CHAR_COERCE_INT (value);
2727 final = XCHAR (value);
2728 if (final < '0' || final > '~')
2729 signal_simple_error ("Invalid value for 'final", value);
2732 else if (EQ (keyword, Qccl_program))
2734 struct ccl_program test_ccl;
2736 if (setup_ccl_program (&test_ccl, value) < 0)
2737 signal_simple_error ("Invalid value for 'ccl-program", value);
2738 ccl_program = value;
2742 signal_simple_error ("Unrecognized property", keyword);
2747 error ("'final must be specified");
2748 if (dimension == 2 && final > 0x5F)
2750 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2753 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2754 CHARSET_LEFT_TO_RIGHT)) ||
2755 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2756 CHARSET_RIGHT_TO_LEFT)))
2758 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2760 id = get_unallocated_leading_byte (dimension);
2762 if (NILP (doc_string))
2763 doc_string = build_string ("");
2765 if (NILP (registry))
2766 registry = build_string ("");
2768 if (NILP (short_name))
2769 XSETSTRING (short_name, XSYMBOL (name)->name);
2771 if (NILP (long_name))
2772 long_name = doc_string;
2775 columns = dimension;
2777 if (byte_offset < 0)
2781 else if (chars == 96)
2787 charset = make_charset (id, name, chars, dimension, columns, graphic,
2788 final, direction, short_name, long_name,
2789 doc_string, registry,
2790 Qnil, 0, 0, 0, byte_offset);
2791 if (!NILP (ccl_program))
2792 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2796 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2798 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2799 NEW-NAME is the name of the new charset. Return the new charset.
2801 (charset, new_name))
2803 Lisp_Object new_charset = Qnil;
2804 int id, chars, dimension, columns, graphic, final;
2806 Lisp_Object registry, doc_string, short_name, long_name;
2809 charset = Fget_charset (charset);
2810 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2811 signal_simple_error ("Charset already has reverse-direction charset",
2814 CHECK_SYMBOL (new_name);
2815 if (!NILP (Ffind_charset (new_name)))
2816 signal_simple_error ("Cannot redefine existing charset", new_name);
2818 cs = XCHARSET (charset);
2820 chars = CHARSET_CHARS (cs);
2821 dimension = CHARSET_DIMENSION (cs);
2822 columns = CHARSET_COLUMNS (cs);
2823 id = get_unallocated_leading_byte (dimension);
2825 graphic = CHARSET_GRAPHIC (cs);
2826 final = CHARSET_FINAL (cs);
2827 direction = CHARSET_RIGHT_TO_LEFT;
2828 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2829 direction = CHARSET_LEFT_TO_RIGHT;
2830 doc_string = CHARSET_DOC_STRING (cs);
2831 short_name = CHARSET_SHORT_NAME (cs);
2832 long_name = CHARSET_LONG_NAME (cs);
2833 registry = CHARSET_REGISTRY (cs);
2835 new_charset = make_charset (id, new_name, chars, dimension, columns,
2836 graphic, final, direction, short_name, long_name,
2837 doc_string, registry,
2839 CHARSET_DECODING_TABLE(cs),
2840 CHARSET_UCS_MIN(cs),
2841 CHARSET_UCS_MAX(cs),
2842 CHARSET_CODE_OFFSET(cs),
2843 CHARSET_BYTE_OFFSET(cs)
2849 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2850 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
2855 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2856 Define symbol ALIAS as an alias for CHARSET.
2860 CHECK_SYMBOL (alias);
2861 charset = Fget_charset (charset);
2862 return Fputhash (alias, charset, Vcharset_hash_table);
2865 /* #### Reverse direction charsets not yet implemented. */
2867 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2869 Return the reverse-direction charset parallel to CHARSET, if any.
2870 This is the charset with the same properties (in particular, the same
2871 dimension, number of characters per dimension, and final byte) as
2872 CHARSET but whose characters are displayed in the opposite direction.
2876 charset = Fget_charset (charset);
2877 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
2881 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2882 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2883 If DIRECTION is omitted, both directions will be checked (left-to-right
2884 will be returned if character sets exist for both directions).
2886 (dimension, chars, final, direction))
2888 int dm, ch, fi, di = -1;
2889 Lisp_Object obj = Qnil;
2891 CHECK_INT (dimension);
2892 dm = XINT (dimension);
2893 if (dm < 1 || dm > 2)
2894 signal_simple_error ("Invalid value for DIMENSION", dimension);
2898 if (ch != 94 && ch != 96)
2899 signal_simple_error ("Invalid value for CHARS", chars);
2901 CHECK_CHAR_COERCE_INT (final);
2903 if (fi < '0' || fi > '~')
2904 signal_simple_error ("Invalid value for FINAL", final);
2906 if (EQ (direction, Ql2r))
2907 di = CHARSET_LEFT_TO_RIGHT;
2908 else if (EQ (direction, Qr2l))
2909 di = CHARSET_RIGHT_TO_LEFT;
2910 else if (!NILP (direction))
2911 signal_simple_error ("Invalid value for DIRECTION", direction);
2913 if (dm == 2 && fi > 0x5F)
2915 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2919 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
2921 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
2924 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
2927 return XCHARSET_NAME (obj);
2931 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2932 Return short name of CHARSET.
2936 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2939 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2940 Return long name of CHARSET.
2944 return XCHARSET_LONG_NAME (Fget_charset (charset));
2947 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2948 Return description of CHARSET.
2952 return XCHARSET_DOC_STRING (Fget_charset (charset));
2955 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2956 Return dimension of CHARSET.
2960 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2963 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2964 Return property PROP of CHARSET, a charset object or symbol naming a charset.
2965 Recognized properties are those listed in `make-charset', as well as
2966 'name and 'doc-string.
2972 charset = Fget_charset (charset);
2973 cs = XCHARSET (charset);
2975 CHECK_SYMBOL (prop);
2976 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2977 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2978 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2979 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2980 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2981 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2982 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2983 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2984 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2985 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2986 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2987 if (EQ (prop, Qdirection))
2988 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2989 if (EQ (prop, Qreverse_direction_charset))
2991 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2992 /* #### Is this translation OK? If so, error checking sufficient? */
2993 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
2995 signal_simple_error ("Unrecognized charset property name", prop);
2996 return Qnil; /* not reached */
2999 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
3000 Return charset identification number of CHARSET.
3004 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
3007 /* #### We need to figure out which properties we really want to
3010 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
3011 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
3013 (charset, ccl_program))
3015 struct ccl_program test_ccl;
3017 charset = Fget_charset (charset);
3018 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
3019 signal_simple_error ("Invalid ccl-program", ccl_program);
3020 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
3025 invalidate_charset_font_caches (Lisp_Object charset)
3027 /* Invalidate font cache entries for charset on all devices. */
3028 Lisp_Object devcons, concons, hash_table;
3029 DEVICE_LOOP_NO_BREAK (devcons, concons)
3031 struct device *d = XDEVICE (XCAR (devcons));
3032 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
3033 if (!UNBOUNDP (hash_table))
3034 Fclrhash (hash_table);
3038 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
3039 Set the 'registry property of CHARSET to REGISTRY.
3041 (charset, registry))
3043 charset = Fget_charset (charset);
3044 CHECK_STRING (registry);
3045 XCHARSET_REGISTRY (charset) = registry;
3046 invalidate_charset_font_caches (charset);
3047 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
3052 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
3053 Return mapping-table of CHARSET.
3057 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
3060 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
3061 Set mapping-table of CHARSET to TABLE.
3065 struct Lisp_Charset *cs;
3069 charset = Fget_charset (charset);
3070 cs = XCHARSET (charset);
3074 if (VECTORP (CHARSET_DECODING_TABLE(cs)))
3075 make_vector_newer (CHARSET_DECODING_TABLE(cs));
3076 CHARSET_DECODING_TABLE(cs) = Qnil;
3079 else if (VECTORP (table))
3081 int ccs_len = CHARSET_BYTE_SIZE (cs);
3082 int ret = decoding_table_check_elements (table,
3083 CHARSET_DIMENSION (cs),
3088 signal_simple_error ("Too big table", table);
3090 signal_simple_error ("Invalid element is found", table);
3092 signal_simple_error ("Something wrong", table);
3094 CHARSET_DECODING_TABLE(cs) = Qnil;
3097 signal_error (Qwrong_type_argument,
3098 list2 (build_translated_string ("vector-or-nil-p"),
3101 byte_offset = CHARSET_BYTE_OFFSET (cs);
3102 switch (CHARSET_DIMENSION (cs))
3105 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3107 Lisp_Object c = XVECTOR_DATA(table)[i];
3110 put_char_ccs_code_point (c, charset,
3111 make_int (i + byte_offset));
3115 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3117 Lisp_Object v = XVECTOR_DATA(table)[i];
3123 for (j = 0; j < XVECTOR_LENGTH (v); j++)
3125 Lisp_Object c = XVECTOR_DATA(v)[j];
3128 put_char_ccs_code_point
3130 make_int ( ( (i + byte_offset) << 8 )
3136 put_char_ccs_code_point (v, charset,
3137 make_int (i + byte_offset));
3146 /************************************************************************/
3147 /* Lisp primitives for working with characters */
3148 /************************************************************************/
3151 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
3152 Make a character from CHARSET and code-point CODE.
3158 charset = Fget_charset (charset);
3161 if (XCHARSET_GRAPHIC (charset) == 1)
3163 c = DECODE_CHAR (charset, c);
3164 return c ? make_char (c) : Qnil;
3167 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
3168 Make a builtin character from CHARSET and code-point CODE.
3174 charset = Fget_charset (charset);
3176 if (EQ (charset, Vcharset_latin_viscii))
3178 Lisp_Object chr = Fdecode_char (charset, code);
3184 (ret = Fget_char_attribute (chr,
3185 Vcharset_latin_viscii_lower,
3188 charset = Vcharset_latin_viscii_lower;
3192 (ret = Fget_char_attribute (chr,
3193 Vcharset_latin_viscii_upper,
3196 charset = Vcharset_latin_viscii_upper;
3203 if (XCHARSET_GRAPHIC (charset) == 1)
3206 c = decode_builtin_char (charset, c);
3207 return c >= 0 ? make_char (c) : Fdecode_char (charset, code);
3211 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
3212 Make a character from CHARSET and octets ARG1 and ARG2.
3213 ARG2 is required only for characters from two-dimensional charsets.
3214 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
3215 character s with caron.
3217 (charset, arg1, arg2))
3221 int lowlim, highlim;
3223 charset = Fget_charset (charset);
3224 cs = XCHARSET (charset);
3226 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
3227 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
3229 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
3231 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
3232 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
3235 /* It is useful (and safe, according to Olivier Galibert) to strip
3236 the 8th bit off ARG1 and ARG2 because it allows programmers to
3237 write (make-char 'latin-iso8859-2 CODE) where code is the actual
3238 Latin 2 code of the character. */
3246 if (a1 < lowlim || a1 > highlim)
3247 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
3249 if (CHARSET_DIMENSION (cs) == 1)
3253 ("Charset is of dimension one; second octet must be nil", arg2);
3254 return make_char (MAKE_CHAR (charset, a1, 0));
3263 a2 = XINT (arg2) & 0x7f;
3265 if (a2 < lowlim || a2 > highlim)
3266 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
3268 return make_char (MAKE_CHAR (charset, a1, a2));
3271 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
3272 Return the character set of CHARACTER.
3276 CHECK_CHAR_COERCE_INT (character);
3278 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
3281 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
3282 Return the octet numbered N (should be 0 or 1) of CHARACTER.
3283 N defaults to 0 if omitted.
3287 Lisp_Object charset;
3290 CHECK_CHAR_COERCE_INT (character);
3292 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
3294 if (NILP (n) || EQ (n, Qzero))
3295 return make_int (octet0);
3296 else if (EQ (n, make_int (1)))
3297 return make_int (octet1);
3299 signal_simple_error ("Octet number must be 0 or 1", n);
3302 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
3303 Return list of charset and one or two position-codes of CHARACTER.
3307 /* This function can GC */
3308 struct gcpro gcpro1, gcpro2;
3309 Lisp_Object charset = Qnil;
3310 Lisp_Object rc = Qnil;
3318 GCPRO2 (charset, rc);
3319 CHECK_CHAR_COERCE_INT (character);
3322 code_point = ENCODE_CHAR (XCHAR (character), charset);
3323 dimension = XCHARSET_DIMENSION (charset);
3324 while (dimension > 0)
3326 rc = Fcons (make_int (code_point & 255), rc);
3330 rc = Fcons (XCHARSET_NAME (charset), rc);
3332 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
3334 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
3336 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
3340 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
3349 #ifdef ENABLE_COMPOSITE_CHARS
3350 /************************************************************************/
3351 /* composite character functions */
3352 /************************************************************************/
3355 lookup_composite_char (Bufbyte *str, int len)
3357 Lisp_Object lispstr = make_string (str, len);
3358 Lisp_Object ch = Fgethash (lispstr,
3359 Vcomposite_char_string2char_hash_table,
3365 if (composite_char_row_next >= 128)
3366 signal_simple_error ("No more composite chars available", lispstr);
3367 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
3368 composite_char_col_next);
3369 Fputhash (make_char (emch), lispstr,
3370 Vcomposite_char_char2string_hash_table);
3371 Fputhash (lispstr, make_char (emch),
3372 Vcomposite_char_string2char_hash_table);
3373 composite_char_col_next++;
3374 if (composite_char_col_next >= 128)
3376 composite_char_col_next = 32;
3377 composite_char_row_next++;
3386 composite_char_string (Emchar ch)
3388 Lisp_Object str = Fgethash (make_char (ch),
3389 Vcomposite_char_char2string_hash_table,
3391 assert (!UNBOUNDP (str));
3395 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
3396 Convert a string into a single composite character.
3397 The character is the result of overstriking all the characters in
3402 CHECK_STRING (string);
3403 return make_char (lookup_composite_char (XSTRING_DATA (string),
3404 XSTRING_LENGTH (string)));
3407 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
3408 Return a string of the characters comprising a composite character.
3416 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
3417 signal_simple_error ("Must be composite char", ch);
3418 return composite_char_string (emch);
3420 #endif /* ENABLE_COMPOSITE_CHARS */
3423 /************************************************************************/
3424 /* initialization */
3425 /************************************************************************/
3428 syms_of_mule_charset (void)
3431 INIT_LRECORD_IMPLEMENTATION (uint8_byte_table);
3432 INIT_LRECORD_IMPLEMENTATION (uint16_byte_table);
3433 INIT_LRECORD_IMPLEMENTATION (byte_table);
3434 INIT_LRECORD_IMPLEMENTATION (char_id_table);
3436 INIT_LRECORD_IMPLEMENTATION (charset);
3438 DEFSUBR (Fcharsetp);
3439 DEFSUBR (Ffind_charset);
3440 DEFSUBR (Fget_charset);
3441 DEFSUBR (Fcharset_list);
3442 DEFSUBR (Fcharset_name);
3443 DEFSUBR (Fmake_charset);
3444 DEFSUBR (Fmake_reverse_direction_charset);
3445 /* DEFSUBR (Freverse_direction_charset); */
3446 DEFSUBR (Fdefine_charset_alias);
3447 DEFSUBR (Fcharset_from_attributes);
3448 DEFSUBR (Fcharset_short_name);
3449 DEFSUBR (Fcharset_long_name);
3450 DEFSUBR (Fcharset_description);
3451 DEFSUBR (Fcharset_dimension);
3452 DEFSUBR (Fcharset_property);
3453 DEFSUBR (Fcharset_id);
3454 DEFSUBR (Fset_charset_ccl_program);
3455 DEFSUBR (Fset_charset_registry);
3457 DEFSUBR (Fchar_attribute_list);
3458 DEFSUBR (Ffind_char_attribute_table);
3459 DEFSUBR (Fchar_attribute_alist);
3460 DEFSUBR (Fget_char_attribute);
3461 DEFSUBR (Fput_char_attribute);
3462 DEFSUBR (Fremove_char_attribute);
3463 DEFSUBR (Fdefine_char);
3464 DEFSUBR (Fchar_variants);
3465 DEFSUBR (Fget_composite_char);
3466 DEFSUBR (Fcharset_mapping_table);
3467 DEFSUBR (Fset_charset_mapping_table);
3471 DEFSUBR (Fdecode_char);
3472 DEFSUBR (Fdecode_builtin_char);
3474 DEFSUBR (Fmake_char);
3475 DEFSUBR (Fchar_charset);
3476 DEFSUBR (Fchar_octet);
3477 DEFSUBR (Fsplit_char);
3479 #ifdef ENABLE_COMPOSITE_CHARS
3480 DEFSUBR (Fmake_composite_char);
3481 DEFSUBR (Fcomposite_char_string);
3484 defsymbol (&Qcharsetp, "charsetp");
3485 defsymbol (&Qregistry, "registry");
3486 defsymbol (&Qfinal, "final");
3487 defsymbol (&Qgraphic, "graphic");
3488 defsymbol (&Qdirection, "direction");
3489 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
3490 defsymbol (&Qshort_name, "short-name");
3491 defsymbol (&Qlong_name, "long-name");
3493 defsymbol (&Ql2r, "l2r");
3494 defsymbol (&Qr2l, "r2l");
3496 /* Charsets, compatible with FSF 20.3
3497 Naming convention is Script-Charset[-Edition] */
3498 defsymbol (&Qascii, "ascii");
3499 defsymbol (&Qcontrol_1, "control-1");
3500 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
3501 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
3502 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
3503 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
3504 defsymbol (&Qthai_tis620, "thai-tis620");
3505 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
3506 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
3507 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
3508 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
3509 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
3510 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
3511 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
3512 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
3513 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
3514 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
3515 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
3516 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
3517 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
3518 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
3519 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
3520 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
3522 defsymbol (&Q_ucs, "->ucs");
3523 defsymbol (&Q_decomposition, "->decomposition");
3524 defsymbol (&Qcompat, "compat");
3525 defsymbol (&Qisolated, "isolated");
3526 defsymbol (&Qinitial, "initial");
3527 defsymbol (&Qmedial, "medial");
3528 defsymbol (&Qfinal, "final");
3529 defsymbol (&Qvertical, "vertical");
3530 defsymbol (&QnoBreak, "noBreak");
3531 defsymbol (&Qfraction, "fraction");
3532 defsymbol (&Qsuper, "super");
3533 defsymbol (&Qsub, "sub");
3534 defsymbol (&Qcircle, "circle");
3535 defsymbol (&Qsquare, "square");
3536 defsymbol (&Qwide, "wide");
3537 defsymbol (&Qnarrow, "narrow");
3538 defsymbol (&Qsmall, "small");
3539 defsymbol (&Qfont, "font");
3540 defsymbol (&Qucs, "ucs");
3541 defsymbol (&Qucs_bmp, "ucs-bmp");
3542 defsymbol (&Qucs_cns, "ucs-cns");
3543 defsymbol (&Qlatin_viscii, "latin-viscii");
3544 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
3545 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
3546 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
3547 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
3548 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
3549 defsymbol (&Qideograph_gt, "ideograph-gt");
3550 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
3551 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
3552 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
3553 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
3554 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
3555 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
3556 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
3557 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
3558 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
3559 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
3560 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
3561 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
3562 defsymbol (&Qchinese_big5, "chinese-big5");
3563 defsymbol (&Qmojikyo, "mojikyo");
3564 defsymbol (&Qmojikyo_2022_1, "mojikyo-2022-1");
3565 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
3566 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
3567 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
3568 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
3569 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
3570 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
3571 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
3572 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
3573 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
3574 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
3575 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
3576 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
3577 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
3578 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
3579 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
3580 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
3581 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
3582 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
3583 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
3584 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
3585 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
3586 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
3588 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
3589 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
3591 defsymbol (&Qcomposite, "composite");
3595 vars_of_mule_charset (void)
3602 chlook = xnew (struct charset_lookup);
3603 dumpstruct (&chlook, &charset_lookup_description);
3605 /* Table of charsets indexed by leading byte. */
3606 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
3607 chlook->charset_by_leading_byte[i] = Qnil;
3610 /* Table of charsets indexed by type/final-byte. */
3611 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3612 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3613 chlook->charset_by_attributes[i][j] = Qnil;
3615 /* Table of charsets indexed by type/final-byte/direction. */
3616 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3617 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3618 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
3619 chlook->charset_by_attributes[i][j][k] = Qnil;
3623 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
3625 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
3626 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
3630 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3631 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
3632 Leading-code of private TYPE9N charset of column-width 1.
3634 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3638 Vutf_2000_version = build_string("0.17 (Hōryūji)");
3639 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
3640 Version number of UTF-2000.
3643 staticpro (&Vcharacter_composition_table);
3644 Vcharacter_composition_table = make_char_id_table (Qnil);
3646 staticpro (&Vcharacter_variant_table);
3647 Vcharacter_variant_table = make_char_id_table (Qnil);
3649 Vdefault_coded_charset_priority_list = Qnil;
3650 DEFVAR_LISP ("default-coded-charset-priority-list",
3651 &Vdefault_coded_charset_priority_list /*
3652 Default order of preferred coded-character-sets.
3658 complex_vars_of_mule_charset (void)
3660 staticpro (&Vcharset_hash_table);
3661 Vcharset_hash_table =
3662 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3664 /* Predefined character sets. We store them into variables for
3668 staticpro (&Vchar_attribute_hash_table);
3669 Vchar_attribute_hash_table
3670 = make_lisp_hash_table (16, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3672 staticpro (&Vcharset_ucs);
3674 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
3675 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3676 build_string ("UCS"),
3677 build_string ("UCS"),
3678 build_string ("ISO/IEC 10646"),
3680 Qnil, 0, 0xFFFFFFF, 0, 0);
3681 staticpro (&Vcharset_ucs_bmp);
3683 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
3684 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3685 build_string ("BMP"),
3686 build_string ("BMP"),
3687 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
3688 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
3689 Qnil, 0, 0xFFFF, 0, 0);
3690 staticpro (&Vcharset_ucs_cns);
3692 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
3693 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3694 build_string ("UCS for CNS"),
3695 build_string ("UCS for CNS 11643"),
3696 build_string ("ISO/IEC 10646 for CNS 11643"),
3700 # define MIN_CHAR_THAI 0
3701 # define MAX_CHAR_THAI 0
3702 # define MIN_CHAR_HEBREW 0
3703 # define MAX_CHAR_HEBREW 0
3704 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3705 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
3707 staticpro (&Vcharset_ascii);
3709 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3710 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3711 build_string ("ASCII"),
3712 build_string ("ASCII)"),
3713 build_string ("ASCII (ISO646 IRV)"),
3714 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3715 Qnil, 0, 0x7F, 0, 0);
3716 staticpro (&Vcharset_control_1);
3717 Vcharset_control_1 =
3718 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3719 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3720 build_string ("C1"),
3721 build_string ("Control characters"),
3722 build_string ("Control characters 128-191"),
3724 Qnil, 0x80, 0x9F, 0, 0);
3725 staticpro (&Vcharset_latin_iso8859_1);
3726 Vcharset_latin_iso8859_1 =
3727 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3728 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3729 build_string ("Latin-1"),
3730 build_string ("ISO8859-1 (Latin-1)"),
3731 build_string ("ISO8859-1 (Latin-1)"),
3732 build_string ("iso8859-1"),
3733 Qnil, 0xA0, 0xFF, 0, 32);
3734 staticpro (&Vcharset_latin_iso8859_2);
3735 Vcharset_latin_iso8859_2 =
3736 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3737 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3738 build_string ("Latin-2"),
3739 build_string ("ISO8859-2 (Latin-2)"),
3740 build_string ("ISO8859-2 (Latin-2)"),
3741 build_string ("iso8859-2"),
3743 staticpro (&Vcharset_latin_iso8859_3);
3744 Vcharset_latin_iso8859_3 =
3745 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3746 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3747 build_string ("Latin-3"),
3748 build_string ("ISO8859-3 (Latin-3)"),
3749 build_string ("ISO8859-3 (Latin-3)"),
3750 build_string ("iso8859-3"),
3752 staticpro (&Vcharset_latin_iso8859_4);
3753 Vcharset_latin_iso8859_4 =
3754 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3755 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3756 build_string ("Latin-4"),
3757 build_string ("ISO8859-4 (Latin-4)"),
3758 build_string ("ISO8859-4 (Latin-4)"),
3759 build_string ("iso8859-4"),
3761 staticpro (&Vcharset_thai_tis620);
3762 Vcharset_thai_tis620 =
3763 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3764 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3765 build_string ("TIS620"),
3766 build_string ("TIS620 (Thai)"),
3767 build_string ("TIS620.2529 (Thai)"),
3768 build_string ("tis620"),
3769 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
3770 staticpro (&Vcharset_greek_iso8859_7);
3771 Vcharset_greek_iso8859_7 =
3772 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3773 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3774 build_string ("ISO8859-7"),
3775 build_string ("ISO8859-7 (Greek)"),
3776 build_string ("ISO8859-7 (Greek)"),
3777 build_string ("iso8859-7"),
3779 0 /* MIN_CHAR_GREEK */,
3780 0 /* MAX_CHAR_GREEK */, 0, 32);
3781 staticpro (&Vcharset_arabic_iso8859_6);
3782 Vcharset_arabic_iso8859_6 =
3783 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3784 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3785 build_string ("ISO8859-6"),
3786 build_string ("ISO8859-6 (Arabic)"),
3787 build_string ("ISO8859-6 (Arabic)"),
3788 build_string ("iso8859-6"),
3790 staticpro (&Vcharset_hebrew_iso8859_8);
3791 Vcharset_hebrew_iso8859_8 =
3792 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3793 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3794 build_string ("ISO8859-8"),
3795 build_string ("ISO8859-8 (Hebrew)"),
3796 build_string ("ISO8859-8 (Hebrew)"),
3797 build_string ("iso8859-8"),
3798 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
3799 staticpro (&Vcharset_katakana_jisx0201);
3800 Vcharset_katakana_jisx0201 =
3801 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3802 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3803 build_string ("JISX0201 Kana"),
3804 build_string ("JISX0201.1976 (Japanese Kana)"),
3805 build_string ("JISX0201.1976 Japanese Kana"),
3806 build_string ("jisx0201\\.1976"),
3808 staticpro (&Vcharset_latin_jisx0201);
3809 Vcharset_latin_jisx0201 =
3810 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3811 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3812 build_string ("JISX0201 Roman"),
3813 build_string ("JISX0201.1976 (Japanese Roman)"),
3814 build_string ("JISX0201.1976 Japanese Roman"),
3815 build_string ("jisx0201\\.1976"),
3817 staticpro (&Vcharset_cyrillic_iso8859_5);
3818 Vcharset_cyrillic_iso8859_5 =
3819 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3820 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3821 build_string ("ISO8859-5"),
3822 build_string ("ISO8859-5 (Cyrillic)"),
3823 build_string ("ISO8859-5 (Cyrillic)"),
3824 build_string ("iso8859-5"),
3826 0 /* MIN_CHAR_CYRILLIC */,
3827 0 /* MAX_CHAR_CYRILLIC */, 0, 32);
3828 staticpro (&Vcharset_latin_iso8859_9);
3829 Vcharset_latin_iso8859_9 =
3830 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3831 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3832 build_string ("Latin-5"),
3833 build_string ("ISO8859-9 (Latin-5)"),
3834 build_string ("ISO8859-9 (Latin-5)"),
3835 build_string ("iso8859-9"),
3837 staticpro (&Vcharset_japanese_jisx0208_1978);
3838 Vcharset_japanese_jisx0208_1978 =
3839 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3840 Qjapanese_jisx0208_1978, 94, 2,
3841 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3842 build_string ("JIS X0208:1978"),
3843 build_string ("JIS X0208:1978 (Japanese)"),
3845 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3846 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3848 staticpro (&Vcharset_chinese_gb2312);
3849 Vcharset_chinese_gb2312 =
3850 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3851 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3852 build_string ("GB2312"),
3853 build_string ("GB2312)"),
3854 build_string ("GB2312 Chinese simplified"),
3855 build_string ("gb2312"),
3857 staticpro (&Vcharset_chinese_gb12345);
3858 Vcharset_chinese_gb12345 =
3859 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
3860 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3861 build_string ("G1"),
3862 build_string ("GB 12345)"),
3863 build_string ("GB 12345-1990"),
3864 build_string ("GB12345\\(\\.1990\\)?-0"),
3866 staticpro (&Vcharset_japanese_jisx0208);
3867 Vcharset_japanese_jisx0208 =
3868 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3869 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3870 build_string ("JISX0208"),
3871 build_string ("JIS X0208:1983 (Japanese)"),
3872 build_string ("JIS X0208:1983 Japanese Kanji"),
3873 build_string ("jisx0208\\.1983"),
3876 staticpro (&Vcharset_japanese_jisx0208_1990);
3877 Vcharset_japanese_jisx0208_1990 =
3878 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3879 Qjapanese_jisx0208_1990, 94, 2,
3880 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3881 build_string ("JISX0208-1990"),
3882 build_string ("JIS X0208:1990 (Japanese)"),
3883 build_string ("JIS X0208:1990 Japanese Kanji"),
3884 build_string ("jisx0208\\.1990"),
3886 MIN_CHAR_JIS_X0208_1990,
3887 MAX_CHAR_JIS_X0208_1990, 0, 33);
3889 staticpro (&Vcharset_korean_ksc5601);
3890 Vcharset_korean_ksc5601 =
3891 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3892 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3893 build_string ("KSC5601"),
3894 build_string ("KSC5601 (Korean"),
3895 build_string ("KSC5601 Korean Hangul and Hanja"),
3896 build_string ("ksc5601"),
3898 staticpro (&Vcharset_japanese_jisx0212);
3899 Vcharset_japanese_jisx0212 =
3900 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3901 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3902 build_string ("JISX0212"),
3903 build_string ("JISX0212 (Japanese)"),
3904 build_string ("JISX0212 Japanese Supplement"),
3905 build_string ("jisx0212"),
3908 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3909 staticpro (&Vcharset_chinese_cns11643_1);
3910 Vcharset_chinese_cns11643_1 =
3911 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3912 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3913 build_string ("CNS11643-1"),
3914 build_string ("CNS11643-1 (Chinese traditional)"),
3916 ("CNS 11643 Plane 1 Chinese traditional"),
3917 build_string (CHINESE_CNS_PLANE_RE("1")),
3919 staticpro (&Vcharset_chinese_cns11643_2);
3920 Vcharset_chinese_cns11643_2 =
3921 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3922 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3923 build_string ("CNS11643-2"),
3924 build_string ("CNS11643-2 (Chinese traditional)"),
3926 ("CNS 11643 Plane 2 Chinese traditional"),
3927 build_string (CHINESE_CNS_PLANE_RE("2")),
3930 staticpro (&Vcharset_latin_tcvn5712);
3931 Vcharset_latin_tcvn5712 =
3932 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3933 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3934 build_string ("TCVN 5712"),
3935 build_string ("TCVN 5712 (VSCII-2)"),
3936 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3937 build_string ("tcvn5712-1"),
3939 staticpro (&Vcharset_latin_viscii_lower);
3940 Vcharset_latin_viscii_lower =
3941 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3942 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3943 build_string ("VISCII lower"),
3944 build_string ("VISCII lower (Vietnamese)"),
3945 build_string ("VISCII lower (Vietnamese)"),
3946 build_string ("MULEVISCII-LOWER"),
3948 staticpro (&Vcharset_latin_viscii_upper);
3949 Vcharset_latin_viscii_upper =
3950 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3951 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3952 build_string ("VISCII upper"),
3953 build_string ("VISCII upper (Vietnamese)"),
3954 build_string ("VISCII upper (Vietnamese)"),
3955 build_string ("MULEVISCII-UPPER"),
3957 staticpro (&Vcharset_latin_viscii);
3958 Vcharset_latin_viscii =
3959 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3960 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3961 build_string ("VISCII"),
3962 build_string ("VISCII 1.1 (Vietnamese)"),
3963 build_string ("VISCII 1.1 (Vietnamese)"),
3964 build_string ("VISCII1\\.1"),
3966 staticpro (&Vcharset_chinese_big5);
3967 Vcharset_chinese_big5 =
3968 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
3969 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3970 build_string ("Big5"),
3971 build_string ("Big5"),
3972 build_string ("Big5 Chinese traditional"),
3973 build_string ("big5"),
3975 staticpro (&Vcharset_ideograph_gt);
3976 Vcharset_ideograph_gt =
3977 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
3978 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3979 build_string ("GT"),
3980 build_string ("GT"),
3981 build_string ("GT"),
3983 Qnil, MIN_CHAR_GT, MAX_CHAR_GT, 0, 0);
3984 #define DEF_GT_PJ(n) \
3985 staticpro (&Vcharset_ideograph_gt_pj_##n); \
3986 Vcharset_ideograph_gt_pj_##n = \
3987 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
3988 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
3989 build_string ("GT-PJ-"#n), \
3990 build_string ("GT (pseudo JIS encoding) part "#n), \
3991 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
3993 ("\\(GT2000PJ-"#n "\\|jisx0208\\.GT2000-"#n "\\)$"), \
4007 staticpro (&Vcharset_ideograph_daikanwa);
4008 Vcharset_ideograph_daikanwa =
4009 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
4010 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4011 build_string ("Daikanwa"),
4012 build_string ("Morohashi's Daikanwa"),
4013 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
4014 build_string ("Daikanwa"),
4015 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
4016 staticpro (&Vcharset_mojikyo);
4018 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
4019 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4020 build_string ("Mojikyo"),
4021 build_string ("Mojikyo"),
4022 build_string ("Konjaku-Mojikyo"),
4024 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
4025 staticpro (&Vcharset_mojikyo_2022_1);
4026 Vcharset_mojikyo_2022_1 =
4027 make_charset (LEADING_BYTE_MOJIKYO_2022_1, Qmojikyo_2022_1, 94, 3,
4028 2, 2, ':', CHARSET_LEFT_TO_RIGHT,
4029 build_string ("Mojikyo-2022-1"),
4030 build_string ("Mojikyo ISO-2022 Part 1"),
4031 build_string ("Konjaku-Mojikyo for ISO/IEC 2022 Part 1"),
4035 #define DEF_MOJIKYO_PJ(n) \
4036 staticpro (&Vcharset_mojikyo_pj_##n); \
4037 Vcharset_mojikyo_pj_##n = \
4038 make_charset (LEADING_BYTE_MOJIKYO_PJ_##n, Qmojikyo_pj_##n, 94, 2, \
4039 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
4040 build_string ("Mojikyo-PJ-"#n), \
4041 build_string ("Mojikyo (pseudo JIS encoding) part "#n), \
4043 ("Konjaku-Mojikyo (pseudo JIS encoding) part "#n), \
4045 ("\\(MojikyoPJ-"#n "\\|jisx0208\\.Mojikyo-"#n "\\)$"), \
4057 DEF_MOJIKYO_PJ (10);
4058 DEF_MOJIKYO_PJ (11);
4059 DEF_MOJIKYO_PJ (12);
4060 DEF_MOJIKYO_PJ (13);
4061 DEF_MOJIKYO_PJ (14);
4062 DEF_MOJIKYO_PJ (15);
4063 DEF_MOJIKYO_PJ (16);
4064 DEF_MOJIKYO_PJ (17);
4065 DEF_MOJIKYO_PJ (18);
4066 DEF_MOJIKYO_PJ (19);
4067 DEF_MOJIKYO_PJ (20);
4068 DEF_MOJIKYO_PJ (21);
4070 staticpro (&Vcharset_ethiopic_ucs);
4071 Vcharset_ethiopic_ucs =
4072 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
4073 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4074 build_string ("Ethiopic (UCS)"),
4075 build_string ("Ethiopic (UCS)"),
4076 build_string ("Ethiopic of UCS"),
4077 build_string ("Ethiopic-Unicode"),
4078 Qnil, 0x1200, 0x137F, 0x1200, 0);
4080 staticpro (&Vcharset_chinese_big5_1);
4081 Vcharset_chinese_big5_1 =
4082 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
4083 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
4084 build_string ("Big5"),
4085 build_string ("Big5 (Level-1)"),
4087 ("Big5 Level-1 Chinese traditional"),
4088 build_string ("big5"),
4090 staticpro (&Vcharset_chinese_big5_2);
4091 Vcharset_chinese_big5_2 =
4092 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
4093 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
4094 build_string ("Big5"),
4095 build_string ("Big5 (Level-2)"),
4097 ("Big5 Level-2 Chinese traditional"),
4098 build_string ("big5"),
4101 #ifdef ENABLE_COMPOSITE_CHARS
4102 /* #### For simplicity, we put composite chars into a 96x96 charset.
4103 This is going to lead to problems because you can run out of
4104 room, esp. as we don't yet recycle numbers. */
4105 staticpro (&Vcharset_composite);
4106 Vcharset_composite =
4107 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
4108 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4109 build_string ("Composite"),
4110 build_string ("Composite characters"),
4111 build_string ("Composite characters"),
4114 /* #### not dumped properly */
4115 composite_char_row_next = 32;
4116 composite_char_col_next = 32;
4118 Vcomposite_char_string2char_hash_table =
4119 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
4120 Vcomposite_char_char2string_hash_table =
4121 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
4122 staticpro (&Vcomposite_char_string2char_hash_table);
4123 staticpro (&Vcomposite_char_char2string_hash_table);
4124 #endif /* ENABLE_COMPOSITE_CHARS */