1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_cns;
70 Lisp_Object Vcharset_ucs_big5;
71 Lisp_Object Vcharset_latin_viscii;
72 Lisp_Object Vcharset_latin_tcvn5712;
73 Lisp_Object Vcharset_latin_viscii_lower;
74 Lisp_Object Vcharset_latin_viscii_upper;
75 Lisp_Object Vcharset_chinese_big5;
76 Lisp_Object Vcharset_ideograph_gt;
77 Lisp_Object Vcharset_ideograph_gt_pj_1;
78 Lisp_Object Vcharset_ideograph_gt_pj_2;
79 Lisp_Object Vcharset_ideograph_gt_pj_3;
80 Lisp_Object Vcharset_ideograph_gt_pj_4;
81 Lisp_Object Vcharset_ideograph_gt_pj_5;
82 Lisp_Object Vcharset_ideograph_gt_pj_6;
83 Lisp_Object Vcharset_ideograph_gt_pj_7;
84 Lisp_Object Vcharset_ideograph_gt_pj_8;
85 Lisp_Object Vcharset_ideograph_gt_pj_9;
86 Lisp_Object Vcharset_ideograph_gt_pj_10;
87 Lisp_Object Vcharset_ideograph_gt_pj_11;
88 Lisp_Object Vcharset_ideograph_daikanwa;
89 Lisp_Object Vcharset_mojikyo;
90 Lisp_Object Vcharset_mojikyo_2022_1;
91 Lisp_Object Vcharset_mojikyo_pj_1;
92 Lisp_Object Vcharset_mojikyo_pj_2;
93 Lisp_Object Vcharset_mojikyo_pj_3;
94 Lisp_Object Vcharset_mojikyo_pj_4;
95 Lisp_Object Vcharset_mojikyo_pj_5;
96 Lisp_Object Vcharset_mojikyo_pj_6;
97 Lisp_Object Vcharset_mojikyo_pj_7;
98 Lisp_Object Vcharset_mojikyo_pj_8;
99 Lisp_Object Vcharset_mojikyo_pj_9;
100 Lisp_Object Vcharset_mojikyo_pj_10;
101 Lisp_Object Vcharset_mojikyo_pj_11;
102 Lisp_Object Vcharset_mojikyo_pj_12;
103 Lisp_Object Vcharset_mojikyo_pj_13;
104 Lisp_Object Vcharset_mojikyo_pj_14;
105 Lisp_Object Vcharset_mojikyo_pj_15;
106 Lisp_Object Vcharset_mojikyo_pj_16;
107 Lisp_Object Vcharset_mojikyo_pj_17;
108 Lisp_Object Vcharset_mojikyo_pj_18;
109 Lisp_Object Vcharset_mojikyo_pj_19;
110 Lisp_Object Vcharset_mojikyo_pj_20;
111 Lisp_Object Vcharset_mojikyo_pj_21;
112 Lisp_Object Vcharset_ethiopic_ucs;
114 Lisp_Object Vcharset_chinese_big5_1;
115 Lisp_Object Vcharset_chinese_big5_2;
117 #ifdef ENABLE_COMPOSITE_CHARS
118 Lisp_Object Vcharset_composite;
120 /* Hash tables for composite chars. One maps string representing
121 composed chars to their equivalent chars; one goes the
123 Lisp_Object Vcomposite_char_char2string_hash_table;
124 Lisp_Object Vcomposite_char_string2char_hash_table;
126 static int composite_char_row_next;
127 static int composite_char_col_next;
129 #endif /* ENABLE_COMPOSITE_CHARS */
131 struct charset_lookup *chlook;
133 static const struct lrecord_description charset_lookup_description_1[] = {
134 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
143 static const struct struct_description charset_lookup_description = {
144 sizeof (struct charset_lookup),
145 charset_lookup_description_1
149 /* Table of number of bytes in the string representation of a character
150 indexed by the first byte of that representation.
152 rep_bytes_by_first_byte(c) is more efficient than the equivalent
153 canonical computation:
155 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
157 const Bytecount rep_bytes_by_first_byte[0xA0] =
158 { /* 0x00 - 0x7f are for straight ASCII */
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
165 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
166 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
167 /* 0x80 - 0x8f are for Dimension-1 official charsets */
169 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
171 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
173 /* 0x90 - 0x9d are for Dimension-2 official charsets */
174 /* 0x9e is for Dimension-1 private charsets */
175 /* 0x9f is for Dimension-2 private charsets */
176 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
182 #define BT_UINT8_MIN 0
183 #define BT_UINT8_MAX (UCHAR_MAX - 3)
184 #define BT_UINT8_t (UCHAR_MAX - 2)
185 #define BT_UINT8_nil (UCHAR_MAX - 1)
186 #define BT_UINT8_unbound UCHAR_MAX
188 INLINE_HEADER int INT_UINT8_P (Lisp_Object obj);
189 INLINE_HEADER int UINT8_VALUE_P (Lisp_Object obj);
190 INLINE_HEADER unsigned char UINT8_ENCODE (Lisp_Object obj);
191 INLINE_HEADER Lisp_Object UINT8_DECODE (unsigned char n);
192 INLINE_HEADER unsigned short UINT8_TO_UINT16 (unsigned char n);
195 INT_UINT8_P (Lisp_Object obj)
199 int num = XINT (obj);
201 return (BT_UINT8_MIN <= num) && (num <= BT_UINT8_MAX);
208 UINT8_VALUE_P (Lisp_Object obj)
210 return EQ (obj, Qunbound)
211 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT8_P (obj);
214 INLINE_HEADER unsigned char
215 UINT8_ENCODE (Lisp_Object obj)
217 if (EQ (obj, Qunbound))
218 return BT_UINT8_unbound;
219 else if (EQ (obj, Qnil))
221 else if (EQ (obj, Qt))
227 INLINE_HEADER Lisp_Object
228 UINT8_DECODE (unsigned char n)
230 if (n == BT_UINT8_unbound)
232 else if (n == BT_UINT8_nil)
234 else if (n == BT_UINT8_t)
241 mark_uint8_byte_table (Lisp_Object obj)
247 print_uint8_byte_table (Lisp_Object obj,
248 Lisp_Object printcharfun, int escapeflag)
250 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
252 struct gcpro gcpro1, gcpro2;
253 GCPRO2 (obj, printcharfun);
255 write_c_string ("\n#<uint8-byte-table", printcharfun);
256 for (i = 0; i < 256; i++)
258 unsigned char n = bte->property[i];
260 write_c_string ("\n ", printcharfun);
261 write_c_string (" ", printcharfun);
262 if (n == BT_UINT8_unbound)
263 write_c_string ("void", printcharfun);
264 else if (n == BT_UINT8_nil)
265 write_c_string ("nil", printcharfun);
266 else if (n == BT_UINT8_t)
267 write_c_string ("t", printcharfun);
272 sprintf (buf, "%hd", n);
273 write_c_string (buf, printcharfun);
277 write_c_string (">", printcharfun);
281 uint8_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
283 Lisp_Uint8_Byte_Table *te1 = XUINT8_BYTE_TABLE (obj1);
284 Lisp_Uint8_Byte_Table *te2 = XUINT8_BYTE_TABLE (obj2);
287 for (i = 0; i < 256; i++)
288 if (te1->property[i] != te2->property[i])
294 uint8_byte_table_hash (Lisp_Object obj, int depth)
296 Lisp_Uint8_Byte_Table *te = XUINT8_BYTE_TABLE (obj);
300 for (i = 0; i < 256; i++)
301 hash = HASH2 (hash, te->property[i]);
305 DEFINE_LRECORD_IMPLEMENTATION ("uint8-byte-table", uint8_byte_table,
306 mark_uint8_byte_table,
307 print_uint8_byte_table,
308 0, uint8_byte_table_equal,
309 uint8_byte_table_hash,
310 0 /* uint8_byte_table_description */,
311 Lisp_Uint8_Byte_Table);
314 make_uint8_byte_table (unsigned char initval)
318 Lisp_Uint8_Byte_Table *cte;
320 cte = alloc_lcrecord_type (Lisp_Uint8_Byte_Table,
321 &lrecord_uint8_byte_table);
323 for (i = 0; i < 256; i++)
324 cte->property[i] = initval;
326 XSETUINT8_BYTE_TABLE (obj, cte);
331 uint8_byte_table_same_value_p (Lisp_Object obj)
333 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
334 unsigned char v0 = bte->property[0];
337 for (i = 1; i < 256; i++)
339 if (bte->property[i] != v0)
346 #define BT_UINT16_MIN 0
347 #define BT_UINT16_MAX (USHRT_MAX - 3)
348 #define BT_UINT16_t (USHRT_MAX - 2)
349 #define BT_UINT16_nil (USHRT_MAX - 1)
350 #define BT_UINT16_unbound USHRT_MAX
352 INLINE_HEADER int INT_UINT16_P (Lisp_Object obj);
353 INLINE_HEADER int UINT16_VALUE_P (Lisp_Object obj);
354 INLINE_HEADER unsigned short UINT16_ENCODE (Lisp_Object obj);
355 INLINE_HEADER Lisp_Object UINT16_DECODE (unsigned short us);
358 INT_UINT16_P (Lisp_Object obj)
362 int num = XINT (obj);
364 return (BT_UINT16_MIN <= num) && (num <= BT_UINT16_MAX);
371 UINT16_VALUE_P (Lisp_Object obj)
373 return EQ (obj, Qunbound)
374 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT16_P (obj);
377 INLINE_HEADER unsigned short
378 UINT16_ENCODE (Lisp_Object obj)
380 if (EQ (obj, Qunbound))
381 return BT_UINT16_unbound;
382 else if (EQ (obj, Qnil))
383 return BT_UINT16_nil;
384 else if (EQ (obj, Qt))
390 INLINE_HEADER Lisp_Object
391 UINT16_DECODE (unsigned short n)
393 if (n == BT_UINT16_unbound)
395 else if (n == BT_UINT16_nil)
397 else if (n == BT_UINT16_t)
403 INLINE_HEADER unsigned short
404 UINT8_TO_UINT16 (unsigned char n)
406 if (n == BT_UINT8_unbound)
407 return BT_UINT16_unbound;
408 else if (n == BT_UINT8_nil)
409 return BT_UINT16_nil;
410 else if (n == BT_UINT8_t)
417 mark_uint16_byte_table (Lisp_Object obj)
423 print_uint16_byte_table (Lisp_Object obj,
424 Lisp_Object printcharfun, int escapeflag)
426 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
428 struct gcpro gcpro1, gcpro2;
429 GCPRO2 (obj, printcharfun);
431 write_c_string ("\n#<uint16-byte-table", printcharfun);
432 for (i = 0; i < 256; i++)
434 unsigned short n = bte->property[i];
436 write_c_string ("\n ", printcharfun);
437 write_c_string (" ", printcharfun);
438 if (n == BT_UINT16_unbound)
439 write_c_string ("void", printcharfun);
440 else if (n == BT_UINT16_nil)
441 write_c_string ("nil", printcharfun);
442 else if (n == BT_UINT16_t)
443 write_c_string ("t", printcharfun);
448 sprintf (buf, "%hd", n);
449 write_c_string (buf, printcharfun);
453 write_c_string (">", printcharfun);
457 uint16_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
459 Lisp_Uint16_Byte_Table *te1 = XUINT16_BYTE_TABLE (obj1);
460 Lisp_Uint16_Byte_Table *te2 = XUINT16_BYTE_TABLE (obj2);
463 for (i = 0; i < 256; i++)
464 if (te1->property[i] != te2->property[i])
470 uint16_byte_table_hash (Lisp_Object obj, int depth)
472 Lisp_Uint16_Byte_Table *te = XUINT16_BYTE_TABLE (obj);
476 for (i = 0; i < 256; i++)
477 hash = HASH2 (hash, te->property[i]);
481 DEFINE_LRECORD_IMPLEMENTATION ("uint16-byte-table", uint16_byte_table,
482 mark_uint16_byte_table,
483 print_uint16_byte_table,
484 0, uint16_byte_table_equal,
485 uint16_byte_table_hash,
486 0 /* uint16_byte_table_description */,
487 Lisp_Uint16_Byte_Table);
490 make_uint16_byte_table (unsigned short initval)
494 Lisp_Uint16_Byte_Table *cte;
496 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
497 &lrecord_uint16_byte_table);
499 for (i = 0; i < 256; i++)
500 cte->property[i] = initval;
502 XSETUINT16_BYTE_TABLE (obj, cte);
507 expand_uint8_byte_table_to_uint16 (Lisp_Object table)
511 Lisp_Uint8_Byte_Table* bte = XUINT8_BYTE_TABLE(table);
512 Lisp_Uint16_Byte_Table* cte;
514 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
515 &lrecord_uint16_byte_table);
516 for (i = 0; i < 256; i++)
518 cte->property[i] = UINT8_TO_UINT16 (bte->property[i]);
520 XSETUINT16_BYTE_TABLE (obj, cte);
525 uint16_byte_table_same_value_p (Lisp_Object obj)
527 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
528 unsigned short v0 = bte->property[0];
531 for (i = 1; i < 256; i++)
533 if (bte->property[i] != v0)
541 mark_byte_table (Lisp_Object obj)
543 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
546 for (i = 0; i < 256; i++)
548 mark_object (cte->property[i]);
554 print_byte_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
556 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
558 struct gcpro gcpro1, gcpro2;
559 GCPRO2 (obj, printcharfun);
561 write_c_string ("\n#<byte-table", printcharfun);
562 for (i = 0; i < 256; i++)
564 Lisp_Object elt = bte->property[i];
566 write_c_string ("\n ", printcharfun);
567 write_c_string (" ", printcharfun);
568 if (EQ (elt, Qunbound))
569 write_c_string ("void", printcharfun);
571 print_internal (elt, printcharfun, escapeflag);
574 write_c_string (">", printcharfun);
578 byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
580 Lisp_Byte_Table *cte1 = XBYTE_TABLE (obj1);
581 Lisp_Byte_Table *cte2 = XBYTE_TABLE (obj2);
584 for (i = 0; i < 256; i++)
585 if (BYTE_TABLE_P (cte1->property[i]))
587 if (BYTE_TABLE_P (cte2->property[i]))
589 if (!byte_table_equal (cte1->property[i],
590 cte2->property[i], depth + 1))
597 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
603 byte_table_hash (Lisp_Object obj, int depth)
605 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
607 return internal_array_hash (cte->property, 256, depth);
610 static const struct lrecord_description byte_table_description[] = {
611 { XD_LISP_OBJECT_ARRAY, offsetof(Lisp_Byte_Table, property), 256 },
615 DEFINE_LRECORD_IMPLEMENTATION ("byte-table", byte_table,
620 byte_table_description,
624 make_byte_table (Lisp_Object initval)
628 Lisp_Byte_Table *cte;
630 cte = alloc_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
632 for (i = 0; i < 256; i++)
633 cte->property[i] = initval;
635 XSETBYTE_TABLE (obj, cte);
640 byte_table_same_value_p (Lisp_Object obj)
642 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
643 Lisp_Object v0 = bte->property[0];
646 for (i = 1; i < 256; i++)
648 if (!internal_equal (bte->property[i], v0, 0))
655 Lisp_Object get_byte_table (Lisp_Object table, unsigned char idx);
656 Lisp_Object put_byte_table (Lisp_Object table, unsigned char idx,
660 get_byte_table (Lisp_Object table, unsigned char idx)
662 if (UINT8_BYTE_TABLE_P (table))
663 return UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[idx]);
664 else if (UINT16_BYTE_TABLE_P (table))
665 return UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[idx]);
666 else if (BYTE_TABLE_P (table))
667 return XBYTE_TABLE(table)->property[idx];
673 put_byte_table (Lisp_Object table, unsigned char idx, Lisp_Object value)
675 if (UINT8_BYTE_TABLE_P (table))
677 if (UINT8_VALUE_P (value))
679 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
680 if (!UINT8_BYTE_TABLE_P (value) &&
681 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
682 && uint8_byte_table_same_value_p (table))
687 else if (UINT16_VALUE_P (value))
689 Lisp_Object new = expand_uint8_byte_table_to_uint16 (table);
691 XUINT16_BYTE_TABLE(new)->property[idx] = UINT16_ENCODE (value);
696 Lisp_Object new = make_byte_table (Qnil);
699 for (i = 0; i < 256; i++)
701 XBYTE_TABLE(new)->property[i]
702 = UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[i]);
704 XBYTE_TABLE(new)->property[idx] = value;
708 else if (UINT16_BYTE_TABLE_P (table))
710 if (UINT16_VALUE_P (value))
712 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
713 if (!UINT8_BYTE_TABLE_P (value) &&
714 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
715 && uint16_byte_table_same_value_p (table))
722 Lisp_Object new = make_byte_table (Qnil);
725 for (i = 0; i < 256; i++)
727 XBYTE_TABLE(new)->property[i]
728 = UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[i]);
730 XBYTE_TABLE(new)->property[idx] = value;
734 else if (BYTE_TABLE_P (table))
736 XBYTE_TABLE(table)->property[idx] = value;
737 if (!UINT8_BYTE_TABLE_P (value) &&
738 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
739 && byte_table_same_value_p (table))
744 else if (!internal_equal (table, value, 0))
746 if (UINT8_VALUE_P (table) && UINT8_VALUE_P (value))
748 table = make_uint8_byte_table (UINT8_ENCODE (table));
749 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
751 else if (UINT16_VALUE_P (table) && UINT16_VALUE_P (value))
753 table = make_uint16_byte_table (UINT16_ENCODE (table));
754 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
758 table = make_byte_table (table);
759 XBYTE_TABLE(table)->property[idx] = value;
766 mark_char_id_table (Lisp_Object obj)
768 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
774 print_char_id_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
776 Lisp_Object table = XCHAR_ID_TABLE (obj)->table;
778 struct gcpro gcpro1, gcpro2;
779 GCPRO2 (obj, printcharfun);
781 write_c_string ("#<char-id-table ", printcharfun);
782 for (i = 0; i < 256; i++)
784 Lisp_Object elt = get_byte_table (table, i);
785 if (i != 0) write_c_string ("\n ", printcharfun);
786 if (EQ (elt, Qunbound))
787 write_c_string ("void", printcharfun);
789 print_internal (elt, printcharfun, escapeflag);
792 write_c_string (">", printcharfun);
796 char_id_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
798 Lisp_Object table1 = XCHAR_ID_TABLE (obj1)->table;
799 Lisp_Object table2 = XCHAR_ID_TABLE (obj2)->table;
802 for (i = 0; i < 256; i++)
804 if (!internal_equal (get_byte_table (table1, i),
805 get_byte_table (table2, i), 0))
812 char_id_table_hash (Lisp_Object obj, int depth)
814 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
816 return char_id_table_hash (cte->table, depth + 1);
819 static const struct lrecord_description char_id_table_description[] = {
820 { XD_LISP_OBJECT, offsetof(Lisp_Char_ID_Table, table) },
824 DEFINE_LRECORD_IMPLEMENTATION ("char-id-table", char_id_table,
827 0, char_id_table_equal,
829 char_id_table_description,
833 make_char_id_table (Lisp_Object initval)
836 Lisp_Char_ID_Table *cte;
838 cte = alloc_lcrecord_type (Lisp_Char_ID_Table, &lrecord_char_id_table);
840 cte->table = make_byte_table (initval);
842 XSETCHAR_ID_TABLE (obj, cte);
848 get_char_id_table (Emchar ch, Lisp_Object table)
850 unsigned int code = ch;
857 (XCHAR_ID_TABLE (table)->table,
858 (unsigned char)(code >> 24)),
859 (unsigned char) (code >> 16)),
860 (unsigned char) (code >> 8)),
861 (unsigned char) code);
864 void put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table);
866 put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table)
868 unsigned int code = ch;
869 Lisp_Object table1, table2, table3, table4;
871 table1 = XCHAR_ID_TABLE (table)->table;
872 table2 = get_byte_table (table1, (unsigned char)(code >> 24));
873 table3 = get_byte_table (table2, (unsigned char)(code >> 16));
874 table4 = get_byte_table (table3, (unsigned char)(code >> 8));
876 table4 = put_byte_table (table4, (unsigned char)code, value);
877 table3 = put_byte_table (table3, (unsigned char)(code >> 8), table4);
878 table2 = put_byte_table (table2, (unsigned char)(code >> 16), table3);
879 XCHAR_ID_TABLE (table)->table
880 = put_byte_table (table1, (unsigned char)(code >> 24), table2);
884 Lisp_Object Vchar_attribute_hash_table;
885 Lisp_Object Vcharacter_composition_table;
886 Lisp_Object Vcharacter_variant_table;
888 Lisp_Object Qideograph_daikanwa;
889 Lisp_Object Q_decomposition;
894 Lisp_Object Qisolated;
895 Lisp_Object Qinitial;
898 Lisp_Object Qvertical;
899 Lisp_Object QnoBreak;
900 Lisp_Object Qfraction;
910 Emchar to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg);
912 Lisp_Object put_char_ccs_code_point (Lisp_Object character,
913 Lisp_Object ccs, Lisp_Object value);
914 Lisp_Object remove_char_ccs (Lisp_Object character, Lisp_Object ccs);
917 to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
923 else if (EQ (v, Qcompat))
925 else if (EQ (v, Qisolated))
927 else if (EQ (v, Qinitial))
929 else if (EQ (v, Qmedial))
931 else if (EQ (v, Qfinal))
933 else if (EQ (v, Qvertical))
935 else if (EQ (v, QnoBreak))
937 else if (EQ (v, Qfraction))
939 else if (EQ (v, Qsuper))
941 else if (EQ (v, Qsub))
943 else if (EQ (v, Qcircle))
945 else if (EQ (v, Qsquare))
947 else if (EQ (v, Qwide))
949 else if (EQ (v, Qnarrow))
951 else if (EQ (v, Qsmall))
953 else if (EQ (v, Qfont))
956 signal_simple_error (err_msg, err_arg);
959 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
960 Return character corresponding with list.
964 Lisp_Object table = Vcharacter_composition_table;
965 Lisp_Object rest = list;
969 Lisp_Object v = Fcar (rest);
971 Emchar c = to_char_id (v, "Invalid value for composition", list);
973 ret = get_char_id_table (c, table);
978 if (!CHAR_ID_TABLE_P (ret))
983 else if (!CONSP (rest))
985 else if (CHAR_ID_TABLE_P (ret))
988 signal_simple_error ("Invalid table is found with", list);
990 signal_simple_error ("Invalid value for composition", list);
993 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
994 Return variants of CHARACTER.
998 CHECK_CHAR (character);
999 return Fcopy_list (get_char_id_table (XCHAR (character),
1000 Vcharacter_variant_table));
1004 /* We store the char-attributes in hash tables with the names as the
1005 key and the actual char-id-table object as the value. Occasionally
1006 we need to use them in a list format. These routines provide us
1008 struct char_attribute_list_closure
1010 Lisp_Object *char_attribute_list;
1014 add_char_attribute_to_list_mapper (Lisp_Object key, Lisp_Object value,
1015 void *char_attribute_list_closure)
1017 /* This function can GC */
1018 struct char_attribute_list_closure *calcl
1019 = (struct char_attribute_list_closure*) char_attribute_list_closure;
1020 Lisp_Object *char_attribute_list = calcl->char_attribute_list;
1022 *char_attribute_list = Fcons (key, *char_attribute_list);
1026 DEFUN ("char-attribute-list", Fchar_attribute_list, 0, 0, 0, /*
1027 Return the list of all existing character attributes except coded-charsets.
1031 Lisp_Object char_attribute_list = Qnil;
1032 struct gcpro gcpro1;
1033 struct char_attribute_list_closure char_attribute_list_closure;
1035 GCPRO1 (char_attribute_list);
1036 char_attribute_list_closure.char_attribute_list = &char_attribute_list;
1037 elisp_maphash (add_char_attribute_to_list_mapper,
1038 Vchar_attribute_hash_table,
1039 &char_attribute_list_closure);
1041 return char_attribute_list;
1044 DEFUN ("find-char-attribute-table", Ffind_char_attribute_table, 1, 1, 0, /*
1045 Return char-id-table corresponding to ATTRIBUTE.
1049 return Fgethash (attribute, Vchar_attribute_hash_table, Qnil);
1053 /* We store the char-id-tables in hash tables with the attributes as
1054 the key and the actual char-id-table object as the value. Each
1055 char-id-table stores values of an attribute corresponding with
1056 characters. Occasionally we need to get attributes of a character
1057 in a association-list format. These routines provide us with
1059 struct char_attribute_alist_closure
1062 Lisp_Object *char_attribute_alist;
1066 add_char_attribute_alist_mapper (Lisp_Object key, Lisp_Object value,
1067 void *char_attribute_alist_closure)
1069 /* This function can GC */
1070 struct char_attribute_alist_closure *caacl =
1071 (struct char_attribute_alist_closure*) char_attribute_alist_closure;
1072 Lisp_Object ret = get_char_id_table (caacl->char_id, value);
1073 if (!UNBOUNDP (ret))
1075 Lisp_Object *char_attribute_alist = caacl->char_attribute_alist;
1076 *char_attribute_alist
1077 = Fcons (Fcons (key, ret), *char_attribute_alist);
1082 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
1083 Return the alist of attributes of CHARACTER.
1087 Lisp_Object alist = Qnil;
1090 CHECK_CHAR (character);
1092 struct gcpro gcpro1;
1093 struct char_attribute_alist_closure char_attribute_alist_closure;
1096 char_attribute_alist_closure.char_id = XCHAR (character);
1097 char_attribute_alist_closure.char_attribute_alist = &alist;
1098 elisp_maphash (add_char_attribute_alist_mapper,
1099 Vchar_attribute_hash_table,
1100 &char_attribute_alist_closure);
1104 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
1106 Lisp_Object ccs = chlook->charset_by_leading_byte[i];
1110 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1113 if ( CHAR_ID_TABLE_P (encoding_table)
1114 && INTP (cpos = get_char_id_table (XCHAR (character),
1117 alist = Fcons (Fcons (ccs, cpos), alist);
1124 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 3, 0, /*
1125 Return the value of CHARACTER's ATTRIBUTE.
1126 Return DEFAULT-VALUE if the value is not exist.
1128 (character, attribute, default_value))
1132 CHECK_CHAR (character);
1133 if (!NILP (ccs = Ffind_charset (attribute)))
1135 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1137 if (CHAR_ID_TABLE_P (encoding_table))
1138 return get_char_id_table (XCHAR (character), encoding_table);
1142 Lisp_Object table = Fgethash (attribute,
1143 Vchar_attribute_hash_table,
1145 if (!UNBOUNDP (table))
1147 Lisp_Object ret = get_char_id_table (XCHAR (character), table);
1148 if (!UNBOUNDP (ret))
1152 return default_value;
1155 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
1156 Store CHARACTER's ATTRIBUTE with VALUE.
1158 (character, attribute, value))
1162 CHECK_CHAR (character);
1163 ccs = Ffind_charset (attribute);
1166 return put_char_ccs_code_point (character, ccs, value);
1168 else if (EQ (attribute, Q_decomposition))
1173 signal_simple_error ("Invalid value for ->decomposition",
1176 if (CONSP (Fcdr (value)))
1178 Lisp_Object rest = value;
1179 Lisp_Object table = Vcharacter_composition_table;
1183 GET_EXTERNAL_LIST_LENGTH (rest, len);
1184 seq = make_vector (len, Qnil);
1186 while (CONSP (rest))
1188 Lisp_Object v = Fcar (rest);
1191 = to_char_id (v, "Invalid value for ->decomposition", value);
1194 XVECTOR_DATA(seq)[i++] = v;
1196 XVECTOR_DATA(seq)[i++] = make_char (c);
1200 put_char_id_table (c, character, table);
1205 ntable = get_char_id_table (c, table);
1206 if (!CHAR_ID_TABLE_P (ntable))
1208 ntable = make_char_id_table (Qnil);
1209 put_char_id_table (c, ntable, table);
1217 Lisp_Object v = Fcar (value);
1221 Emchar c = XINT (v);
1223 = get_char_id_table (c, Vcharacter_variant_table);
1225 if (NILP (Fmemq (v, ret)))
1227 put_char_id_table (c, Fcons (character, ret),
1228 Vcharacter_variant_table);
1231 seq = make_vector (1, v);
1235 else if (EQ (attribute, Qto_ucs) || EQ (attribute, Q_ucs))
1241 signal_simple_error ("Invalid value for ->ucs", value);
1245 ret = get_char_id_table (c, Vcharacter_variant_table);
1246 if (NILP (Fmemq (character, ret)))
1248 put_char_id_table (c, Fcons (character, ret),
1249 Vcharacter_variant_table);
1252 if (EQ (attribute, Q_ucs))
1253 attribute = Qto_ucs;
1257 Lisp_Object table = Fgethash (attribute,
1258 Vchar_attribute_hash_table,
1263 table = make_char_id_table (Qunbound);
1264 Fputhash (attribute, table, Vchar_attribute_hash_table);
1266 put_char_id_table (XCHAR (character), value, table);
1271 DEFUN ("remove-char-attribute", Fremove_char_attribute, 2, 2, 0, /*
1272 Remove CHARACTER's ATTRIBUTE.
1274 (character, attribute))
1278 CHECK_CHAR (character);
1279 ccs = Ffind_charset (attribute);
1282 return remove_char_ccs (character, ccs);
1286 Lisp_Object table = Fgethash (attribute,
1287 Vchar_attribute_hash_table,
1289 if (!UNBOUNDP (table))
1291 put_char_id_table (XCHAR (character), Qunbound, table);
1298 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
1300 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
1302 /* ad-hoc method for `ascii' */
1303 if ((CHARSET_CHARS (cs) == 94) &&
1304 (CHARSET_BYTE_OFFSET (cs) != 33))
1305 return 128 - CHARSET_BYTE_OFFSET (cs);
1307 return CHARSET_CHARS (cs);
1310 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
1312 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
1314 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
1318 if (XVECTOR_LENGTH (v) > ccs_len)
1321 for (i = 0; i < XVECTOR_LENGTH (v); i++)
1323 Lisp_Object c = XVECTOR_DATA(v)[i];
1325 if (!NILP (c) && !CHARP (c))
1329 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
1341 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1344 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1354 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1355 nv = XVECTOR_DATA(v)[i];
1361 XVECTOR_DATA(v)[i] = Qnil;
1365 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1366 int code_point, Lisp_Object character);
1368 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1369 int code_point, Lisp_Object character)
1373 int ccs_len = XVECTOR_LENGTH (v);
1378 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1379 nv = XVECTOR_DATA(v)[i];
1383 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
1389 XVECTOR_DATA(v)[i] = character;
1393 put_char_ccs_code_point (Lisp_Object character,
1394 Lisp_Object ccs, Lisp_Object value)
1396 Lisp_Object encoding_table;
1398 if (!EQ (XCHARSET_NAME (ccs), Qucs)
1399 || (XCHAR (character) != XINT (value)))
1401 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
1402 int dim = XCHARSET_DIMENSION (ccs);
1403 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
1404 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
1408 { /* obsolete representation: value must be a list of bytes */
1409 Lisp_Object ret = Fcar (value);
1413 signal_simple_error ("Invalid value for coded-charset", value);
1414 code_point = XINT (ret);
1415 if (XCHARSET_GRAPHIC (ccs) == 1)
1417 rest = Fcdr (value);
1418 while (!NILP (rest))
1423 signal_simple_error ("Invalid value for coded-charset",
1427 signal_simple_error ("Invalid value for coded-charset",
1430 if (XCHARSET_GRAPHIC (ccs) == 1)
1432 code_point = (code_point << 8) | j;
1435 value = make_int (code_point);
1437 else if (INTP (value))
1439 code_point = XINT (value);
1440 if (XCHARSET_GRAPHIC (ccs) == 1)
1442 code_point &= 0x7F7F7F7F;
1443 value = make_int (code_point);
1447 signal_simple_error ("Invalid value for coded-charset", value);
1451 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
1454 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
1459 XCHARSET_DECODING_TABLE (ccs)
1460 = v = make_older_vector (ccs_len, Qnil);
1463 decoding_table_put_char (v, dim, byte_offset, code_point, character);
1465 if (NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
1467 XCHARSET_ENCODING_TABLE (ccs)
1468 = encoding_table = make_char_id_table (Qnil);
1470 put_char_id_table (XCHAR (character), value, encoding_table);
1475 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
1477 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
1478 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1480 if (VECTORP (decoding_table))
1482 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
1486 decoding_table_remove_char (decoding_table,
1487 XCHARSET_DIMENSION (ccs),
1488 XCHARSET_BYTE_OFFSET (ccs),
1492 if (CHAR_ID_TABLE_P (encoding_table))
1494 put_char_id_table (XCHAR (character), Qnil, encoding_table);
1499 EXFUN (Fmake_char, 3);
1500 EXFUN (Fdecode_char, 2);
1502 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
1503 Store character's ATTRIBUTES.
1507 Lisp_Object rest = attributes;
1508 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
1509 Lisp_Object character;
1513 while (CONSP (rest))
1515 Lisp_Object cell = Fcar (rest);
1519 signal_simple_error ("Invalid argument", attributes);
1520 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
1521 && ((XCHARSET_FINAL (ccs) != 0) ||
1522 (XCHARSET_UCS_MAX (ccs) > 0)) )
1526 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
1528 character = Fdecode_char (ccs, cell);
1529 if (!NILP (character))
1530 goto setup_attributes;
1534 if ( (!NILP (code = Fcdr (Fassq (Qto_ucs, attributes)))) ||
1535 (!NILP (code = Fcdr (Fassq (Q_ucs, attributes)))) )
1539 signal_simple_error ("Invalid argument", attributes);
1541 character = make_char (XINT (code) + 0x100000);
1542 goto setup_attributes;
1546 else if (!INTP (code))
1547 signal_simple_error ("Invalid argument", attributes);
1549 character = make_char (XINT (code));
1553 while (CONSP (rest))
1555 Lisp_Object cell = Fcar (rest);
1558 signal_simple_error ("Invalid argument", attributes);
1560 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
1566 Lisp_Object Vutf_2000_version;
1570 int leading_code_private_11;
1573 Lisp_Object Qcharsetp;
1575 /* Qdoc_string, Qdimension, Qchars defined in general.c */
1576 Lisp_Object Qregistry, Qfinal, Qgraphic;
1577 Lisp_Object Qdirection;
1578 Lisp_Object Qreverse_direction_charset;
1579 Lisp_Object Qleading_byte;
1580 Lisp_Object Qshort_name, Qlong_name;
1594 Qcyrillic_iso8859_5,
1596 Qjapanese_jisx0208_1978,
1600 Qjapanese_jisx0208_1990,
1603 Qchinese_cns11643_1,
1604 Qchinese_cns11643_2,
1611 Qlatin_viscii_lower,
1612 Qlatin_viscii_upper,
1613 Qvietnamese_viscii_lower,
1614 Qvietnamese_viscii_upper,
1626 Qideograph_gt_pj_10,
1627 Qideograph_gt_pj_11,
1657 Lisp_Object Ql2r, Qr2l;
1659 Lisp_Object Vcharset_hash_table;
1661 /* Composite characters are characters constructed by overstriking two
1662 or more regular characters.
1664 1) The old Mule implementation involves storing composite characters
1665 in a buffer as a tag followed by all of the actual characters
1666 used to make up the composite character. I think this is a bad
1667 idea; it greatly complicates code that wants to handle strings
1668 one character at a time because it has to deal with the possibility
1669 of great big ungainly characters. It's much more reasonable to
1670 simply store an index into a table of composite characters.
1672 2) The current implementation only allows for 16,384 separate
1673 composite characters over the lifetime of the XEmacs process.
1674 This could become a potential problem if the user
1675 edited lots of different files that use composite characters.
1676 Due to FSF bogosity, increasing the number of allowable
1677 composite characters under Mule would decrease the number
1678 of possible faces that can exist. Mule already has shrunk
1679 this to 2048, and further shrinkage would become uncomfortable.
1680 No such problems exist in XEmacs.
1682 Composite characters could be represented as 0x80 C1 C2 C3,
1683 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1684 for slightly under 2^20 (one million) composite characters
1685 over the XEmacs process lifetime, and you only need to
1686 increase the size of a Mule character from 19 to 21 bits.
1687 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1688 85 million (slightly over 2^26) composite characters. */
1691 /************************************************************************/
1692 /* Basic Emchar functions */
1693 /************************************************************************/
1695 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1696 string in STR. Returns the number of bytes stored.
1697 Do not call this directly. Use the macro set_charptr_emchar() instead.
1701 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1707 Lisp_Object charset;
1716 else if ( c <= 0x7ff )
1718 *p++ = (c >> 6) | 0xc0;
1719 *p++ = (c & 0x3f) | 0x80;
1721 else if ( c <= 0xffff )
1723 *p++ = (c >> 12) | 0xe0;
1724 *p++ = ((c >> 6) & 0x3f) | 0x80;
1725 *p++ = (c & 0x3f) | 0x80;
1727 else if ( c <= 0x1fffff )
1729 *p++ = (c >> 18) | 0xf0;
1730 *p++ = ((c >> 12) & 0x3f) | 0x80;
1731 *p++ = ((c >> 6) & 0x3f) | 0x80;
1732 *p++ = (c & 0x3f) | 0x80;
1734 else if ( c <= 0x3ffffff )
1736 *p++ = (c >> 24) | 0xf8;
1737 *p++ = ((c >> 18) & 0x3f) | 0x80;
1738 *p++ = ((c >> 12) & 0x3f) | 0x80;
1739 *p++ = ((c >> 6) & 0x3f) | 0x80;
1740 *p++ = (c & 0x3f) | 0x80;
1744 *p++ = (c >> 30) | 0xfc;
1745 *p++ = ((c >> 24) & 0x3f) | 0x80;
1746 *p++ = ((c >> 18) & 0x3f) | 0x80;
1747 *p++ = ((c >> 12) & 0x3f) | 0x80;
1748 *p++ = ((c >> 6) & 0x3f) | 0x80;
1749 *p++ = (c & 0x3f) | 0x80;
1752 BREAKUP_CHAR (c, charset, c1, c2);
1753 lb = CHAR_LEADING_BYTE (c);
1754 if (LEADING_BYTE_PRIVATE_P (lb))
1755 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1757 if (EQ (charset, Vcharset_control_1))
1766 /* Return the first character from a Mule-encoded string in STR,
1767 assuming it's non-ASCII. Do not call this directly.
1768 Use the macro charptr_emchar() instead. */
1771 non_ascii_charptr_emchar (const Bufbyte *str)
1784 else if ( b >= 0xf8 )
1789 else if ( b >= 0xf0 )
1794 else if ( b >= 0xe0 )
1799 else if ( b >= 0xc0 )
1809 for( ; len > 0; len-- )
1812 ch = ( ch << 6 ) | ( b & 0x3f );
1816 Bufbyte i0 = *str, i1, i2 = 0;
1817 Lisp_Object charset;
1819 if (i0 == LEADING_BYTE_CONTROL_1)
1820 return (Emchar) (*++str - 0x20);
1822 if (LEADING_BYTE_PREFIX_P (i0))
1827 charset = CHARSET_BY_LEADING_BYTE (i0);
1828 if (XCHARSET_DIMENSION (charset) == 2)
1831 return MAKE_CHAR (charset, i1, i2);
1835 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1836 Do not call this directly. Use the macro valid_char_p() instead. */
1840 non_ascii_valid_char_p (Emchar ch)
1844 /* Must have only lowest 19 bits set */
1848 f1 = CHAR_FIELD1 (ch);
1849 f2 = CHAR_FIELD2 (ch);
1850 f3 = CHAR_FIELD3 (ch);
1854 Lisp_Object charset;
1856 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1857 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1858 f2 > MAX_CHAR_FIELD2_PRIVATE)
1863 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1864 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1868 NOTE: This takes advantage of the fact that
1869 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1870 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1872 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1873 if (EQ (charset, Qnil))
1875 return (XCHARSET_CHARS (charset) == 96);
1879 Lisp_Object charset;
1881 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1882 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1883 f1 > MAX_CHAR_FIELD1_PRIVATE)
1885 if (f2 < 0x20 || f3 < 0x20)
1888 #ifdef ENABLE_COMPOSITE_CHARS
1889 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1891 if (UNBOUNDP (Fgethash (make_int (ch),
1892 Vcomposite_char_char2string_hash_table,
1897 #endif /* ENABLE_COMPOSITE_CHARS */
1899 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1900 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1903 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1905 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1908 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1910 if (EQ (charset, Qnil))
1912 return (XCHARSET_CHARS (charset) == 96);
1918 /************************************************************************/
1919 /* Basic string functions */
1920 /************************************************************************/
1922 /* Copy the character pointed to by SRC into DST. Do not call this
1923 directly. Use the macro charptr_copy_char() instead.
1924 Return the number of bytes copied. */
1927 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
1929 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
1931 for (i = bytes; i; i--, dst++, src++)
1937 /************************************************************************/
1938 /* streams of Emchars */
1939 /************************************************************************/
1941 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1942 The functions below are not meant to be called directly; use
1943 the macros in insdel.h. */
1946 Lstream_get_emchar_1 (Lstream *stream, int ch)
1948 Bufbyte str[MAX_EMCHAR_LEN];
1949 Bufbyte *strptr = str;
1952 str[0] = (Bufbyte) ch;
1954 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
1956 int c = Lstream_getc (stream);
1957 bufpos_checking_assert (c >= 0);
1958 *++strptr = (Bufbyte) c;
1960 return charptr_emchar (str);
1964 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1966 Bufbyte str[MAX_EMCHAR_LEN];
1967 Bytecount len = set_charptr_emchar (str, ch);
1968 return Lstream_write (stream, str, len);
1972 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1974 Bufbyte str[MAX_EMCHAR_LEN];
1975 Bytecount len = set_charptr_emchar (str, ch);
1976 Lstream_unread (stream, str, len);
1980 /************************************************************************/
1981 /* charset object */
1982 /************************************************************************/
1985 mark_charset (Lisp_Object obj)
1987 Lisp_Charset *cs = XCHARSET (obj);
1989 mark_object (cs->short_name);
1990 mark_object (cs->long_name);
1991 mark_object (cs->doc_string);
1992 mark_object (cs->registry);
1993 mark_object (cs->ccl_program);
1995 mark_object (cs->encoding_table);
1996 /* mark_object (cs->decoding_table); */
2002 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
2004 Lisp_Charset *cs = XCHARSET (obj);
2008 error ("printing unreadable object #<charset %s 0x%x>",
2009 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
2012 write_c_string ("#<charset ", printcharfun);
2013 print_internal (CHARSET_NAME (cs), printcharfun, 0);
2014 write_c_string (" ", printcharfun);
2015 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
2016 write_c_string (" ", printcharfun);
2017 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
2018 write_c_string (" ", printcharfun);
2019 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
2020 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
2022 CHARSET_DIMENSION (cs),
2023 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
2024 CHARSET_COLUMNS (cs),
2025 CHARSET_GRAPHIC (cs),
2026 CHARSET_FINAL (cs));
2027 write_c_string (buf, printcharfun);
2028 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
2029 sprintf (buf, " 0x%x>", cs->header.uid);
2030 write_c_string (buf, printcharfun);
2033 static const struct lrecord_description charset_description[] = {
2034 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
2035 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
2036 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
2037 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
2038 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
2039 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
2040 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
2042 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
2043 { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
2048 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
2049 mark_charset, print_charset, 0, 0, 0,
2050 charset_description,
2053 /* Make a new charset. */
2054 /* #### SJT Should generic properties be allowed? */
2056 make_charset (Charset_ID id, Lisp_Object name,
2057 unsigned short chars, unsigned char dimension,
2058 unsigned char columns, unsigned char graphic,
2059 Bufbyte final, unsigned char direction, Lisp_Object short_name,
2060 Lisp_Object long_name, Lisp_Object doc,
2062 Lisp_Object decoding_table,
2063 Emchar ucs_min, Emchar ucs_max,
2064 Emchar code_offset, unsigned char byte_offset)
2067 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
2071 XSETCHARSET (obj, cs);
2073 CHARSET_ID (cs) = id;
2074 CHARSET_NAME (cs) = name;
2075 CHARSET_SHORT_NAME (cs) = short_name;
2076 CHARSET_LONG_NAME (cs) = long_name;
2077 CHARSET_CHARS (cs) = chars;
2078 CHARSET_DIMENSION (cs) = dimension;
2079 CHARSET_DIRECTION (cs) = direction;
2080 CHARSET_COLUMNS (cs) = columns;
2081 CHARSET_GRAPHIC (cs) = graphic;
2082 CHARSET_FINAL (cs) = final;
2083 CHARSET_DOC_STRING (cs) = doc;
2084 CHARSET_REGISTRY (cs) = reg;
2085 CHARSET_CCL_PROGRAM (cs) = Qnil;
2086 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
2088 CHARSET_DECODING_TABLE(cs) = Qnil;
2089 CHARSET_ENCODING_TABLE(cs) = Qnil;
2090 CHARSET_UCS_MIN(cs) = ucs_min;
2091 CHARSET_UCS_MAX(cs) = ucs_max;
2092 CHARSET_CODE_OFFSET(cs) = code_offset;
2093 CHARSET_BYTE_OFFSET(cs) = byte_offset;
2097 if (id == LEADING_BYTE_ASCII)
2098 CHARSET_REP_BYTES (cs) = 1;
2100 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
2102 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
2107 /* some charsets do not have final characters. This includes
2108 ASCII, Control-1, Composite, and the two faux private
2110 unsigned char iso2022_type
2111 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
2113 if (code_offset == 0)
2115 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
2116 chlook->charset_by_attributes[iso2022_type][final] = obj;
2120 (chlook->charset_by_attributes[iso2022_type][final][direction]));
2121 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
2125 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
2126 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
2128 /* Some charsets are "faux" and don't have names or really exist at
2129 all except in the leading-byte table. */
2131 Fputhash (name, obj, Vcharset_hash_table);
2136 get_unallocated_leading_byte (int dimension)
2141 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
2144 lb = chlook->next_allocated_leading_byte++;
2148 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
2151 lb = chlook->next_allocated_1_byte_leading_byte++;
2155 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
2158 lb = chlook->next_allocated_2_byte_leading_byte++;
2164 ("No more character sets free for this dimension",
2165 make_int (dimension));
2171 /* Number of Big5 characters which have the same code in 1st byte. */
2173 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
2176 decode_builtin_char (Lisp_Object charset, int code_point)
2180 if (EQ (charset, Vcharset_chinese_big5))
2182 int c1 = code_point >> 8;
2183 int c2 = code_point & 0xFF;
2186 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
2188 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
2189 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
2191 I = (c1 - 0xA1) * BIG5_SAME_ROW
2192 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
2196 charset = Vcharset_chinese_big5_1;
2200 charset = Vcharset_chinese_big5_2;
2201 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
2203 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
2206 if ((final = XCHARSET_FINAL (charset)) >= '0')
2208 if (XCHARSET_DIMENSION (charset) == 1)
2210 switch (XCHARSET_CHARS (charset))
2214 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
2217 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
2225 switch (XCHARSET_CHARS (charset))
2228 return MIN_CHAR_94x94
2229 + (final - '0') * 94 * 94
2230 + (((code_point >> 8) & 0x7F) - 33) * 94
2231 + ((code_point & 0x7F) - 33);
2233 return MIN_CHAR_96x96
2234 + (final - '0') * 96 * 96
2235 + (((code_point >> 8) & 0x7F) - 32) * 96
2236 + ((code_point & 0x7F) - 32);
2243 else if (XCHARSET_UCS_MAX (charset))
2246 = (XCHARSET_DIMENSION (charset) == 1
2248 code_point - XCHARSET_BYTE_OFFSET (charset)
2250 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
2251 * XCHARSET_CHARS (charset)
2252 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
2253 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
2254 if ((cid < XCHARSET_UCS_MIN (charset))
2255 || (XCHARSET_UCS_MAX (charset) < cid))
2264 range_charset_code_point (Lisp_Object charset, Emchar ch)
2268 if ((XCHARSET_UCS_MIN (charset) <= ch)
2269 && (ch <= XCHARSET_UCS_MAX (charset)))
2271 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
2273 if (XCHARSET_CHARS (charset) == 256)
2275 else if (XCHARSET_DIMENSION (charset) == 1)
2276 return d + XCHARSET_BYTE_OFFSET (charset);
2277 else if (XCHARSET_DIMENSION (charset) == 2)
2279 ((d / XCHARSET_CHARS (charset)
2280 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2281 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2282 else if (XCHARSET_DIMENSION (charset) == 3)
2284 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2285 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2286 | ((d / XCHARSET_CHARS (charset)
2287 % XCHARSET_CHARS (charset)
2288 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2289 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2290 else /* if (XCHARSET_DIMENSION (charset) == 4) */
2292 ((d / (XCHARSET_CHARS (charset)
2293 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2294 + XCHARSET_BYTE_OFFSET (charset)) << 24)
2295 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2296 % XCHARSET_CHARS (charset)
2297 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2298 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
2299 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2300 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2302 else if (XCHARSET_CODE_OFFSET (charset) == 0)
2304 if (XCHARSET_DIMENSION (charset) == 1)
2306 if (XCHARSET_CHARS (charset) == 94)
2308 if (((d = ch - (MIN_CHAR_94
2309 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
2313 else if (XCHARSET_CHARS (charset) == 96)
2315 if (((d = ch - (MIN_CHAR_96
2316 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
2323 else if (XCHARSET_DIMENSION (charset) == 2)
2325 if (XCHARSET_CHARS (charset) == 94)
2327 if (((d = ch - (MIN_CHAR_94x94
2328 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
2331 return (((d / 94) + 33) << 8) | (d % 94 + 33);
2333 else if (XCHARSET_CHARS (charset) == 96)
2335 if (((d = ch - (MIN_CHAR_96x96
2336 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
2339 return (((d / 96) + 32) << 8) | (d % 96 + 32);
2345 if (EQ (charset, Vcharset_mojikyo_2022_1)
2346 && (MIN_CHAR_MOJIKYO < ch) && (ch < MIN_CHAR_MOJIKYO + 94 * 60 * 94))
2348 int m = ch - MIN_CHAR_MOJIKYO - 1;
2349 int byte1 = m / (94 * 60) + 33;
2350 int byte2 = (m % (94 * 60)) / 94;
2351 int byte3 = m % 94 + 33;
2357 return (byte1 << 16) | (byte2 << 8) | byte3;
2363 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
2365 if (c <= MAX_CHAR_BASIC_LATIN)
2367 *charset = Vcharset_ascii;
2372 *charset = Vcharset_control_1;
2377 *charset = Vcharset_latin_iso8859_1;
2381 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
2383 *charset = Vcharset_greek_iso8859_7;
2384 return c - MIN_CHAR_GREEK + 0x20;
2386 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
2388 *charset = Vcharset_cyrillic_iso8859_5;
2389 return c - MIN_CHAR_CYRILLIC + 0x20;
2392 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
2394 *charset = Vcharset_hebrew_iso8859_8;
2395 return c - MIN_CHAR_HEBREW + 0x20;
2397 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
2399 *charset = Vcharset_thai_tis620;
2400 return c - MIN_CHAR_THAI + 0x20;
2403 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
2404 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
2406 return list2 (Vcharset_katakana_jisx0201,
2407 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
2410 else if (c <= MAX_CHAR_BMP)
2412 *charset = Vcharset_ucs_bmp;
2415 else if (c < MIN_CHAR_DAIKANWA)
2417 *charset = Vcharset_ucs;
2420 else if (c <= MAX_CHAR_DAIKANWA)
2422 *charset = Vcharset_ideograph_daikanwa;
2423 return c - MIN_CHAR_DAIKANWA;
2425 else if (c <= MAX_CHAR_MOJIKYO_0)
2427 *charset = Vcharset_mojikyo;
2428 return c - MIN_CHAR_MOJIKYO_0;
2430 else if (c < MIN_CHAR_94)
2432 *charset = Vcharset_ucs;
2435 else if (c <= MAX_CHAR_94)
2437 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
2438 ((c - MIN_CHAR_94) / 94) + '0',
2439 CHARSET_LEFT_TO_RIGHT);
2440 if (!NILP (*charset))
2441 return ((c - MIN_CHAR_94) % 94) + 33;
2444 *charset = Vcharset_ucs;
2448 else if (c <= MAX_CHAR_96)
2450 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
2451 ((c - MIN_CHAR_96) / 96) + '0',
2452 CHARSET_LEFT_TO_RIGHT);
2453 if (!NILP (*charset))
2454 return ((c - MIN_CHAR_96) % 96) + 32;
2457 *charset = Vcharset_ucs;
2461 else if (c <= MAX_CHAR_94x94)
2464 = CHARSET_BY_ATTRIBUTES (94, 2,
2465 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
2466 CHARSET_LEFT_TO_RIGHT);
2467 if (!NILP (*charset))
2468 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
2469 | (((c - MIN_CHAR_94x94) % 94) + 33);
2472 *charset = Vcharset_ucs;
2476 else if (c <= MAX_CHAR_96x96)
2479 = CHARSET_BY_ATTRIBUTES (96, 2,
2480 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
2481 CHARSET_LEFT_TO_RIGHT);
2482 if (!NILP (*charset))
2483 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
2484 | (((c - MIN_CHAR_96x96) % 96) + 32);
2487 *charset = Vcharset_ucs;
2491 else if (c < MIN_CHAR_MOJIKYO)
2493 *charset = Vcharset_ucs;
2496 else if (c <= MAX_CHAR_MOJIKYO)
2498 *charset = Vcharset_mojikyo;
2499 return c - MIN_CHAR_MOJIKYO;
2503 *charset = Vcharset_ucs;
2508 Lisp_Object Vdefault_coded_charset_priority_list;
2512 /************************************************************************/
2513 /* Basic charset Lisp functions */
2514 /************************************************************************/
2516 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
2517 Return non-nil if OBJECT is a charset.
2521 return CHARSETP (object) ? Qt : Qnil;
2524 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
2525 Retrieve the charset of the given name.
2526 If CHARSET-OR-NAME is a charset object, it is simply returned.
2527 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
2528 nil is returned. Otherwise the associated charset object is returned.
2532 if (CHARSETP (charset_or_name))
2533 return charset_or_name;
2535 CHECK_SYMBOL (charset_or_name);
2536 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
2539 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
2540 Retrieve the charset of the given name.
2541 Same as `find-charset' except an error is signalled if there is no such
2542 charset instead of returning nil.
2546 Lisp_Object charset = Ffind_charset (name);
2549 signal_simple_error ("No such charset", name);
2553 /* We store the charsets in hash tables with the names as the key and the
2554 actual charset object as the value. Occasionally we need to use them
2555 in a list format. These routines provide us with that. */
2556 struct charset_list_closure
2558 Lisp_Object *charset_list;
2562 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
2563 void *charset_list_closure)
2565 /* This function can GC */
2566 struct charset_list_closure *chcl =
2567 (struct charset_list_closure*) charset_list_closure;
2568 Lisp_Object *charset_list = chcl->charset_list;
2570 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
2574 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
2575 Return a list of the names of all defined charsets.
2579 Lisp_Object charset_list = Qnil;
2580 struct gcpro gcpro1;
2581 struct charset_list_closure charset_list_closure;
2583 GCPRO1 (charset_list);
2584 charset_list_closure.charset_list = &charset_list;
2585 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
2586 &charset_list_closure);
2589 return charset_list;
2592 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
2593 Return the name of charset CHARSET.
2597 return XCHARSET_NAME (Fget_charset (charset));
2600 /* #### SJT Should generic properties be allowed? */
2601 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
2602 Define a new character set.
2603 This function is for use with Mule support.
2604 NAME is a symbol, the name by which the character set is normally referred.
2605 DOC-STRING is a string describing the character set.
2606 PROPS is a property list, describing the specific nature of the
2607 character set. Recognized properties are:
2609 'short-name Short version of the charset name (ex: Latin-1)
2610 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
2611 'registry A regular expression matching the font registry field for
2613 'dimension Number of octets used to index a character in this charset.
2614 Either 1 or 2. Defaults to 1.
2615 'columns Number of columns used to display a character in this charset.
2616 Only used in TTY mode. (Under X, the actual width of a
2617 character can be derived from the font used to display the
2618 characters.) If unspecified, defaults to the dimension
2619 (this is almost always the correct value).
2620 'chars Number of characters in each dimension (94 or 96).
2621 Defaults to 94. Note that if the dimension is 2, the
2622 character set thus described is 94x94 or 96x96.
2623 'final Final byte of ISO 2022 escape sequence. Must be
2624 supplied. Each combination of (DIMENSION, CHARS) defines a
2625 separate namespace for final bytes. Note that ISO
2626 2022 restricts the final byte to the range
2627 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
2628 dimension == 2. Note also that final bytes in the range
2629 0x30 - 0x3F are reserved for user-defined (not official)
2631 'graphic 0 (use left half of font on output) or 1 (use right half
2632 of font on output). Defaults to 0. For example, for
2633 a font whose registry is ISO8859-1, the left half
2634 (octets 0x20 - 0x7F) is the `ascii' character set, while
2635 the right half (octets 0xA0 - 0xFF) is the `latin-1'
2636 character set. With 'graphic set to 0, the octets
2637 will have their high bit cleared; with it set to 1,
2638 the octets will have their high bit set.
2639 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
2641 'ccl-program A compiled CCL program used to convert a character in
2642 this charset into an index into the font. This is in
2643 addition to the 'graphic property. The CCL program
2644 is passed the octets of the character, with the high
2645 bit cleared and set depending upon whether the value
2646 of the 'graphic property is 0 or 1.
2648 (name, doc_string, props))
2650 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
2651 int direction = CHARSET_LEFT_TO_RIGHT;
2652 Lisp_Object registry = Qnil;
2653 Lisp_Object charset;
2654 Lisp_Object ccl_program = Qnil;
2655 Lisp_Object short_name = Qnil, long_name = Qnil;
2656 int byte_offset = -1;
2658 CHECK_SYMBOL (name);
2659 if (!NILP (doc_string))
2660 CHECK_STRING (doc_string);
2662 charset = Ffind_charset (name);
2663 if (!NILP (charset))
2664 signal_simple_error ("Cannot redefine existing charset", name);
2667 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
2669 if (EQ (keyword, Qshort_name))
2671 CHECK_STRING (value);
2675 if (EQ (keyword, Qlong_name))
2677 CHECK_STRING (value);
2681 else if (EQ (keyword, Qdimension))
2684 dimension = XINT (value);
2685 if (dimension < 1 || dimension > 2)
2686 signal_simple_error ("Invalid value for 'dimension", value);
2689 else if (EQ (keyword, Qchars))
2692 chars = XINT (value);
2693 if (chars != 94 && chars != 96)
2694 signal_simple_error ("Invalid value for 'chars", value);
2697 else if (EQ (keyword, Qcolumns))
2700 columns = XINT (value);
2701 if (columns != 1 && columns != 2)
2702 signal_simple_error ("Invalid value for 'columns", value);
2705 else if (EQ (keyword, Qgraphic))
2708 graphic = XINT (value);
2710 if (graphic < 0 || graphic > 2)
2712 if (graphic < 0 || graphic > 1)
2714 signal_simple_error ("Invalid value for 'graphic", value);
2717 else if (EQ (keyword, Qregistry))
2719 CHECK_STRING (value);
2723 else if (EQ (keyword, Qdirection))
2725 if (EQ (value, Ql2r))
2726 direction = CHARSET_LEFT_TO_RIGHT;
2727 else if (EQ (value, Qr2l))
2728 direction = CHARSET_RIGHT_TO_LEFT;
2730 signal_simple_error ("Invalid value for 'direction", value);
2733 else if (EQ (keyword, Qfinal))
2735 CHECK_CHAR_COERCE_INT (value);
2736 final = XCHAR (value);
2737 if (final < '0' || final > '~')
2738 signal_simple_error ("Invalid value for 'final", value);
2741 else if (EQ (keyword, Qccl_program))
2743 struct ccl_program test_ccl;
2745 if (setup_ccl_program (&test_ccl, value) < 0)
2746 signal_simple_error ("Invalid value for 'ccl-program", value);
2747 ccl_program = value;
2751 signal_simple_error ("Unrecognized property", keyword);
2756 error ("'final must be specified");
2757 if (dimension == 2 && final > 0x5F)
2759 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2762 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2763 CHARSET_LEFT_TO_RIGHT)) ||
2764 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2765 CHARSET_RIGHT_TO_LEFT)))
2767 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2769 id = get_unallocated_leading_byte (dimension);
2771 if (NILP (doc_string))
2772 doc_string = build_string ("");
2774 if (NILP (registry))
2775 registry = build_string ("");
2777 if (NILP (short_name))
2778 XSETSTRING (short_name, XSYMBOL (name)->name);
2780 if (NILP (long_name))
2781 long_name = doc_string;
2784 columns = dimension;
2786 if (byte_offset < 0)
2790 else if (chars == 96)
2796 charset = make_charset (id, name, chars, dimension, columns, graphic,
2797 final, direction, short_name, long_name,
2798 doc_string, registry,
2799 Qnil, 0, 0, 0, byte_offset);
2800 if (!NILP (ccl_program))
2801 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2805 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2807 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2808 NEW-NAME is the name of the new charset. Return the new charset.
2810 (charset, new_name))
2812 Lisp_Object new_charset = Qnil;
2813 int id, chars, dimension, columns, graphic, final;
2815 Lisp_Object registry, doc_string, short_name, long_name;
2818 charset = Fget_charset (charset);
2819 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2820 signal_simple_error ("Charset already has reverse-direction charset",
2823 CHECK_SYMBOL (new_name);
2824 if (!NILP (Ffind_charset (new_name)))
2825 signal_simple_error ("Cannot redefine existing charset", new_name);
2827 cs = XCHARSET (charset);
2829 chars = CHARSET_CHARS (cs);
2830 dimension = CHARSET_DIMENSION (cs);
2831 columns = CHARSET_COLUMNS (cs);
2832 id = get_unallocated_leading_byte (dimension);
2834 graphic = CHARSET_GRAPHIC (cs);
2835 final = CHARSET_FINAL (cs);
2836 direction = CHARSET_RIGHT_TO_LEFT;
2837 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2838 direction = CHARSET_LEFT_TO_RIGHT;
2839 doc_string = CHARSET_DOC_STRING (cs);
2840 short_name = CHARSET_SHORT_NAME (cs);
2841 long_name = CHARSET_LONG_NAME (cs);
2842 registry = CHARSET_REGISTRY (cs);
2844 new_charset = make_charset (id, new_name, chars, dimension, columns,
2845 graphic, final, direction, short_name, long_name,
2846 doc_string, registry,
2848 CHARSET_DECODING_TABLE(cs),
2849 CHARSET_UCS_MIN(cs),
2850 CHARSET_UCS_MAX(cs),
2851 CHARSET_CODE_OFFSET(cs),
2852 CHARSET_BYTE_OFFSET(cs)
2858 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2859 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
2864 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2865 Define symbol ALIAS as an alias for CHARSET.
2869 CHECK_SYMBOL (alias);
2870 charset = Fget_charset (charset);
2871 return Fputhash (alias, charset, Vcharset_hash_table);
2874 /* #### Reverse direction charsets not yet implemented. */
2876 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2878 Return the reverse-direction charset parallel to CHARSET, if any.
2879 This is the charset with the same properties (in particular, the same
2880 dimension, number of characters per dimension, and final byte) as
2881 CHARSET but whose characters are displayed in the opposite direction.
2885 charset = Fget_charset (charset);
2886 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
2890 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2891 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2892 If DIRECTION is omitted, both directions will be checked (left-to-right
2893 will be returned if character sets exist for both directions).
2895 (dimension, chars, final, direction))
2897 int dm, ch, fi, di = -1;
2898 Lisp_Object obj = Qnil;
2900 CHECK_INT (dimension);
2901 dm = XINT (dimension);
2902 if (dm < 1 || dm > 2)
2903 signal_simple_error ("Invalid value for DIMENSION", dimension);
2907 if (ch != 94 && ch != 96)
2908 signal_simple_error ("Invalid value for CHARS", chars);
2910 CHECK_CHAR_COERCE_INT (final);
2912 if (fi < '0' || fi > '~')
2913 signal_simple_error ("Invalid value for FINAL", final);
2915 if (EQ (direction, Ql2r))
2916 di = CHARSET_LEFT_TO_RIGHT;
2917 else if (EQ (direction, Qr2l))
2918 di = CHARSET_RIGHT_TO_LEFT;
2919 else if (!NILP (direction))
2920 signal_simple_error ("Invalid value for DIRECTION", direction);
2922 if (dm == 2 && fi > 0x5F)
2924 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2928 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
2930 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
2933 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
2936 return XCHARSET_NAME (obj);
2940 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2941 Return short name of CHARSET.
2945 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2948 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2949 Return long name of CHARSET.
2953 return XCHARSET_LONG_NAME (Fget_charset (charset));
2956 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2957 Return description of CHARSET.
2961 return XCHARSET_DOC_STRING (Fget_charset (charset));
2964 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2965 Return dimension of CHARSET.
2969 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2972 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2973 Return property PROP of CHARSET, a charset object or symbol naming a charset.
2974 Recognized properties are those listed in `make-charset', as well as
2975 'name and 'doc-string.
2981 charset = Fget_charset (charset);
2982 cs = XCHARSET (charset);
2984 CHECK_SYMBOL (prop);
2985 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2986 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2987 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2988 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2989 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2990 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2991 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2992 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2993 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2994 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2995 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2996 if (EQ (prop, Qdirection))
2997 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2998 if (EQ (prop, Qreverse_direction_charset))
3000 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
3001 /* #### Is this translation OK? If so, error checking sufficient? */
3002 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
3004 signal_simple_error ("Unrecognized charset property name", prop);
3005 return Qnil; /* not reached */
3008 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
3009 Return charset identification number of CHARSET.
3013 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
3016 /* #### We need to figure out which properties we really want to
3019 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
3020 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
3022 (charset, ccl_program))
3024 struct ccl_program test_ccl;
3026 charset = Fget_charset (charset);
3027 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
3028 signal_simple_error ("Invalid ccl-program", ccl_program);
3029 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
3034 invalidate_charset_font_caches (Lisp_Object charset)
3036 /* Invalidate font cache entries for charset on all devices. */
3037 Lisp_Object devcons, concons, hash_table;
3038 DEVICE_LOOP_NO_BREAK (devcons, concons)
3040 struct device *d = XDEVICE (XCAR (devcons));
3041 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
3042 if (!UNBOUNDP (hash_table))
3043 Fclrhash (hash_table);
3047 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
3048 Set the 'registry property of CHARSET to REGISTRY.
3050 (charset, registry))
3052 charset = Fget_charset (charset);
3053 CHECK_STRING (registry);
3054 XCHARSET_REGISTRY (charset) = registry;
3055 invalidate_charset_font_caches (charset);
3056 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
3061 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
3062 Return mapping-table of CHARSET.
3066 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
3069 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
3070 Set mapping-table of CHARSET to TABLE.
3074 struct Lisp_Charset *cs;
3078 charset = Fget_charset (charset);
3079 cs = XCHARSET (charset);
3083 if (VECTORP (CHARSET_DECODING_TABLE(cs)))
3084 make_vector_newer (CHARSET_DECODING_TABLE(cs));
3085 CHARSET_DECODING_TABLE(cs) = Qnil;
3088 else if (VECTORP (table))
3090 int ccs_len = CHARSET_BYTE_SIZE (cs);
3091 int ret = decoding_table_check_elements (table,
3092 CHARSET_DIMENSION (cs),
3097 signal_simple_error ("Too big table", table);
3099 signal_simple_error ("Invalid element is found", table);
3101 signal_simple_error ("Something wrong", table);
3103 CHARSET_DECODING_TABLE(cs) = Qnil;
3106 signal_error (Qwrong_type_argument,
3107 list2 (build_translated_string ("vector-or-nil-p"),
3110 byte_offset = CHARSET_BYTE_OFFSET (cs);
3111 switch (CHARSET_DIMENSION (cs))
3114 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3116 Lisp_Object c = XVECTOR_DATA(table)[i];
3119 put_char_ccs_code_point (c, charset,
3120 make_int (i + byte_offset));
3124 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3126 Lisp_Object v = XVECTOR_DATA(table)[i];
3132 for (j = 0; j < XVECTOR_LENGTH (v); j++)
3134 Lisp_Object c = XVECTOR_DATA(v)[j];
3137 put_char_ccs_code_point
3139 make_int ( ( (i + byte_offset) << 8 )
3145 put_char_ccs_code_point (v, charset,
3146 make_int (i + byte_offset));
3155 /************************************************************************/
3156 /* Lisp primitives for working with characters */
3157 /************************************************************************/
3160 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
3161 Make a character from CHARSET and code-point CODE.
3167 charset = Fget_charset (charset);
3170 if (XCHARSET_GRAPHIC (charset) == 1)
3172 c = DECODE_CHAR (charset, c);
3173 return c >= 0 ? make_char (c) : Qnil;
3176 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
3177 Make a builtin character from CHARSET and code-point CODE.
3183 charset = Fget_charset (charset);
3185 if (EQ (charset, Vcharset_latin_viscii))
3187 Lisp_Object chr = Fdecode_char (charset, code);
3193 (ret = Fget_char_attribute (chr,
3194 Vcharset_latin_viscii_lower,
3197 charset = Vcharset_latin_viscii_lower;
3201 (ret = Fget_char_attribute (chr,
3202 Vcharset_latin_viscii_upper,
3205 charset = Vcharset_latin_viscii_upper;
3212 if (XCHARSET_GRAPHIC (charset) == 1)
3215 c = decode_builtin_char (charset, c);
3216 return c >= 0 ? make_char (c) : Fdecode_char (charset, code);
3220 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
3221 Make a character from CHARSET and octets ARG1 and ARG2.
3222 ARG2 is required only for characters from two-dimensional charsets.
3223 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
3224 character s with caron.
3226 (charset, arg1, arg2))
3230 int lowlim, highlim;
3232 charset = Fget_charset (charset);
3233 cs = XCHARSET (charset);
3235 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
3236 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
3238 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
3240 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
3241 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
3244 /* It is useful (and safe, according to Olivier Galibert) to strip
3245 the 8th bit off ARG1 and ARG2 because it allows programmers to
3246 write (make-char 'latin-iso8859-2 CODE) where code is the actual
3247 Latin 2 code of the character. */
3255 if (a1 < lowlim || a1 > highlim)
3256 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
3258 if (CHARSET_DIMENSION (cs) == 1)
3262 ("Charset is of dimension one; second octet must be nil", arg2);
3263 return make_char (MAKE_CHAR (charset, a1, 0));
3272 a2 = XINT (arg2) & 0x7f;
3274 if (a2 < lowlim || a2 > highlim)
3275 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
3277 return make_char (MAKE_CHAR (charset, a1, a2));
3280 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
3281 Return the character set of CHARACTER.
3285 CHECK_CHAR_COERCE_INT (character);
3287 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
3290 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
3291 Return the octet numbered N (should be 0 or 1) of CHARACTER.
3292 N defaults to 0 if omitted.
3296 Lisp_Object charset;
3299 CHECK_CHAR_COERCE_INT (character);
3301 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
3303 if (NILP (n) || EQ (n, Qzero))
3304 return make_int (octet0);
3305 else if (EQ (n, make_int (1)))
3306 return make_int (octet1);
3308 signal_simple_error ("Octet number must be 0 or 1", n);
3311 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
3312 Return list of charset and one or two position-codes of CHARACTER.
3316 /* This function can GC */
3317 struct gcpro gcpro1, gcpro2;
3318 Lisp_Object charset = Qnil;
3319 Lisp_Object rc = Qnil;
3327 GCPRO2 (charset, rc);
3328 CHECK_CHAR_COERCE_INT (character);
3331 code_point = ENCODE_CHAR (XCHAR (character), charset);
3332 dimension = XCHARSET_DIMENSION (charset);
3333 while (dimension > 0)
3335 rc = Fcons (make_int (code_point & 255), rc);
3339 rc = Fcons (XCHARSET_NAME (charset), rc);
3341 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
3343 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
3345 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
3349 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
3358 #ifdef ENABLE_COMPOSITE_CHARS
3359 /************************************************************************/
3360 /* composite character functions */
3361 /************************************************************************/
3364 lookup_composite_char (Bufbyte *str, int len)
3366 Lisp_Object lispstr = make_string (str, len);
3367 Lisp_Object ch = Fgethash (lispstr,
3368 Vcomposite_char_string2char_hash_table,
3374 if (composite_char_row_next >= 128)
3375 signal_simple_error ("No more composite chars available", lispstr);
3376 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
3377 composite_char_col_next);
3378 Fputhash (make_char (emch), lispstr,
3379 Vcomposite_char_char2string_hash_table);
3380 Fputhash (lispstr, make_char (emch),
3381 Vcomposite_char_string2char_hash_table);
3382 composite_char_col_next++;
3383 if (composite_char_col_next >= 128)
3385 composite_char_col_next = 32;
3386 composite_char_row_next++;
3395 composite_char_string (Emchar ch)
3397 Lisp_Object str = Fgethash (make_char (ch),
3398 Vcomposite_char_char2string_hash_table,
3400 assert (!UNBOUNDP (str));
3404 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
3405 Convert a string into a single composite character.
3406 The character is the result of overstriking all the characters in
3411 CHECK_STRING (string);
3412 return make_char (lookup_composite_char (XSTRING_DATA (string),
3413 XSTRING_LENGTH (string)));
3416 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
3417 Return a string of the characters comprising a composite character.
3425 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
3426 signal_simple_error ("Must be composite char", ch);
3427 return composite_char_string (emch);
3429 #endif /* ENABLE_COMPOSITE_CHARS */
3432 /************************************************************************/
3433 /* initialization */
3434 /************************************************************************/
3437 syms_of_mule_charset (void)
3440 INIT_LRECORD_IMPLEMENTATION (uint8_byte_table);
3441 INIT_LRECORD_IMPLEMENTATION (uint16_byte_table);
3442 INIT_LRECORD_IMPLEMENTATION (byte_table);
3443 INIT_LRECORD_IMPLEMENTATION (char_id_table);
3445 INIT_LRECORD_IMPLEMENTATION (charset);
3447 DEFSUBR (Fcharsetp);
3448 DEFSUBR (Ffind_charset);
3449 DEFSUBR (Fget_charset);
3450 DEFSUBR (Fcharset_list);
3451 DEFSUBR (Fcharset_name);
3452 DEFSUBR (Fmake_charset);
3453 DEFSUBR (Fmake_reverse_direction_charset);
3454 /* DEFSUBR (Freverse_direction_charset); */
3455 DEFSUBR (Fdefine_charset_alias);
3456 DEFSUBR (Fcharset_from_attributes);
3457 DEFSUBR (Fcharset_short_name);
3458 DEFSUBR (Fcharset_long_name);
3459 DEFSUBR (Fcharset_description);
3460 DEFSUBR (Fcharset_dimension);
3461 DEFSUBR (Fcharset_property);
3462 DEFSUBR (Fcharset_id);
3463 DEFSUBR (Fset_charset_ccl_program);
3464 DEFSUBR (Fset_charset_registry);
3466 DEFSUBR (Fchar_attribute_list);
3467 DEFSUBR (Ffind_char_attribute_table);
3468 DEFSUBR (Fchar_attribute_alist);
3469 DEFSUBR (Fget_char_attribute);
3470 DEFSUBR (Fput_char_attribute);
3471 DEFSUBR (Fremove_char_attribute);
3472 DEFSUBR (Fdefine_char);
3473 DEFSUBR (Fchar_variants);
3474 DEFSUBR (Fget_composite_char);
3475 DEFSUBR (Fcharset_mapping_table);
3476 DEFSUBR (Fset_charset_mapping_table);
3480 DEFSUBR (Fdecode_char);
3481 DEFSUBR (Fdecode_builtin_char);
3483 DEFSUBR (Fmake_char);
3484 DEFSUBR (Fchar_charset);
3485 DEFSUBR (Fchar_octet);
3486 DEFSUBR (Fsplit_char);
3488 #ifdef ENABLE_COMPOSITE_CHARS
3489 DEFSUBR (Fmake_composite_char);
3490 DEFSUBR (Fcomposite_char_string);
3493 defsymbol (&Qcharsetp, "charsetp");
3494 defsymbol (&Qregistry, "registry");
3495 defsymbol (&Qfinal, "final");
3496 defsymbol (&Qgraphic, "graphic");
3497 defsymbol (&Qdirection, "direction");
3498 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
3499 defsymbol (&Qshort_name, "short-name");
3500 defsymbol (&Qlong_name, "long-name");
3502 defsymbol (&Ql2r, "l2r");
3503 defsymbol (&Qr2l, "r2l");
3505 /* Charsets, compatible with FSF 20.3
3506 Naming convention is Script-Charset[-Edition] */
3507 defsymbol (&Qascii, "ascii");
3508 defsymbol (&Qcontrol_1, "control-1");
3509 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
3510 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
3511 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
3512 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
3513 defsymbol (&Qthai_tis620, "thai-tis620");
3514 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
3515 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
3516 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
3517 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
3518 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
3519 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
3520 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
3521 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
3522 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
3523 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
3524 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
3525 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
3526 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
3527 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
3528 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
3529 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
3531 defsymbol (&Qto_ucs, "=>ucs");
3532 defsymbol (&Q_ucs, "->ucs");
3533 defsymbol (&Q_decomposition, "->decomposition");
3534 defsymbol (&Qcompat, "compat");
3535 defsymbol (&Qisolated, "isolated");
3536 defsymbol (&Qinitial, "initial");
3537 defsymbol (&Qmedial, "medial");
3538 defsymbol (&Qfinal, "final");
3539 defsymbol (&Qvertical, "vertical");
3540 defsymbol (&QnoBreak, "noBreak");
3541 defsymbol (&Qfraction, "fraction");
3542 defsymbol (&Qsuper, "super");
3543 defsymbol (&Qsub, "sub");
3544 defsymbol (&Qcircle, "circle");
3545 defsymbol (&Qsquare, "square");
3546 defsymbol (&Qwide, "wide");
3547 defsymbol (&Qnarrow, "narrow");
3548 defsymbol (&Qsmall, "small");
3549 defsymbol (&Qfont, "font");
3550 defsymbol (&Qucs, "ucs");
3551 defsymbol (&Qucs_bmp, "ucs-bmp");
3552 defsymbol (&Qucs_cns, "ucs-cns");
3553 defsymbol (&Qucs_big5, "ucs-big5");
3554 defsymbol (&Qlatin_viscii, "latin-viscii");
3555 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
3556 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
3557 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
3558 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
3559 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
3560 defsymbol (&Qideograph_gt, "ideograph-gt");
3561 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
3562 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
3563 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
3564 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
3565 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
3566 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
3567 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
3568 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
3569 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
3570 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
3571 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
3572 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
3573 defsymbol (&Qchinese_big5, "chinese-big5");
3574 defsymbol (&Qmojikyo, "mojikyo");
3575 defsymbol (&Qmojikyo_2022_1, "mojikyo-2022-1");
3576 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
3577 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
3578 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
3579 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
3580 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
3581 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
3582 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
3583 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
3584 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
3585 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
3586 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
3587 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
3588 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
3589 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
3590 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
3591 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
3592 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
3593 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
3594 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
3595 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
3596 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
3597 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
3599 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
3600 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
3602 defsymbol (&Qcomposite, "composite");
3606 vars_of_mule_charset (void)
3613 chlook = xnew (struct charset_lookup);
3614 dumpstruct (&chlook, &charset_lookup_description);
3616 /* Table of charsets indexed by leading byte. */
3617 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
3618 chlook->charset_by_leading_byte[i] = Qnil;
3621 /* Table of charsets indexed by type/final-byte. */
3622 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3623 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3624 chlook->charset_by_attributes[i][j] = Qnil;
3626 /* Table of charsets indexed by type/final-byte/direction. */
3627 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3628 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3629 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
3630 chlook->charset_by_attributes[i][j][k] = Qnil;
3634 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
3636 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
3637 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
3641 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3642 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
3643 Leading-code of private TYPE9N charset of column-width 1.
3645 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3649 Vutf_2000_version = build_string("0.17 (Hōryūji)");
3650 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
3651 Version number of UTF-2000.
3654 staticpro (&Vcharacter_composition_table);
3655 Vcharacter_composition_table = make_char_id_table (Qnil);
3657 staticpro (&Vcharacter_variant_table);
3658 Vcharacter_variant_table = make_char_id_table (Qnil);
3660 Vdefault_coded_charset_priority_list = Qnil;
3661 DEFVAR_LISP ("default-coded-charset-priority-list",
3662 &Vdefault_coded_charset_priority_list /*
3663 Default order of preferred coded-character-sets.
3669 complex_vars_of_mule_charset (void)
3671 staticpro (&Vcharset_hash_table);
3672 Vcharset_hash_table =
3673 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3675 /* Predefined character sets. We store them into variables for
3679 staticpro (&Vchar_attribute_hash_table);
3680 Vchar_attribute_hash_table
3681 = make_lisp_hash_table (16, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3683 staticpro (&Vcharset_ucs);
3685 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
3686 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3687 build_string ("UCS"),
3688 build_string ("UCS"),
3689 build_string ("ISO/IEC 10646"),
3691 Qnil, 0, 0xFFFFFFF, 0, 0);
3692 staticpro (&Vcharset_ucs_bmp);
3694 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
3695 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3696 build_string ("BMP"),
3697 build_string ("BMP"),
3698 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
3699 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
3700 Qnil, 0, 0xFFFF, 0, 0);
3701 staticpro (&Vcharset_ucs_cns);
3703 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
3704 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3705 build_string ("UCS for CNS"),
3706 build_string ("UCS for CNS 11643"),
3707 build_string ("ISO/IEC 10646 for CNS 11643"),
3710 staticpro (&Vcharset_ucs_big5);
3712 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
3713 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3714 build_string ("UCS for Big5"),
3715 build_string ("UCS for Big5"),
3716 build_string ("ISO/IEC 10646 for Big5"),
3720 # define MIN_CHAR_THAI 0
3721 # define MAX_CHAR_THAI 0
3722 # define MIN_CHAR_HEBREW 0
3723 # define MAX_CHAR_HEBREW 0
3724 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3725 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
3727 staticpro (&Vcharset_ascii);
3729 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3730 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3731 build_string ("ASCII"),
3732 build_string ("ASCII)"),
3733 build_string ("ASCII (ISO646 IRV)"),
3734 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3735 Qnil, 0, 0x7F, 0, 0);
3736 staticpro (&Vcharset_control_1);
3737 Vcharset_control_1 =
3738 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3739 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3740 build_string ("C1"),
3741 build_string ("Control characters"),
3742 build_string ("Control characters 128-191"),
3744 Qnil, 0x80, 0x9F, 0, 0);
3745 staticpro (&Vcharset_latin_iso8859_1);
3746 Vcharset_latin_iso8859_1 =
3747 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3748 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3749 build_string ("Latin-1"),
3750 build_string ("ISO8859-1 (Latin-1)"),
3751 build_string ("ISO8859-1 (Latin-1)"),
3752 build_string ("iso8859-1"),
3753 Qnil, 0xA0, 0xFF, 0, 32);
3754 staticpro (&Vcharset_latin_iso8859_2);
3755 Vcharset_latin_iso8859_2 =
3756 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3757 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3758 build_string ("Latin-2"),
3759 build_string ("ISO8859-2 (Latin-2)"),
3760 build_string ("ISO8859-2 (Latin-2)"),
3761 build_string ("iso8859-2"),
3763 staticpro (&Vcharset_latin_iso8859_3);
3764 Vcharset_latin_iso8859_3 =
3765 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3766 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3767 build_string ("Latin-3"),
3768 build_string ("ISO8859-3 (Latin-3)"),
3769 build_string ("ISO8859-3 (Latin-3)"),
3770 build_string ("iso8859-3"),
3772 staticpro (&Vcharset_latin_iso8859_4);
3773 Vcharset_latin_iso8859_4 =
3774 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3775 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3776 build_string ("Latin-4"),
3777 build_string ("ISO8859-4 (Latin-4)"),
3778 build_string ("ISO8859-4 (Latin-4)"),
3779 build_string ("iso8859-4"),
3781 staticpro (&Vcharset_thai_tis620);
3782 Vcharset_thai_tis620 =
3783 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3784 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3785 build_string ("TIS620"),
3786 build_string ("TIS620 (Thai)"),
3787 build_string ("TIS620.2529 (Thai)"),
3788 build_string ("tis620"),
3789 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
3790 staticpro (&Vcharset_greek_iso8859_7);
3791 Vcharset_greek_iso8859_7 =
3792 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3793 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3794 build_string ("ISO8859-7"),
3795 build_string ("ISO8859-7 (Greek)"),
3796 build_string ("ISO8859-7 (Greek)"),
3797 build_string ("iso8859-7"),
3799 0 /* MIN_CHAR_GREEK */,
3800 0 /* MAX_CHAR_GREEK */, 0, 32);
3801 staticpro (&Vcharset_arabic_iso8859_6);
3802 Vcharset_arabic_iso8859_6 =
3803 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3804 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3805 build_string ("ISO8859-6"),
3806 build_string ("ISO8859-6 (Arabic)"),
3807 build_string ("ISO8859-6 (Arabic)"),
3808 build_string ("iso8859-6"),
3810 staticpro (&Vcharset_hebrew_iso8859_8);
3811 Vcharset_hebrew_iso8859_8 =
3812 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3813 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3814 build_string ("ISO8859-8"),
3815 build_string ("ISO8859-8 (Hebrew)"),
3816 build_string ("ISO8859-8 (Hebrew)"),
3817 build_string ("iso8859-8"),
3818 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
3819 staticpro (&Vcharset_katakana_jisx0201);
3820 Vcharset_katakana_jisx0201 =
3821 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3822 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3823 build_string ("JISX0201 Kana"),
3824 build_string ("JISX0201.1976 (Japanese Kana)"),
3825 build_string ("JISX0201.1976 Japanese Kana"),
3826 build_string ("jisx0201\\.1976"),
3828 staticpro (&Vcharset_latin_jisx0201);
3829 Vcharset_latin_jisx0201 =
3830 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3831 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3832 build_string ("JISX0201 Roman"),
3833 build_string ("JISX0201.1976 (Japanese Roman)"),
3834 build_string ("JISX0201.1976 Japanese Roman"),
3835 build_string ("jisx0201\\.1976"),
3837 staticpro (&Vcharset_cyrillic_iso8859_5);
3838 Vcharset_cyrillic_iso8859_5 =
3839 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3840 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3841 build_string ("ISO8859-5"),
3842 build_string ("ISO8859-5 (Cyrillic)"),
3843 build_string ("ISO8859-5 (Cyrillic)"),
3844 build_string ("iso8859-5"),
3846 0 /* MIN_CHAR_CYRILLIC */,
3847 0 /* MAX_CHAR_CYRILLIC */, 0, 32);
3848 staticpro (&Vcharset_latin_iso8859_9);
3849 Vcharset_latin_iso8859_9 =
3850 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3851 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3852 build_string ("Latin-5"),
3853 build_string ("ISO8859-9 (Latin-5)"),
3854 build_string ("ISO8859-9 (Latin-5)"),
3855 build_string ("iso8859-9"),
3857 staticpro (&Vcharset_japanese_jisx0208_1978);
3858 Vcharset_japanese_jisx0208_1978 =
3859 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3860 Qjapanese_jisx0208_1978, 94, 2,
3861 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3862 build_string ("JIS X0208:1978"),
3863 build_string ("JIS X0208:1978 (Japanese)"),
3865 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3866 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3868 staticpro (&Vcharset_chinese_gb2312);
3869 Vcharset_chinese_gb2312 =
3870 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3871 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3872 build_string ("GB2312"),
3873 build_string ("GB2312)"),
3874 build_string ("GB2312 Chinese simplified"),
3875 build_string ("gb2312"),
3877 staticpro (&Vcharset_chinese_gb12345);
3878 Vcharset_chinese_gb12345 =
3879 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
3880 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3881 build_string ("G1"),
3882 build_string ("GB 12345)"),
3883 build_string ("GB 12345-1990"),
3884 build_string ("GB12345\\(\\.1990\\)?-0"),
3886 staticpro (&Vcharset_japanese_jisx0208);
3887 Vcharset_japanese_jisx0208 =
3888 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3889 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3890 build_string ("JISX0208"),
3891 build_string ("JIS X0208:1983 (Japanese)"),
3892 build_string ("JIS X0208:1983 Japanese Kanji"),
3893 build_string ("jisx0208\\.1983"),
3896 staticpro (&Vcharset_japanese_jisx0208_1990);
3897 Vcharset_japanese_jisx0208_1990 =
3898 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3899 Qjapanese_jisx0208_1990, 94, 2,
3900 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3901 build_string ("JISX0208-1990"),
3902 build_string ("JIS X0208:1990 (Japanese)"),
3903 build_string ("JIS X0208:1990 Japanese Kanji"),
3904 build_string ("jisx0208\\.1990"),
3906 MIN_CHAR_JIS_X0208_1990,
3907 MAX_CHAR_JIS_X0208_1990, 0, 33);
3909 staticpro (&Vcharset_korean_ksc5601);
3910 Vcharset_korean_ksc5601 =
3911 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3912 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3913 build_string ("KSC5601"),
3914 build_string ("KSC5601 (Korean"),
3915 build_string ("KSC5601 Korean Hangul and Hanja"),
3916 build_string ("ksc5601"),
3918 staticpro (&Vcharset_japanese_jisx0212);
3919 Vcharset_japanese_jisx0212 =
3920 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3921 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3922 build_string ("JISX0212"),
3923 build_string ("JISX0212 (Japanese)"),
3924 build_string ("JISX0212 Japanese Supplement"),
3925 build_string ("jisx0212"),
3928 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3929 staticpro (&Vcharset_chinese_cns11643_1);
3930 Vcharset_chinese_cns11643_1 =
3931 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3932 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3933 build_string ("CNS11643-1"),
3934 build_string ("CNS11643-1 (Chinese traditional)"),
3936 ("CNS 11643 Plane 1 Chinese traditional"),
3937 build_string (CHINESE_CNS_PLANE_RE("1")),
3939 staticpro (&Vcharset_chinese_cns11643_2);
3940 Vcharset_chinese_cns11643_2 =
3941 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3942 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3943 build_string ("CNS11643-2"),
3944 build_string ("CNS11643-2 (Chinese traditional)"),
3946 ("CNS 11643 Plane 2 Chinese traditional"),
3947 build_string (CHINESE_CNS_PLANE_RE("2")),
3950 staticpro (&Vcharset_latin_tcvn5712);
3951 Vcharset_latin_tcvn5712 =
3952 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3953 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3954 build_string ("TCVN 5712"),
3955 build_string ("TCVN 5712 (VSCII-2)"),
3956 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3957 build_string ("tcvn5712\\(\\.1993\\)?-1"),
3959 staticpro (&Vcharset_latin_viscii_lower);
3960 Vcharset_latin_viscii_lower =
3961 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3962 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3963 build_string ("VISCII lower"),
3964 build_string ("VISCII lower (Vietnamese)"),
3965 build_string ("VISCII lower (Vietnamese)"),
3966 build_string ("MULEVISCII-LOWER"),
3968 staticpro (&Vcharset_latin_viscii_upper);
3969 Vcharset_latin_viscii_upper =
3970 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3971 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3972 build_string ("VISCII upper"),
3973 build_string ("VISCII upper (Vietnamese)"),
3974 build_string ("VISCII upper (Vietnamese)"),
3975 build_string ("MULEVISCII-UPPER"),
3977 staticpro (&Vcharset_latin_viscii);
3978 Vcharset_latin_viscii =
3979 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3980 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3981 build_string ("VISCII"),
3982 build_string ("VISCII 1.1 (Vietnamese)"),
3983 build_string ("VISCII 1.1 (Vietnamese)"),
3984 build_string ("VISCII1\\.1"),
3986 staticpro (&Vcharset_chinese_big5);
3987 Vcharset_chinese_big5 =
3988 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
3989 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3990 build_string ("Big5"),
3991 build_string ("Big5"),
3992 build_string ("Big5 Chinese traditional"),
3993 build_string ("big5"),
3995 staticpro (&Vcharset_ideograph_gt);
3996 Vcharset_ideograph_gt =
3997 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
3998 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3999 build_string ("GT"),
4000 build_string ("GT"),
4001 build_string ("GT"),
4003 Qnil, MIN_CHAR_GT, MAX_CHAR_GT, 0, 0);
4004 #define DEF_GT_PJ(n) \
4005 staticpro (&Vcharset_ideograph_gt_pj_##n); \
4006 Vcharset_ideograph_gt_pj_##n = \
4007 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
4008 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
4009 build_string ("GT-PJ-"#n), \
4010 build_string ("GT (pseudo JIS encoding) part "#n), \
4011 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
4013 ("\\(GT2000PJ-"#n "\\|jisx0208\\.GT2000-"#n "\\)$"), \
4027 staticpro (&Vcharset_ideograph_daikanwa);
4028 Vcharset_ideograph_daikanwa =
4029 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
4030 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4031 build_string ("Daikanwa"),
4032 build_string ("Morohashi's Daikanwa"),
4033 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
4034 build_string ("Daikanwa"),
4035 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
4036 staticpro (&Vcharset_mojikyo);
4038 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
4039 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4040 build_string ("Mojikyo"),
4041 build_string ("Mojikyo"),
4042 build_string ("Konjaku-Mojikyo"),
4044 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
4045 staticpro (&Vcharset_mojikyo_2022_1);
4046 Vcharset_mojikyo_2022_1 =
4047 make_charset (LEADING_BYTE_MOJIKYO_2022_1, Qmojikyo_2022_1, 94, 3,
4048 2, 2, ':', CHARSET_LEFT_TO_RIGHT,
4049 build_string ("Mojikyo-2022-1"),
4050 build_string ("Mojikyo ISO-2022 Part 1"),
4051 build_string ("Konjaku-Mojikyo for ISO/IEC 2022 Part 1"),
4055 #define DEF_MOJIKYO_PJ(n) \
4056 staticpro (&Vcharset_mojikyo_pj_##n); \
4057 Vcharset_mojikyo_pj_##n = \
4058 make_charset (LEADING_BYTE_MOJIKYO_PJ_##n, Qmojikyo_pj_##n, 94, 2, \
4059 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
4060 build_string ("Mojikyo-PJ-"#n), \
4061 build_string ("Mojikyo (pseudo JIS encoding) part "#n), \
4063 ("Konjaku-Mojikyo (pseudo JIS encoding) part "#n), \
4065 ("\\(MojikyoPJ-"#n "\\|jisx0208\\.Mojikyo-"#n "\\)$"), \
4077 DEF_MOJIKYO_PJ (10);
4078 DEF_MOJIKYO_PJ (11);
4079 DEF_MOJIKYO_PJ (12);
4080 DEF_MOJIKYO_PJ (13);
4081 DEF_MOJIKYO_PJ (14);
4082 DEF_MOJIKYO_PJ (15);
4083 DEF_MOJIKYO_PJ (16);
4084 DEF_MOJIKYO_PJ (17);
4085 DEF_MOJIKYO_PJ (18);
4086 DEF_MOJIKYO_PJ (19);
4087 DEF_MOJIKYO_PJ (20);
4088 DEF_MOJIKYO_PJ (21);
4090 staticpro (&Vcharset_ethiopic_ucs);
4091 Vcharset_ethiopic_ucs =
4092 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
4093 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4094 build_string ("Ethiopic (UCS)"),
4095 build_string ("Ethiopic (UCS)"),
4096 build_string ("Ethiopic of UCS"),
4097 build_string ("Ethiopic-Unicode"),
4098 Qnil, 0x1200, 0x137F, 0x1200, 0);
4100 staticpro (&Vcharset_chinese_big5_1);
4101 Vcharset_chinese_big5_1 =
4102 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
4103 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
4104 build_string ("Big5"),
4105 build_string ("Big5 (Level-1)"),
4107 ("Big5 Level-1 Chinese traditional"),
4108 build_string ("big5"),
4110 staticpro (&Vcharset_chinese_big5_2);
4111 Vcharset_chinese_big5_2 =
4112 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
4113 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
4114 build_string ("Big5"),
4115 build_string ("Big5 (Level-2)"),
4117 ("Big5 Level-2 Chinese traditional"),
4118 build_string ("big5"),
4121 #ifdef ENABLE_COMPOSITE_CHARS
4122 /* #### For simplicity, we put composite chars into a 96x96 charset.
4123 This is going to lead to problems because you can run out of
4124 room, esp. as we don't yet recycle numbers. */
4125 staticpro (&Vcharset_composite);
4126 Vcharset_composite =
4127 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
4128 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4129 build_string ("Composite"),
4130 build_string ("Composite characters"),
4131 build_string ("Composite characters"),
4134 /* #### not dumped properly */
4135 composite_char_row_next = 32;
4136 composite_char_col_next = 32;
4138 Vcomposite_char_string2char_hash_table =
4139 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
4140 Vcomposite_char_char2string_hash_table =
4141 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
4142 staticpro (&Vcomposite_char_string2char_hash_table);
4143 staticpro (&Vcomposite_char_char2string_hash_table);
4144 #endif /* ENABLE_COMPOSITE_CHARS */