1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_cns;
70 Lisp_Object Vcharset_ucs_big5;
71 Lisp_Object Vcharset_latin_viscii;
72 Lisp_Object Vcharset_latin_tcvn5712;
73 Lisp_Object Vcharset_latin_viscii_lower;
74 Lisp_Object Vcharset_latin_viscii_upper;
75 Lisp_Object Vcharset_chinese_big5;
76 Lisp_Object Vcharset_ideograph_gt;
77 Lisp_Object Vcharset_ideograph_gt_pj_1;
78 Lisp_Object Vcharset_ideograph_gt_pj_2;
79 Lisp_Object Vcharset_ideograph_gt_pj_3;
80 Lisp_Object Vcharset_ideograph_gt_pj_4;
81 Lisp_Object Vcharset_ideograph_gt_pj_5;
82 Lisp_Object Vcharset_ideograph_gt_pj_6;
83 Lisp_Object Vcharset_ideograph_gt_pj_7;
84 Lisp_Object Vcharset_ideograph_gt_pj_8;
85 Lisp_Object Vcharset_ideograph_gt_pj_9;
86 Lisp_Object Vcharset_ideograph_gt_pj_10;
87 Lisp_Object Vcharset_ideograph_gt_pj_11;
88 Lisp_Object Vcharset_ideograph_daikanwa;
89 Lisp_Object Vcharset_mojikyo;
90 Lisp_Object Vcharset_mojikyo_2022_1;
91 Lisp_Object Vcharset_mojikyo_pj_1;
92 Lisp_Object Vcharset_mojikyo_pj_2;
93 Lisp_Object Vcharset_mojikyo_pj_3;
94 Lisp_Object Vcharset_mojikyo_pj_4;
95 Lisp_Object Vcharset_mojikyo_pj_5;
96 Lisp_Object Vcharset_mojikyo_pj_6;
97 Lisp_Object Vcharset_mojikyo_pj_7;
98 Lisp_Object Vcharset_mojikyo_pj_8;
99 Lisp_Object Vcharset_mojikyo_pj_9;
100 Lisp_Object Vcharset_mojikyo_pj_10;
101 Lisp_Object Vcharset_mojikyo_pj_11;
102 Lisp_Object Vcharset_mojikyo_pj_12;
103 Lisp_Object Vcharset_mojikyo_pj_13;
104 Lisp_Object Vcharset_mojikyo_pj_14;
105 Lisp_Object Vcharset_mojikyo_pj_15;
106 Lisp_Object Vcharset_mojikyo_pj_16;
107 Lisp_Object Vcharset_mojikyo_pj_17;
108 Lisp_Object Vcharset_mojikyo_pj_18;
109 Lisp_Object Vcharset_mojikyo_pj_19;
110 Lisp_Object Vcharset_mojikyo_pj_20;
111 Lisp_Object Vcharset_mojikyo_pj_21;
112 Lisp_Object Vcharset_ethiopic_ucs;
114 Lisp_Object Vcharset_chinese_big5_1;
115 Lisp_Object Vcharset_chinese_big5_2;
117 #ifdef ENABLE_COMPOSITE_CHARS
118 Lisp_Object Vcharset_composite;
120 /* Hash tables for composite chars. One maps strings representing
121 composed chars to their equivalent chars; one goes the other way. */
123 Lisp_Object Vcomposite_char_char2string_hash_table;
124 Lisp_Object Vcomposite_char_string2char_hash_table;
126 static int composite_char_row_next;
127 static int composite_char_col_next;
129 #endif /* ENABLE_COMPOSITE_CHARS */
131 struct charset_lookup *chlook;
133 static const struct lrecord_description charset_lookup_description_1[] = {
134 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
143 static const struct struct_description charset_lookup_description = {
144 sizeof (struct charset_lookup),
145 charset_lookup_description_1
149 /* Table of number of bytes in the string representation of a character
150 indexed by the first byte of that representation.
152 rep_bytes_by_first_byte(c) is more efficient than the equivalent
153 canonical computation:
155 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
157 const Bytecount rep_bytes_by_first_byte[0xA0] =
158 { /* 0x00 - 0x7f are for straight ASCII */
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
165 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
166 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
167 /* 0x80 - 0x8f are for Dimension-1 official charsets */
169 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
171 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
173 /* 0x90 - 0x9d are for Dimension-2 official charsets */
174 /* 0x9e is for Dimension-1 private charsets */
175 /* 0x9f is for Dimension-2 private charsets */
176 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Value encoding used by uint8 byte tables.  Integers in the range
   BT_UINT8_MIN..BT_UINT8_MAX are stored as themselves; the three
   largest byte values are reserved as codes for t, nil and the
   unbound marker (compare UINT8_ENCODE, UINT8_DECODE and the
   "void"/"nil"/"t" cases in print_uint8_byte_table). */
182 #define BT_UINT8_MIN 0
183 #define BT_UINT8_MAX (UCHAR_MAX - 3)
184 #define BT_UINT8_t (UCHAR_MAX - 2)
185 #define BT_UINT8_nil (UCHAR_MAX - 1)
186 #define BT_UINT8_unbound UCHAR_MAX
188 INLINE_HEADER int INT_UINT8_P (Lisp_Object obj);
189 INLINE_HEADER int UINT8_VALUE_P (Lisp_Object obj);
190 INLINE_HEADER unsigned char UINT8_ENCODE (Lisp_Object obj);
191 INLINE_HEADER Lisp_Object UINT8_DECODE (unsigned char n);
192 INLINE_HEADER unsigned short UINT8_TO_UINT16 (unsigned char n);
/* Range predicate for uint8 byte-table values: reads OBJ with XINT and
   tests BT_UINT8_MIN <= value <= BT_UINT8_MAX.
   NOTE(review): any type guard preceding the XINT call is not visible
   in this excerpt -- confirm OBJ is known to be an integer here. */
195 INT_UINT8_P (Lisp_Object obj)
199 int num = XINT (obj);
201 return (BT_UINT8_MIN <= num) && (num <= BT_UINT8_MAX);
/* Nonzero when OBJ is the unbound marker, nil, t, or a value accepted
   by INT_UINT8_P.  put_byte_table uses this to decide whether a value
   can be stored in a uint8 byte table. */
208 UINT8_VALUE_P (Lisp_Object obj)
210 return EQ (obj, Qunbound)
211 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT8_P (obj);
/* Convert a Lisp value to its one-byte table representation.
   Qunbound maps to BT_UINT8_unbound; Qnil and Qt are tested next.
   NOTE(review): the return statements for nil/t and the fallback for
   ordinary integers are not visible in this excerpt. */
214 INLINE_HEADER unsigned char
215 UINT8_ENCODE (Lisp_Object obj)
217 if (EQ (obj, Qunbound))
218 return BT_UINT8_unbound;
219 else if (EQ (obj, Qnil))
221 else if (EQ (obj, Qt))
/* Convert a stored byte back to a Lisp value.  N is compared against
   the reserved codes BT_UINT8_unbound, BT_UINT8_nil and BT_UINT8_t in
   that order.
   NOTE(review): the values returned by each branch are not visible in
   this excerpt; presumably the inverse of UINT8_ENCODE -- confirm. */
227 INLINE_HEADER Lisp_Object
228 UINT8_DECODE (unsigned char n)
230 if (n == BT_UINT8_unbound)
232 else if (n == BT_UINT8_nil)
234 else if (n == BT_UINT8_t)
/* Mark method registered for the uint8-byte-table lrecord type (see
   the DEFINE_LRECORD_IMPLEMENTATION for "uint8-byte-table").  The body
   is not visible in this excerpt. */
241 mark_uint8_byte_table (Lisp_Object obj)
/* Print method for uint8 byte tables.  Writes "#<uint8-byte-table",
   then one entry per slot for all 256 slots, then ">".  Reserved codes
   are printed as "void", "nil" and "t"; every other byte is printed as
   a decimal number.  OBJ and PRINTCHARFUN are GC-protected while
   printing. */
247 print_uint8_byte_table (Lisp_Object obj,
248 Lisp_Object printcharfun, int escapeflag)
250 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
252 struct gcpro gcpro1, gcpro2;
253 GCPRO2 (obj, printcharfun);
255 write_c_string ("\n#<uint8-byte-table", printcharfun);
256 for (i = 0; i < 256; i++)
258 unsigned char n = bte->property[i];
260 write_c_string ("\n ", printcharfun);
261 write_c_string (" ", printcharfun);
262 if (n == BT_UINT8_unbound)
263 write_c_string ("void", printcharfun);
264 else if (n == BT_UINT8_nil)
265 write_c_string ("nil", printcharfun);
266 else if (n == BT_UINT8_t)
267 write_c_string ("t", printcharfun);
/* n is unsigned, so format it with an unsigned conversion. */
272 sprintf (buf, "%hu", n);
273 write_c_string (buf, printcharfun);
277 write_c_string (">", printcharfun);
/* Equality method for uint8 byte tables: walks all 256 slots and
   compares the stored bytes of OBJ1 and OBJ2 pairwise.  DEPTH is not
   used in the visible lines.  The return statements are not visible in
   this excerpt. */
281 uint8_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
283 Lisp_Uint8_Byte_Table *te1 = XUINT8_BYTE_TABLE (obj1);
284 Lisp_Uint8_Byte_Table *te2 = XUINT8_BYTE_TABLE (obj2);
287 for (i = 0; i < 256; i++)
288 if (te1->property[i] != te2->property[i])
/* Hash method for uint8 byte tables: folds each of the 256 stored
   bytes into the running hash with HASH2.  The initial value of `hash'
   and the return statement are not visible in this excerpt. */
294 uint8_byte_table_hash (Lisp_Object obj, int depth)
296 Lisp_Uint8_Byte_Table *te = XUINT8_BYTE_TABLE (obj);
300 for (i = 0; i < 256; i++)
301 hash = HASH2 (hash, te->property[i]);
/* Register the "uint8-byte-table" lrecord type with its mark, print,
   equal and hash functions.  The slot after the print function and the
   description slot are both 0; the intended description name is kept
   only as a comment. */
305 DEFINE_LRECORD_IMPLEMENTATION ("uint8-byte-table", uint8_byte_table,
306 mark_uint8_byte_table,
307 print_uint8_byte_table,
308 0, uint8_byte_table_equal,
309 uint8_byte_table_hash,
310 0 /* uint8_byte_table_description */,
311 Lisp_Uint8_Byte_Table);
/* Allocate a new uint8 byte table, set all 256 slots to the already
   encoded byte INITVAL, and wrap the record in the Lisp object `obj'
   (the return statement is not visible in this excerpt). */
314 make_uint8_byte_table (unsigned char initval)
318 Lisp_Uint8_Byte_Table *cte;
320 cte = alloc_lcrecord_type (Lisp_Uint8_Byte_Table,
321 &lrecord_uint8_byte_table);
323 for (i = 0; i < 256; i++)
324 cte->property[i] = initval;
326 XSETUINT8_BYTE_TABLE (obj, cte);
/* Uniformity test for a uint8 byte table: compares slots 1..255
   against slot 0.  put_byte_table calls this after a store to detect a
   table whose slots all hold the same value.  The return statements
   are not visible in this excerpt. */
331 uint8_byte_table_same_value_p (Lisp_Object obj)
333 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
334 unsigned char v0 = bte->property[0];
337 for (i = 1; i < 256; i++)
339 if (bte->property[i] != v0)
/* Value encoding used by uint16 byte tables.  Integers in the range
   BT_UINT16_MIN..BT_UINT16_MAX are stored as themselves; the three
   largest unsigned short values are reserved as codes for t, nil and
   the unbound marker (compare UINT16_ENCODE and UINT16_DECODE). */
346 #define BT_UINT16_MIN 0
347 #define BT_UINT16_MAX (USHRT_MAX - 3)
348 #define BT_UINT16_t (USHRT_MAX - 2)
349 #define BT_UINT16_nil (USHRT_MAX - 1)
350 #define BT_UINT16_unbound USHRT_MAX
352 INLINE_HEADER int INT_UINT16_P (Lisp_Object obj);
353 INLINE_HEADER int UINT16_VALUE_P (Lisp_Object obj);
354 INLINE_HEADER unsigned short UINT16_ENCODE (Lisp_Object obj);
355 INLINE_HEADER Lisp_Object UINT16_DECODE (unsigned short us);
/* Range predicate for uint16 byte-table values: reads OBJ with XINT
   and tests BT_UINT16_MIN <= value <= BT_UINT16_MAX.
   NOTE(review): any type guard preceding the XINT call is not visible
   in this excerpt -- confirm OBJ is known to be an integer here. */
358 INT_UINT16_P (Lisp_Object obj)
362 int num = XINT (obj);
364 return (BT_UINT16_MIN <= num) && (num <= BT_UINT16_MAX);
/* Nonzero when OBJ is the unbound marker, nil, t, or a value accepted
   by INT_UINT16_P.  put_byte_table uses this to decide whether a value
   can be stored in a uint16 byte table. */
371 UINT16_VALUE_P (Lisp_Object obj)
373 return EQ (obj, Qunbound)
374 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT16_P (obj);
/* Convert a Lisp value to its 16-bit table representation.  Qunbound
   maps to BT_UINT16_unbound and Qnil to BT_UINT16_nil; Qt is tested
   next.
   NOTE(review): the return for Qt and the fallback for ordinary
   integers are not visible in this excerpt. */
377 INLINE_HEADER unsigned short
378 UINT16_ENCODE (Lisp_Object obj)
380 if (EQ (obj, Qunbound))
381 return BT_UINT16_unbound;
382 else if (EQ (obj, Qnil))
383 return BT_UINT16_nil;
384 else if (EQ (obj, Qt))
/* Convert a stored 16-bit value back to a Lisp value.  N is compared
   against BT_UINT16_unbound, BT_UINT16_nil and BT_UINT16_t in that
   order.
   NOTE(review): the values returned by each branch are not visible in
   this excerpt; presumably the inverse of UINT16_ENCODE -- confirm. */
390 INLINE_HEADER Lisp_Object
391 UINT16_DECODE (unsigned short n)
393 if (n == BT_UINT16_unbound)
395 else if (n == BT_UINT16_nil)
397 else if (n == BT_UINT16_t)
/* Re-encode a uint8 table byte as a uint16 table value.  The reserved
   8-bit codes must be translated because the reserved 16-bit codes have
   different numeric values: BT_UINT8_unbound becomes BT_UINT16_unbound
   and BT_UINT8_nil becomes BT_UINT16_nil; BT_UINT8_t is tested next.
   NOTE(review): the return for t and for ordinary values is not
   visible in this excerpt. */
403 INLINE_HEADER unsigned short
404 UINT8_TO_UINT16 (unsigned char n)
406 if (n == BT_UINT8_unbound)
407 return BT_UINT16_unbound;
408 else if (n == BT_UINT8_nil)
409 return BT_UINT16_nil;
410 else if (n == BT_UINT8_t)
/* Mark method registered for the uint16-byte-table lrecord type (see
   the DEFINE_LRECORD_IMPLEMENTATION for "uint16-byte-table").  The body
   is not visible in this excerpt. */
417 mark_uint16_byte_table (Lisp_Object obj)
/* Print method for uint16 byte tables.  Writes "#<uint16-byte-table",
   then one entry per slot for all 256 slots, then ">".  Reserved codes
   are printed as "void", "nil" and "t"; every other value is printed
   as a decimal number.  OBJ and PRINTCHARFUN are GC-protected while
   printing. */
423 print_uint16_byte_table (Lisp_Object obj,
424 Lisp_Object printcharfun, int escapeflag)
426 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
428 struct gcpro gcpro1, gcpro2;
429 GCPRO2 (obj, printcharfun);
431 write_c_string ("\n#<uint16-byte-table", printcharfun);
432 for (i = 0; i < 256; i++)
434 unsigned short n = bte->property[i];
436 write_c_string ("\n ", printcharfun);
437 write_c_string (" ", printcharfun);
438 if (n == BT_UINT16_unbound)
439 write_c_string ("void", printcharfun);
440 else if (n == BT_UINT16_nil)
441 write_c_string ("nil", printcharfun);
442 else if (n == BT_UINT16_t)
443 write_c_string ("t", printcharfun);
/* n is an unsigned short: "%hd" would print values above SHRT_MAX as
   negative numbers, so use the unsigned conversion. */
448 sprintf (buf, "%hu", n);
449 write_c_string (buf, printcharfun);
453 write_c_string (">", printcharfun);
/* Equality method for uint16 byte tables: walks all 256 slots and
   compares the stored values of OBJ1 and OBJ2 pairwise.  DEPTH is not
   used in the visible lines.  The return statements are not visible in
   this excerpt. */
457 uint16_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
459 Lisp_Uint16_Byte_Table *te1 = XUINT16_BYTE_TABLE (obj1);
460 Lisp_Uint16_Byte_Table *te2 = XUINT16_BYTE_TABLE (obj2);
463 for (i = 0; i < 256; i++)
464 if (te1->property[i] != te2->property[i])
/* Hash method for uint16 byte tables: folds each of the 256 stored
   values into the running hash with HASH2.  The initial value of
   `hash' and the return statement are not visible in this excerpt. */
470 uint16_byte_table_hash (Lisp_Object obj, int depth)
472 Lisp_Uint16_Byte_Table *te = XUINT16_BYTE_TABLE (obj);
476 for (i = 0; i < 256; i++)
477 hash = HASH2 (hash, te->property[i]);
/* Register the "uint16-byte-table" lrecord type with its mark, print,
   equal and hash functions.  The slot after the print function and the
   description slot are both 0; the intended description name is kept
   only as a comment. */
481 DEFINE_LRECORD_IMPLEMENTATION ("uint16-byte-table", uint16_byte_table,
482 mark_uint16_byte_table,
483 print_uint16_byte_table,
484 0, uint16_byte_table_equal,
485 uint16_byte_table_hash,
486 0 /* uint16_byte_table_description */,
487 Lisp_Uint16_Byte_Table);
/* Allocate a new uint16 byte table, set all 256 slots to the already
   encoded value INITVAL, and wrap the record in the Lisp object `obj'
   (the return statement is not visible in this excerpt). */
490 make_uint16_byte_table (unsigned short initval)
494 Lisp_Uint16_Byte_Table *cte;
496 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
497 &lrecord_uint16_byte_table);
499 for (i = 0; i < 256; i++)
500 cte->property[i] = initval;
502 XSETUINT16_BYTE_TABLE (obj, cte);
/* Build a new uint16 byte table holding the contents of the uint8 byte
   table TABLE.  Each of the 256 bytes is converted with
   UINT8_TO_UINT16 so that reserved codes keep their meaning.  TABLE
   itself is not modified by the visible lines. */
507 expand_uint8_byte_table_to_uint16 (Lisp_Object table)
511 Lisp_Uint8_Byte_Table* bte = XUINT8_BYTE_TABLE(table);
512 Lisp_Uint16_Byte_Table* cte;
514 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
515 &lrecord_uint16_byte_table);
516 for (i = 0; i < 256; i++)
518 cte->property[i] = UINT8_TO_UINT16 (bte->property[i]);
520 XSETUINT16_BYTE_TABLE (obj, cte);
/* Uniformity test for a uint16 byte table: compares slots 1..255
   against slot 0.  put_byte_table calls this after a store to detect a
   table whose slots all hold the same value.  The return statements
   are not visible in this excerpt. */
525 uint16_byte_table_same_value_p (Lisp_Object obj)
527 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
528 unsigned short v0 = bte->property[0];
531 for (i = 1; i < 256; i++)
533 if (bte->property[i] != v0)
/* Mark method for general byte tables: every one of the 256 slots
   holds a Lisp_Object, so each is passed to mark_object. */
541 mark_byte_table (Lisp_Object obj)
543 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
546 for (i = 0; i < 256; i++)
548 mark_object (cte->property[i]);
/* Print method for general byte tables.  Writes "#<byte-table", then
   one entry per slot for all 256 slots, then ">".  An unbound slot is
   printed as "void"; any other element is printed with print_internal
   using the caller's ESCAPEFLAG.  OBJ and PRINTCHARFUN are
   GC-protected while printing. */
554 print_byte_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
556 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
558 struct gcpro gcpro1, gcpro2;
559 GCPRO2 (obj, printcharfun);
561 write_c_string ("\n#<byte-table", printcharfun);
562 for (i = 0; i < 256; i++)
564 Lisp_Object elt = bte->property[i];
566 write_c_string ("\n ", printcharfun);
567 write_c_string (" ", printcharfun);
568 if (EQ (elt, Qunbound))
569 write_c_string ("void", printcharfun);
571 print_internal (elt, printcharfun, escapeflag);
574 write_c_string (">", printcharfun);
/* Equality method for general byte tables.  For each of the 256 slots:
   when both elements are themselves byte tables they are compared by a
   recursive call; otherwise the elements are compared with
   internal_equal.  Both paths pass DEPTH + 1 so nesting depth is
   tracked.  The return statements are not visible in this excerpt. */
578 byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
580 Lisp_Byte_Table *cte1 = XBYTE_TABLE (obj1);
581 Lisp_Byte_Table *cte2 = XBYTE_TABLE (obj2);
584 for (i = 0; i < 256; i++)
585 if (BYTE_TABLE_P (cte1->property[i]))
587 if (BYTE_TABLE_P (cte2->property[i]))
589 if (!byte_table_equal (cte1->property[i],
590 cte2->property[i], depth + 1))
597 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
/* Hash method for general byte tables: hashes the 256-element slot
   array with internal_array_hash, passing DEPTH through unchanged. */
603 byte_table_hash (Lisp_Object obj, int depth)
605 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
607 return internal_array_hash (cte->property, 256, depth);
/* Layout description for Lisp_Byte_Table: the `property' member is an
   array of 256 Lisp objects.  It is passed to the "byte-table" lrecord
   definition below (the remaining arguments of that definition are not
   visible in this excerpt). */
610 static const struct lrecord_description byte_table_description[] = {
611 { XD_LISP_OBJECT_ARRAY, offsetof(Lisp_Byte_Table, property), 256 },
615 DEFINE_LRECORD_IMPLEMENTATION ("byte-table", byte_table,
620 byte_table_description,
/* Allocate a new general byte table, set all 256 slots to INITVAL, and
   wrap the record in the Lisp object `obj' (the return statement is
   not visible in this excerpt). */
624 make_byte_table (Lisp_Object initval)
628 Lisp_Byte_Table *cte;
630 cte = alloc_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
632 for (i = 0; i < 256; i++)
633 cte->property[i] = initval;
635 XSETBYTE_TABLE (obj, cte);
/* Uniformity test for a general byte table: compares slots 1..255
   against slot 0 with internal_equal (depth 0).  put_byte_table calls
   this after a store to detect a table whose slots all hold equal
   values.  The return statements are not visible in this excerpt. */
640 byte_table_same_value_p (Lisp_Object obj)
642 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
643 Lisp_Object v0 = bte->property[0];
646 for (i = 1; i < 256; i++)
648 if (!internal_equal (bte->property[i], v0, 0))
655 Lisp_Object get_byte_table (Lisp_Object table, unsigned char idx);
656 Lisp_Object put_byte_table (Lisp_Object table, unsigned char idx,
/* Fetch slot IDX of TABLE, whatever its representation: uint8 and
   uint16 byte tables are decoded back to Lisp values, a general byte
   table returns the stored object directly.
   NOTE(review): the final branch, taken when TABLE is not a byte table
   at all, is not visible here; put_char_id_table relies on this call
   being usable on such values -- confirm what it returns. */
660 get_byte_table (Lisp_Object table, unsigned char idx)
662 if (UINT8_BYTE_TABLE_P (table))
663 return UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[idx]);
664 else if (UINT16_BYTE_TABLE_P (table))
665 return UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[idx]);
666 else if (BYTE_TABLE_P (table))
667 return XBYTE_TABLE(table)->property[idx];
/* Store VALUE in slot IDX of TABLE, changing the representation when
   needed.  Callers use the result in place of TABLE (see
   put_char_id_table).

   - uint8 table: VALUE is stored in place if it fits; a value that
     only fits in 16 bits goes into an expanded uint16 copy; anything
     else goes into a general byte table copy.
   - uint16 table: VALUE is stored in place if it fits; otherwise it
     goes into a general byte table copy.
   - general byte table: VALUE is stored in place.
   - anything else: TABLE is treated as one value shared by all 256
     slots.  If VALUE differs from it, a new table of the narrowest
     fitting kind is created, initialized to the old value, and VALUE
     is stored at IDX.

   After an in-place store of a non-table VALUE the matching
   *_same_value_p check is run.
   NOTE(review): the statements executed when that check succeeds, and
   all return statements, are not visible in this excerpt. */
673 put_byte_table (Lisp_Object table, unsigned char idx, Lisp_Object value)
675 if (UINT8_BYTE_TABLE_P (table))
677 if (UINT8_VALUE_P (value))
679 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
680 if (!UINT8_BYTE_TABLE_P (value) &&
681 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
682 && uint8_byte_table_same_value_p (table))
687 else if (UINT16_VALUE_P (value))
/* Too wide for 8 bits: widen the whole table, then store. */
689 Lisp_Object new = expand_uint8_byte_table_to_uint16 (table);
691 XUINT16_BYTE_TABLE(new)->property[idx] = UINT16_ENCODE (value);
/* Arbitrary Lisp value: copy into a general byte table, then store. */
696 Lisp_Object new = make_byte_table (Qnil);
699 for (i = 0; i < 256; i++)
701 XBYTE_TABLE(new)->property[i]
702 = UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[i]);
704 XBYTE_TABLE(new)->property[idx] = value;
708 else if (UINT16_BYTE_TABLE_P (table))
710 if (UINT16_VALUE_P (value))
712 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
713 if (!UINT8_BYTE_TABLE_P (value) &&
714 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
715 && uint16_byte_table_same_value_p (table))
/* Arbitrary Lisp value: copy into a general byte table, then store. */
722 Lisp_Object new = make_byte_table (Qnil);
725 for (i = 0; i < 256; i++)
727 XBYTE_TABLE(new)->property[i]
728 = UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[i]);
730 XBYTE_TABLE(new)->property[idx] = value;
734 else if (BYTE_TABLE_P (table))
736 XBYTE_TABLE(table)->property[idx] = value;
737 if (!UINT8_BYTE_TABLE_P (value) &&
738 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
739 && byte_table_same_value_p (table))
/* TABLE is not a byte table: expand it only if VALUE differs. */
744 else if (!internal_equal (table, value, 0))
746 if (UINT8_VALUE_P (table) && UINT8_VALUE_P (value))
748 table = make_uint8_byte_table (UINT8_ENCODE (table));
749 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
751 else if (UINT16_VALUE_P (table) && UINT16_VALUE_P (value))
753 table = make_uint16_byte_table (UINT16_ENCODE (table));
754 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
758 table = make_byte_table (table);
759 XBYTE_TABLE(table)->property[idx] = value;
/* Mark method for char-id-tables.  Only the extraction of the
   underlying record is visible in this excerpt; how its `table' member
   is marked or returned is not shown. */
766 mark_char_id_table (Lisp_Object obj)
768 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
/* Print method for char-id-tables.  Writes "#<char-id-table ", then the
   256 top-level entries of the underlying table, then ">".  Entries
   after the first are preceded by a newline and a space.  An unbound
   entry is printed as "void"; any other element is printed with
   print_internal.  OBJ and PRINTCHARFUN are GC-protected while
   printing. */
774 print_char_id_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
776 Lisp_Object table = XCHAR_ID_TABLE (obj)->table;
778 struct gcpro gcpro1, gcpro2;
779 GCPRO2 (obj, printcharfun);
781 write_c_string ("#<char-id-table ", printcharfun);
782 for (i = 0; i < 256; i++)
784 Lisp_Object elt = get_byte_table (table, i);
785 if (i != 0) write_c_string ("\n ", printcharfun);
786 if (EQ (elt, Qunbound))
787 write_c_string ("void", printcharfun);
789 print_internal (elt, printcharfun, escapeflag);
792 write_c_string (">", printcharfun);
/* Equality method for char-id-tables: compares the 256 top-level
   entries of the two underlying tables with internal_equal.  DEPTH + 1
   is passed down, as byte_table_equal does, so the recursion depth
   seen by internal_equal keeps growing across nested tables instead of
   restarting at 0.  The return statements are not visible in this
   excerpt. */
796 char_id_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
798 Lisp_Object table1 = XCHAR_ID_TABLE (obj1)->table;
799 Lisp_Object table2 = XCHAR_ID_TABLE (obj2)->table;
802 for (i = 0; i < 256; i++)
804 if (!internal_equal (get_byte_table (table1, i),
805 get_byte_table (table2, i), depth + 1))
/* Hash method for char-id-tables: the hash of a char-id-table is the
   hash of its underlying table object.  That object is created by
   make_byte_table and is not itself a char-id-table, so it must be
   hashed with the generic internal_hash; calling char_id_table_hash on
   it again would read it as the wrong record type and recurse. */
812 char_id_table_hash (Lisp_Object obj, int depth)
814 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
816 return internal_hash (cte->table, depth + 1);
/* Layout description for Lisp_Char_ID_Table: its `table' member is a
   single Lisp object.  It is passed to the "char-id-table" lrecord
   definition below, which also registers char_id_table_equal (the
   remaining arguments of that definition are not visible in this
   excerpt). */
819 static const struct lrecord_description char_id_table_description[] = {
820 { XD_LISP_OBJECT, offsetof(Lisp_Char_ID_Table, table) },
824 DEFINE_LRECORD_IMPLEMENTATION ("char-id-table", char_id_table,
827 0, char_id_table_equal,
829 char_id_table_description,
/* Allocate a new char-id-table whose underlying table is a general
   byte table with all slots set to INITVAL, and wrap the record in the
   Lisp object `obj' (the return statement is not visible in this
   excerpt). */
833 make_char_id_table (Lisp_Object initval)
836 Lisp_Char_ID_Table *cte;
838 cte = alloc_lcrecord_type (Lisp_Char_ID_Table, &lrecord_char_id_table);
840 cte->table = make_byte_table (initval);
842 XSETCHAR_ID_TABLE (obj, cte);
/* Look up character CH in the char-id-table TABLE.  The character code
   is split into four bytes, most significant first, and each byte
   indexes one level of the nested tables, starting from the
   char-id-table's underlying table.
   NOTE(review): the opening of the nested call expression is not
   visible in this excerpt; the four index arguments suggest four
   nested get_byte_table calls -- confirm. */
848 get_char_id_table (Emchar ch, Lisp_Object table)
850 unsigned int code = ch;
857 (XCHAR_ID_TABLE (table)->table,
858 (unsigned char)(code >> 24)),
859 (unsigned char) (code >> 16)),
860 (unsigned char) (code >> 8)),
861 (unsigned char) code);
/* Store VALUE for character CH in the char-id-table TABLE.  The four
   bytes of the character code, most significant first, select one slot
   per level.  The tables along that path are fetched top-down, then
   VALUE is stored at the lowest level and each result of
   put_byte_table is stored back into its parent bottom-up, because
   put_byte_table may hand back a different object than it was given.
   The final result replaces the char-id-table's underlying table. */
864 void put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table);
866 put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table)
868 unsigned int code = ch;
869 Lisp_Object table1, table2, table3, table4;
871 table1 = XCHAR_ID_TABLE (table)->table;
872 table2 = get_byte_table (table1, (unsigned char)(code >> 24));
873 table3 = get_byte_table (table2, (unsigned char)(code >> 16));
874 table4 = get_byte_table (table3, (unsigned char)(code >> 8));
876 table4 = put_byte_table (table4, (unsigned char)code, value);
877 table3 = put_byte_table (table3, (unsigned char)(code >> 8), table4);
878 table2 = put_byte_table (table2, (unsigned char)(code >> 16), table3);
879 XCHAR_ID_TABLE (table)->table
880 = put_byte_table (table1, (unsigned char)(code >> 24), table2);
884 Lisp_Object Vchar_attribute_hash_table;
885 Lisp_Object Vcharacter_composition_table;
886 Lisp_Object Vcharacter_variant_table;
888 Lisp_Object Qideograph_daikanwa;
889 Lisp_Object Q_decomposition;
894 Lisp_Object Qisolated;
895 Lisp_Object Qinitial;
898 Lisp_Object Qvertical;
899 Lisp_Object QnoBreak;
900 Lisp_Object Qfraction;
910 Emchar to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg);
912 Lisp_Object put_char_ccs_code_point (Lisp_Object character,
913 Lisp_Object ccs, Lisp_Object value);
914 Lisp_Object remove_char_ccs (Lisp_Object character, Lisp_Object ccs);
/* Convert V, one element of a composition/decomposition list, to an
   Emchar.  V is compared against a fixed series of tag symbols
   (Qcompat, Qisolated, Qinitial, ... Qfont); when nothing applies, an
   error is signalled with ERR_MSG and ERR_ARG.
   NOTE(review): the first test and the value returned for each symbol
   are not visible in this excerpt. */
917 to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
923 else if (EQ (v, Qcompat))
925 else if (EQ (v, Qisolated))
927 else if (EQ (v, Qinitial))
929 else if (EQ (v, Qmedial))
931 else if (EQ (v, Qfinal))
933 else if (EQ (v, Qvertical))
935 else if (EQ (v, QnoBreak))
937 else if (EQ (v, Qfraction))
939 else if (EQ (v, Qsuper))
941 else if (EQ (v, Qsub))
943 else if (EQ (v, Qcircle))
945 else if (EQ (v, Qsquare))
947 else if (EQ (v, Qwide))
949 else if (EQ (v, Qnarrow))
951 else if (EQ (v, Qsmall))
953 else if (EQ (v, Qfont))
956 signal_simple_error (err_msg, err_arg);
/* Lisp primitive `get-composite-char'.  Starting from
   Vcharacter_composition_table, each element of LIST is converted with
   to_char_id and looked up with get_char_id_table; the result is
   tested with CHAR_ID_TABLE_P to decide whether another lookup level
   follows.  Malformed input or an unexpected table signals an error.
   NOTE(review): the loop structure and the return statements are not
   visible in this excerpt. */
959 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
960 Return character corresponding with list.
964 Lisp_Object table = Vcharacter_composition_table;
965 Lisp_Object rest = list;
969 Lisp_Object v = Fcar (rest);
971 Emchar c = to_char_id (v, "Invalid value for composition", list);
973 ret = get_char_id_table (c, table);
978 if (!CHAR_ID_TABLE_P (ret))
983 else if (!CONSP (rest))
985 else if (CHAR_ID_TABLE_P (ret))
988 signal_simple_error ("Invalid table is found with", list);
990 signal_simple_error ("Invalid value for composition", list);
/* Lisp primitive `char-variants'.  Checks that CHARACTER is a
   character, then returns a fresh copy (Fcopy_list) of the entry stored
   for it in Vcharacter_variant_table, so callers cannot modify the
   stored list through the result. */
993 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
994 Return variants of CHARACTER.
998 CHECK_CHAR (character);
999 return Fcopy_list (get_char_id_table (XCHAR (character),
1000 Vcharacter_variant_table));
1004 /* We store the char-attributes in hash tables with the names as the
1005 key and the actual char-id-table object as the value. Occasionally
1006 we need to use them in a list format. These routines provide us with that. */
/* Closure passed through elisp_maphash: points at the list being
   accumulated by add_char_attribute_to_list_mapper. */
1008 struct char_attribute_list_closure
1010 Lisp_Object *char_attribute_list;
/* elisp_maphash callback used by Fchar_attribute_list: pushes KEY onto
   the front of the list referenced by the closure.  VALUE is not used
   in the visible lines. */
1014 add_char_attribute_to_list_mapper (Lisp_Object key, Lisp_Object value,
1015 void *char_attribute_list_closure)
1017 /* This function can GC */
1018 struct char_attribute_list_closure *calcl
1019 = (struct char_attribute_list_closure*) char_attribute_list_closure;
1020 Lisp_Object *char_attribute_list = calcl->char_attribute_list;
1022 *char_attribute_list = Fcons (key, *char_attribute_list);
/* Lisp primitive `char-attribute-list'.  Maps over
   Vchar_attribute_hash_table with add_char_attribute_to_list_mapper,
   collecting every key into a list that is GC-protected during the
   traversal, and returns that list.
   NOTE(review): the matching UNGCPRO is not visible in this excerpt --
   confirm it precedes the return. */
1026 DEFUN ("char-attribute-list", Fchar_attribute_list, 0, 0, 0, /*
1027 Return the list of all existing character attributes except coded-charsets.
1031 Lisp_Object char_attribute_list = Qnil;
1032 struct gcpro gcpro1;
1033 struct char_attribute_list_closure char_attribute_list_closure;
1035 GCPRO1 (char_attribute_list);
1036 char_attribute_list_closure.char_attribute_list = &char_attribute_list;
1037 elisp_maphash (add_char_attribute_to_list_mapper,
1038 Vchar_attribute_hash_table,
1039 &char_attribute_list_closure);
1041 return char_attribute_list;
/* Lisp primitive `find-char-attribute-table'.  Looks ATTRIBUTE up in
   Vchar_attribute_hash_table and returns the stored value, or nil when
   there is no entry. */
1044 DEFUN ("find-char-attribute-table", Ffind_char_attribute_table, 1, 1, 0, /*
1045 Return char-id-table corresponding to ATTRIBUTE.
1049 return Fgethash (attribute, Vchar_attribute_hash_table, Qnil);
1053 /* We store the char-id-tables in hash tables with the attributes as
1054 the key and the actual char-id-table object as the value. Each
1055 char-id-table stores values of an attribute corresponding with
1056 characters. Occasionally we need to get attributes of a character
1057 in an association-list format. These routines provide us with that. */
/* Closure passed through elisp_maphash: carries a pointer to the alist
   being accumulated.  add_char_attribute_alist_mapper also reads a
   `char_id' member whose declaration is not visible in this excerpt. */
1059 struct char_attribute_alist_closure
1062 Lisp_Object *char_attribute_alist;
/* elisp_maphash callback used by Fchar_attribute_alist.  VALUE is the
   char-id-table of attribute KEY; the closure's character is looked up
   in it, and if the result is not unbound, the pair (KEY . result) is
   pushed onto the front of the alist referenced by the closure. */
1066 add_char_attribute_alist_mapper (Lisp_Object key, Lisp_Object value,
1067 void *char_attribute_alist_closure)
1069 /* This function can GC */
1070 struct char_attribute_alist_closure *caacl =
1071 (struct char_attribute_alist_closure*) char_attribute_alist_closure;
1072 Lisp_Object ret = get_char_id_table (caacl->char_id, value);
1073 if (!UNBOUNDP (ret))
1075 Lisp_Object *char_attribute_alist = caacl->char_attribute_alist;
1076 *char_attribute_alist
1077 = Fcons (Fcons (key, ret), *char_attribute_alist);
/* Lisp primitive `char-attribute-alist'.  Builds the alist in two
   passes: first every table in Vchar_attribute_hash_table is consulted
   via add_char_attribute_alist_mapper; then every charset in
   chlook->charset_by_leading_byte whose encoding table is a
   char-id-table holding an integer code point for CHARACTER adds a
   (charset . code-point) pair.
   NOTE(review): the GC protection calls, the test applied to each
   charset slot, and the return statement are not visible in this
   excerpt. */
1082 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
1083 Return the alist of attributes of CHARACTER.
1087 Lisp_Object alist = Qnil;
1090 CHECK_CHAR (character);
1092 struct gcpro gcpro1;
1093 struct char_attribute_alist_closure char_attribute_alist_closure;
1096 char_attribute_alist_closure.char_id = XCHAR (character);
1097 char_attribute_alist_closure.char_attribute_alist = &alist;
1098 elisp_maphash (add_char_attribute_alist_mapper,
1099 Vchar_attribute_hash_table,
1100 &char_attribute_alist_closure);
1104 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
1106 Lisp_Object ccs = chlook->charset_by_leading_byte[i];
1110 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1113 if ( CHAR_ID_TABLE_P (encoding_table)
1114 && INTP (cpos = get_char_id_table (XCHAR (character),
1117 alist = Fcons (Fcons (ccs, cpos), alist);
/* Lisp primitive `get-char-attribute'.  If ATTRIBUTE names a charset
   whose encoding table is a char-id-table, the entry for CHARACTER in
   that table is returned directly.  Otherwise ATTRIBUTE is looked up
   in Vchar_attribute_hash_table and CHARACTER's entry in the resulting
   table is consulted; DEFAULT-VALUE is the final fallback.
   NOTE(review): the statement executed when the entry is not unbound,
   and the default passed to Fgethash, are not visible in this
   excerpt. */
1124 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 3, 0, /*
1125 Return the value of CHARACTER's ATTRIBUTE.
1126 Return DEFAULT-VALUE if the value does not exist.
1128 (character, attribute, default_value))
1132 CHECK_CHAR (character);
1133 if (!NILP (ccs = Ffind_charset (attribute)))
1135 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1137 if (CHAR_ID_TABLE_P (encoding_table))
1138 return get_char_id_table (XCHAR (character), encoding_table);
1142 Lisp_Object table = Fgethash (attribute,
1143 Vchar_attribute_hash_table,
1145 if (!UNBOUNDP (table))
1147 Lisp_Object ret = get_char_id_table (XCHAR (character), table);
1148 if (!UNBOUNDP (ret))
1152 return default_value;
/* Lisp primitive `put-char-attribute'.  Visible behaviour, by kind of
   ATTRIBUTE:

   - A coded charset: delegated to put_char_ccs_code_point.
   - Q_decomposition: VALUE is validated.  When it has more than one
     element, each element is converted with to_char_id, collected in
     the vector `seq', and used to walk or extend the nested tables of
     Vcharacter_composition_table; put_char_id_table (c, character,
     table) records CHARACTER at a point not visible here.  When it
     has a single element, CHARACTER is added to that element's list
     in Vcharacter_variant_table unless the element is already in that
     list.
   - Qto_ucs or Q_ucs: CHARACTER is added to the variant list of the
     code point unless already present, and Q_ucs is renamed to
     Qto_ucs.

   Finally VALUE is stored for CHARACTER in ATTRIBUTE's char-id-table.
   A table initialized to Qunbound is created and registered in
   Vchar_attribute_hash_table when needed.
   NOTE(review): several conditions, the use made of `seq', and the
   return value are not visible in this excerpt. */
1155 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
1156 Store CHARACTER's ATTRIBUTE with VALUE.
1158 (character, attribute, value))
1162 CHECK_CHAR (character);
1163 ccs = Ffind_charset (attribute);
1166 return put_char_ccs_code_point (character, ccs, value);
1168 else if (EQ (attribute, Q_decomposition))
1173 signal_simple_error ("Invalid value for ->decomposition",
1176 if (CONSP (Fcdr (value)))
1178 Lisp_Object rest = value;
1179 Lisp_Object table = Vcharacter_composition_table;
1183 GET_EXTERNAL_LIST_LENGTH (rest, len);
1184 seq = make_vector (len, Qnil);
1186 while (CONSP (rest))
1188 Lisp_Object v = Fcar (rest);
1191 = to_char_id (v, "Invalid value for ->decomposition", value);
1194 XVECTOR_DATA(seq)[i++] = v;
1196 XVECTOR_DATA(seq)[i++] = make_char (c);
1200 put_char_id_table (c, character, table);
1205 ntable = get_char_id_table (c, table);
1206 if (!CHAR_ID_TABLE_P (ntable))
1208 ntable = make_char_id_table (Qnil);
1209 put_char_id_table (c, ntable, table);
1217 Lisp_Object v = Fcar (value);
1221 Emchar c = XINT (v);
1223 = get_char_id_table (c, Vcharacter_variant_table);
1225 if (NILP (Fmemq (v, ret)))
1227 put_char_id_table (c, Fcons (character, ret),
1228 Vcharacter_variant_table);
1231 seq = make_vector (1, v);
1235 else if (EQ (attribute, Qto_ucs) || EQ (attribute, Q_ucs))
1241 signal_simple_error ("Invalid value for ->ucs", value);
1245 ret = get_char_id_table (c, Vcharacter_variant_table);
1246 if (NILP (Fmemq (character, ret)))
1248 put_char_id_table (c, Fcons (character, ret),
1249 Vcharacter_variant_table);
1252 if (EQ (attribute, Q_ucs))
1253 attribute = Qto_ucs;
1257 Lisp_Object table = Fgethash (attribute,
1258 Vchar_attribute_hash_table,
1263 table = make_char_id_table (Qunbound);
1264 Fputhash (attribute, table, Vchar_attribute_hash_table);
1266 put_char_id_table (XCHAR (character), value, table);
/* Lisp primitive `remove-char-attribute'.  For a coded charset the
   work is delegated to remove_char_ccs.  Otherwise, if ATTRIBUTE has a
   table in Vchar_attribute_hash_table, CHARACTER's entry in it is
   reset to Qunbound.
   NOTE(review): the condition selecting the charset branch and the
   return values of the other branch are not visible in this
   excerpt. */
1271 DEFUN ("remove-char-attribute", Fremove_char_attribute, 2, 2, 0, /*
1272 Remove CHARACTER's ATTRIBUTE.
1274 (character, attribute))
1278 CHECK_CHAR (character);
1279 ccs = Ffind_charset (attribute);
1282 return remove_char_ccs (character, ccs);
1286 Lisp_Object table = Fgethash (attribute,
1287 Vchar_attribute_hash_table,
1289 if (!UNBOUNDP (table))
1291 put_char_id_table (XCHAR (character), Qunbound, table);
/* Number of positions to allocate per dimension of charset CS.  This
   is normally CHARSET_CHARS (cs).  A 94-character set whose byte
   offset is not 33 gets 128 - CHARSET_BYTE_OFFSET (cs) instead; the
   original comment describes that as an ad-hoc rule for `ascii'.
   XCHARSET_BYTE_SIZE is the same computation for a charset given as a
   Lisp object. */
1298 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
1300 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
1302 /* ad-hoc method for `ascii' */
1303 if ((CHARSET_CHARS (cs) == 94) &&
1304 (CHARSET_BYTE_OFFSET (cs) != 33))
1305 return 128 - CHARSET_BYTE_OFFSET (cs);
1307 return CHARSET_CHARS (cs);
1310 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
/* Validate a decoding-table vector V for a charset of dimension DIM
   with at most CCS_LEN entries per level.  A vector longer than
   CCS_LEN is rejected; elements are then examined one by one, with a
   test for values that are neither nil nor characters and a recursive
   check with DIM - 1 for nested levels.
   NOTE(review): the returned status codes and the conditions selecting
   each element test are not visible in this excerpt. */
1312 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
1314 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
1318 if (XVECTOR_LENGTH (v) > ccs_len)
1321 for (i = 0; i < XVECTOR_LENGTH (v); i++)
1323 Lisp_Object c = XVECTOR_DATA(v)[i];
1325 if (!NILP (c) && !CHARP (c))
1329 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Clear the decoding-table entry for a code point.  At each level the
   index is the byte of the code point selected by the current `dim',
   minus BYTE_OFFSET; the entry reached at the last level is set to
   Qnil.
   NOTE(review): the remaining parameters, the loop over levels, and
   any bounds check on the computed index are not visible in this
   excerpt -- confirm the index cannot fall outside the vector. */
1341 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1344 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1354 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1355 nv = XVECTOR_DATA(v)[i];
1361 XVECTOR_DATA(v)[i] = Qnil;
/* Record CHARACTER as the decoding of CODE_POINT in decoding table V.
   At each level the index is the byte of the code point selected by
   the current `dim', minus BYTE_OFFSET.  Sub-vectors are created with
   make_older_vector, using the length of V, and CHARACTER is stored at
   the final position.
   NOTE(review): the loop over levels, the condition under which a
   sub-vector is created, and any bounds check on the computed index
   are not visible in this excerpt. */
1365 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1366 int code_point, Lisp_Object character);
1368 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1369 int code_point, Lisp_Object character)
1373 int ccs_len = XVECTOR_LENGTH (v);
1378 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1379 nv = XVECTOR_DATA(v)[i];
1383 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
1389 XVECTOR_DATA(v)[i] = character;
/* Associate CHARACTER with code point VALUE in coded charset CCS.

   The decoding table is updated unless CCS is named `ucs' and VALUE
   equals CHARACTER's own code.  VALUE may be an integer or, in the
   obsolete representation, a list of bytes that is folded into one
   integer, eight bits per byte.  For charsets whose graphic is 1 the
   code point is masked with 0x7F7F7F7F.  A previous code point of
   CHARACTER is removed from the decoding table, which is created on
   demand, before the new mapping is stored.

   Finally VALUE is stored for CHARACTER in the charset's encoding
   table, which is created with make_char_id_table (Qnil) if absent.
   NOTE(review): several conditions, the per-byte handling for graphic
   1 inside the list loop, and the return value are not visible in
   this excerpt. */
1393 put_char_ccs_code_point (Lisp_Object character,
1394 Lisp_Object ccs, Lisp_Object value)
1396 Lisp_Object encoding_table;
1398 if (!EQ (XCHARSET_NAME (ccs), Qucs)
1399 || (XCHAR (character) != XINT (value)))
1401 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
1402 int dim = XCHARSET_DIMENSION (ccs);
1403 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
1404 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
1408 { /* obsolete representation: value must be a list of bytes */
1409 Lisp_Object ret = Fcar (value);
1413 signal_simple_error ("Invalid value for coded-charset", value);
1414 code_point = XINT (ret);
1415 if (XCHARSET_GRAPHIC (ccs) == 1)
1417 rest = Fcdr (value);
1418 while (!NILP (rest))
1423 signal_simple_error ("Invalid value for coded-charset",
1427 signal_simple_error ("Invalid value for coded-charset",
1430 if (XCHARSET_GRAPHIC (ccs) == 1)
1432 code_point = (code_point << 8) | j;
1435 value = make_int (code_point);
1437 else if (INTP (value))
1439 code_point = XINT (value);
1440 if (XCHARSET_GRAPHIC (ccs) == 1)
1442 code_point &= 0x7F7F7F7F;
1443 value = make_int (code_point);
1447 signal_simple_error ("Invalid value for coded-charset", value);
1451 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
1454 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
1459 XCHARSET_DECODING_TABLE (ccs)
1460 = v = make_older_vector (ccs_len, Qnil);
1463 decoding_table_put_char (v, dim, byte_offset, code_point, character);
1465 if (NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
1467 XCHARSET_ENCODING_TABLE (ccs)
1468 = encoding_table = make_char_id_table (Qnil);
1470 put_char_id_table (XCHAR (character), value, encoding_table);
/* Remove CHARACTER's mapping in coded charset CCS from both
   directions.  If the decoding table is a vector, CHARACTER's current
   code point is fetched with Fget_char_attribute and cleared via
   decoding_table_remove_char.  If the encoding table is a
   char-id-table, CHARACTER's entry is reset to Qnil, the value that
   put_char_ccs_code_point uses to initialize such tables.
   NOTE(review): the test applied to the fetched code point and the
   return value are not visible in this excerpt. */
1475 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
1477 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
1478 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1480 if (VECTORP (decoding_table))
1482 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
1486 decoding_table_remove_char (decoding_table,
1487 XCHARSET_DIMENSION (ccs),
1488 XCHARSET_BYTE_OFFSET (ccs),
1492 if (CHAR_ID_TABLE_P (encoding_table))
1494 put_char_id_table (XCHAR (character), Qnil, encoding_table);
/* Lisp primitive `define-char'.  ATTRIBUTES is an alist.  The character
   to define is determined first:

   - from the `ucs' entry, when it is an integer;
   - otherwise from the first entry naming a coded charset with a
     nonzero final byte or a positive UCS maximum, via Fmake_char or
     Fdecode_char;
   - otherwise from a `=>ucs'-style entry (Qto_ucs or Q_ucs), using
     that code plus 0x100000.

   Then every (attribute . value) cell of ATTRIBUTES is stored with
   Fput_char_attribute.  Malformed cells or codes signal "Invalid
   argument".
   NOTE(review): the conditions choosing between Fmake_char and
   Fdecode_char, the `setup_attributes' label, and the return value
   are not visible in this excerpt. */
1499 EXFUN (Fmake_char, 3);
1500 EXFUN (Fdecode_char, 2);
1502 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
1503 Store character's ATTRIBUTES.
1507 Lisp_Object rest = attributes;
1508 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
1509 Lisp_Object character;
1513 while (CONSP (rest))
1515 Lisp_Object cell = Fcar (rest);
1519 signal_simple_error ("Invalid argument", attributes);
1520 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
1521 && ((XCHARSET_FINAL (ccs) != 0) ||
1522 (XCHARSET_UCS_MAX (ccs) > 0)) )
1526 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
1528 character = Fdecode_char (ccs, cell);
1529 if (!NILP (character))
1530 goto setup_attributes;
1534 if ( (!NILP (code = Fcdr (Fassq (Qto_ucs, attributes)))) ||
1535 (!NILP (code = Fcdr (Fassq (Q_ucs, attributes)))) )
1539 signal_simple_error ("Invalid argument", attributes);
1541 character = make_char (XINT (code) + 0x100000);
1542 goto setup_attributes;
1546 else if (!INTP (code))
1547 signal_simple_error ("Invalid argument", attributes);
1549 character = make_char (XINT (code));
1553 while (CONSP (rest))
1555 Lisp_Object cell = Fcar (rest);
1558 signal_simple_error ("Invalid argument", attributes);
1560 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
1566 Lisp_Object Vutf_2000_version;
1570 int leading_code_private_11;
1573 Lisp_Object Qcharsetp;
1575 /* Qdoc_string, Qdimension, Qchars defined in general.c */
/* Property-name symbols.  Qregistry, Qfinal, Qgraphic, Qdirection,
   Qshort_name and Qlong_name are the keys that make-charset compares
   its property list against.  */
1576 Lisp_Object Qregistry, Qfinal, Qgraphic;
1577 Lisp_Object Qdirection;
1578 Lisp_Object Qreverse_direction_charset;
1579 Lisp_Object Qleading_byte;
1580 Lisp_Object Qshort_name, Qlong_name;
1594 Qcyrillic_iso8859_5,
1596 Qjapanese_jisx0208_1978,
1600 Qjapanese_jisx0208_1990,
1603 Qchinese_cns11643_1,
1604 Qchinese_cns11643_2,
1611 Qlatin_viscii_lower,
1612 Qlatin_viscii_upper,
1613 Qvietnamese_viscii_lower,
1614 Qvietnamese_viscii_upper,
1626 Qideograph_gt_pj_10,
1627 Qideograph_gt_pj_11,
/* The two values accepted for a charset direction (compared against in
   make-charset and charset-from-attributes).  */
1657 Lisp_Object Ql2r, Qr2l;
/* Hash table keyed by charset name symbol with the charset object as
   value; find-charset reads it, make_charset and define-charset-alias
   add entries.  */
1659 Lisp_Object Vcharset_hash_table;
1661 /* Composite characters are characters constructed by overstriking two
1662 or more regular characters.
1664 1) The old Mule implementation involves storing composite characters
1665 in a buffer as a tag followed by all of the actual characters
1666 used to make up the composite character. I think this is a bad
1667 idea; it greatly complicates code that wants to handle strings
1668 one character at a time because it has to deal with the possibility
1669 of great big ungainly characters. It's much more reasonable to
1670 simply store an index into a table of composite characters.
1672 2) The current implementation only allows for 16,384 separate
1673 composite characters over the lifetime of the XEmacs process.
1674 This could become a potential problem if the user
1675 edited lots of different files that use composite characters.
1676 Due to FSF bogosity, increasing the number of allowable
1677 composite characters under Mule would decrease the number
1678 of possible faces that can exist. Mule already has shrunk
1679 this to 2048, and further shrinkage would become uncomfortable.
1680 No such problems exist in XEmacs.
1682 Composite characters could be represented as 0x80 C1 C2 C3,
1683 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1684 for slightly under 2^20 (one million) composite characters
1685 over the XEmacs process lifetime, and you only need to
1686 increase the size of a Mule character from 19 to 21 bits.
1687 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1688 85 million (slightly over 2^26) composite characters. */
1691 /************************************************************************/
1692 /* Basic Emchar functions */
1693 /************************************************************************/
1695 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1696 string in STR. Returns the number of bytes stored.
1697 Do not call this directly. Use the macro set_charptr_emchar() instead.
/* Encode C at STR.  The visible range tests pick an output length from
   the magnitude of C and emit a UTF-8-style sequence: a lead byte that
   carries the length marker (0xc0, 0xe0, 0xf0, 0xf8 or 0xfc) followed by
   continuation bytes of the form 0x80 | <6 bits>.  The trailing
   BREAKUP_CHAR / leading-byte lines belong to a different encoding path
   (presumably selected by preprocessor lines not visible here --
   confirm).  */
1701 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1707 Lisp_Object charset;
/* Up to 11 bits: two bytes.  */
1716 else if ( c <= 0x7ff )
1718 *p++ = (c >> 6) | 0xc0;
1719 *p++ = (c & 0x3f) | 0x80;
/* Up to 16 bits: three bytes.  */
1721 else if ( c <= 0xffff )
1723 *p++ = (c >> 12) | 0xe0;
1724 *p++ = ((c >> 6) & 0x3f) | 0x80;
1725 *p++ = (c & 0x3f) | 0x80;
/* Up to 21 bits: four bytes.  */
1727 else if ( c <= 0x1fffff )
1729 *p++ = (c >> 18) | 0xf0;
1730 *p++ = ((c >> 12) & 0x3f) | 0x80;
1731 *p++ = ((c >> 6) & 0x3f) | 0x80;
1732 *p++ = (c & 0x3f) | 0x80;
/* Up to 26 bits: five bytes.  */
1734 else if ( c <= 0x3ffffff )
1736 *p++ = (c >> 24) | 0xf8;
1737 *p++ = ((c >> 18) & 0x3f) | 0x80;
1738 *p++ = ((c >> 12) & 0x3f) | 0x80;
1739 *p++ = ((c >> 6) & 0x3f) | 0x80;
1740 *p++ = (c & 0x3f) | 0x80;
/* Six-byte form with lead marker 0xfc.  */
1744 *p++ = (c >> 30) | 0xfc;
1745 *p++ = ((c >> 24) & 0x3f) | 0x80;
1746 *p++ = ((c >> 18) & 0x3f) | 0x80;
1747 *p++ = ((c >> 12) & 0x3f) | 0x80;
1748 *p++ = ((c >> 6) & 0x3f) | 0x80;
1749 *p++ = (c & 0x3f) | 0x80;
/* Leading-byte representation: BREAKUP_CHAR presumably yields the
   charset and the position bytes c1/c2 of C.  A private leading byte
   is preceded by its prefix byte.  */
1752 BREAKUP_CHAR (c, charset, c1, c2);
1753 lb = CHAR_LEADING_BYTE (c);
1754 if (LEADING_BYTE_PRIVATE_P (lb))
1755 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1757 if (EQ (charset, Vcharset_control_1))
1766 /* Return the first character from a Mule-encoded string in STR,
1767 assuming it's non-ASCII. Do not call this directly.
1768 Use the macro charptr_emchar() instead. */
/* Decode the character that starts at STR.  Two decoders are visible:
   one classifies the first byte B by UTF-8-style lead-byte markers and
   then folds six bits per following byte into CH; the other treats the
   first byte as a charset leading byte.  Which one is compiled is
   decided on lines not visible here.  */
1771 non_ascii_charptr_emchar (const Bufbyte *str)
/* Lead-byte thresholds are tested from the largest marker downwards.  */
1784 else if ( b >= 0xf8 )
1789 else if ( b >= 0xf0 )
1794 else if ( b >= 0xe0 )
1799 else if ( b >= 0xc0 )
/* Each remaining byte contributes its low six bits.  */
1809 for( ; len > 0; len-- )
1812 ch = ( ch << 6 ) | ( b & 0x3f );
1816 Bufbyte i0 = *str, i1, i2 = 0;
1817 Lisp_Object charset;
/* Control-1: the character is the byte after the leading byte, less
   0x20.  */
1819 if (i0 == LEADING_BYTE_CONTROL_1)
1820 return (Emchar) (*++str - 0x20);
1822 if (LEADING_BYTE_PREFIX_P (i0))
1827 charset = CHARSET_BY_LEADING_BYTE (i0);
/* Presumably only two-dimensional charsets read a second position byte
   into I2; otherwise I2 keeps its initial value 0.  */
1828 if (XCHARSET_DIMENSION (charset) == 2)
1831 return MAKE_CHAR (charset, i1, i2);
1835 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1836 Do not call this directly. Use the macro valid_char_p() instead. */
/* Validity test for a non-ASCII Emchar.  CH is split into three fields
   and each is range-checked; where a position value of 0x20 or 0x7F is
   involved, the owning charset is looked up by leading byte and must
   be a 96-character set.  */
1840 non_ascii_valid_char_p (Emchar ch)
1844 /* Must have only lowest 19 bits set */
1848 f1 = CHAR_FIELD1 (ch);
1849 f2 = CHAR_FIELD2 (ch);
1850 f3 = CHAR_FIELD3 (ch);
1854 Lisp_Object charset;
/* F2 must fall inside the official or the private FIELD2 range.  */
1856 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1857 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1858 f2 > MAX_CHAR_FIELD2_PRIVATE)
/* Position values other than 0x20 and 0x7F in a non-private charset are
   decided here without a charset lookup (the result statement is on a
   line not visible here).  */
1863 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1864 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1868 NOTE: This takes advantage of the fact that
1869 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1870 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1872 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1873 if (EQ (charset, Qnil))
1875 return (XCHARSET_CHARS (charset) == 96);
1879 Lisp_Object charset;
/* Same scheme for characters that use FIELD1: F1 must fall inside the
   official or the private FIELD1 range.  */
1881 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1882 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1883 f1 > MAX_CHAR_FIELD1_PRIVATE)
1885 if (f2 < 0x20 || f3 < 0x20)
1888 #ifdef ENABLE_COMPOSITE_CHARS
/* Composite characters are checked against the composite-char hash
   table rather than a charset.  */
1889 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1891 if (UNBOUNDP (Fgethash (make_int (ch),
1892 Vcomposite_char_char2string_hash_table,
1897 #endif /* ENABLE_COMPOSITE_CHARS */
1899 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1900 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
/* Official and private FIELD1 values use different offsets to reach
   the leading byte.  */
1903 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1905 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1908 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1910 if (EQ (charset, Qnil))
1912 return (XCHARSET_CHARS (charset) == 96);
1918 /************************************************************************/
1919 /* Basic string functions */
1920 /************************************************************************/
1922 /* Copy the character pointed to by SRC into DST. Do not call this
1923 directly. Use the macro charptr_copy_char() instead.
1924 Return the number of bytes copied. */
/* The number of bytes to copy is derived from the first byte of SRC
   alone; the loop then advances SRC and DST together once per byte.  */
1927 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
1929 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
1931 for (i = bytes; i; i--, dst++, src++)
1937 /************************************************************************/
1938 /* streams of Emchars */
1939 /************************************************************************/
1941 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1942 The functions below are not meant to be called directly; use
1943 the macros in insdel.h. */
/* Finish reading one character from STREAM.  CH is the first byte,
   already read by the caller; the remaining bytes (their count is
   derived from CH) are read here into a local buffer, which is then
   decoded with charptr_emchar.  */
1946 Lstream_get_emchar_1 (Lstream *stream, int ch)
1948 Bufbyte str[MAX_EMCHAR_LEN];
1949 Bufbyte *strptr = str;
1952 str[0] = (Bufbyte) ch;
1954 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
1956 int c = Lstream_getc (stream);
/* A negative value from Lstream_getc in mid-character is only caught by
   this assertion.  */
1957 bufpos_checking_assert (c >= 0);
1958 *++strptr = (Bufbyte) c;
1960 return charptr_emchar (str);
/* Encode CH into a local buffer with set_charptr_emchar and write the
   resulting bytes to STREAM; the result of Lstream_write is returned.  */
1964 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1966 Bufbyte str[MAX_EMCHAR_LEN];
1967 Bytecount len = set_charptr_emchar (str, ch);
1968 return Lstream_write (stream, str, len);
/* Encode CH into a local buffer with set_charptr_emchar and push the
   resulting bytes back onto STREAM with Lstream_unread.  */
1972 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1974 Bufbyte str[MAX_EMCHAR_LEN];
1975 Bytecount len = set_charptr_emchar (str, ch);
1976 Lstream_unread (stream, str, len);
1980 /************************************************************************/
1981 /* charset object */
1982 /************************************************************************/
/* GC mark method for charset objects: marks the Lisp_Object slots of
   the charset.  */
1985 mark_charset (Lisp_Object obj)
1987 Lisp_Charset *cs = XCHARSET (obj);
1989 mark_object (cs->short_name);
1990 mark_object (cs->long_name);
1991 mark_object (cs->doc_string);
1992 mark_object (cs->registry);
1993 mark_object (cs->ccl_program);
1995 mark_object (cs->encoding_table);
/* NOTE(review): decoding_table is not marked here even though
   charset_description lists it as a Lisp_Object slot -- confirm this is
   intentional.  */
1996 /* mark_object (cs->decoding_table); */
/* Print method for charset objects.  The output has the form
   #<charset NAME SHORT-NAME LONG-NAME DOC-STRING ...numeric
   properties... reg=REGISTRY 0xUID>.  The error call below reports an
   attempt to print the object readably; its guarding condition is on a
   line not visible here.  */
2002 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
2004 Lisp_Charset *cs = XCHARSET (obj);
2008 error ("printing unreadable object #<charset %s 0x%x>",
2009 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
2012 write_c_string ("#<charset ", printcharfun);
2013 print_internal (CHARSET_NAME (cs), printcharfun, 0);
2014 write_c_string (" ", printcharfun);
2015 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
2016 write_c_string (" ", printcharfun);
2017 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
2018 write_c_string (" ", printcharfun);
2019 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
/* The numeric properties are formatted into BUF first and then written
   out as one C string.  */
2020 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
2022 CHARSET_DIMENSION (cs),
2023 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
2024 CHARSET_COLUMNS (cs),
2025 CHARSET_GRAPHIC (cs),
2026 CHARSET_FINAL (cs));
2027 write_c_string (buf, printcharfun);
2028 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
2029 sprintf (buf, " 0x%x>", cs->header.uid);
2030 write_c_string (buf, printcharfun);
/* lrecord description of Lisp_Charset: one XD_LISP_OBJECT entry, given
   by its offset within the structure, for each Lisp_Object slot listed
   here.  */
2033 static const struct lrecord_description charset_description[] = {
2034 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
2035 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
2036 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
2037 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
2038 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
2039 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
2040 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
2042 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
2043 { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
/* Define the "charset" lrecord type, wiring in mark_charset,
   print_charset and charset_description from above.  */
2048 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
2049 mark_charset, print_charset, 0, 0, 0,
2050 charset_description,
2053 /* Make a new charset. */
2054 /* #### SJT Should generic properties be allowed? */
/* Allocate a charset object, fill in its slots from the arguments and
   register it in the lookup tables: the by-attributes table (indexed by
   an ISO 2022 type computed from DIMENSION and CHARS, and by FINAL),
   the by-leading-byte table (indexed by ID) and the name hash table.
   The CCL program and the reverse-direction charset start out as
   Qnil.  */
2056 make_charset (Charset_ID id, Lisp_Object name,
2057 unsigned short chars, unsigned char dimension,
2058 unsigned char columns, unsigned char graphic,
2059 Bufbyte final, unsigned char direction, Lisp_Object short_name,
2060 Lisp_Object long_name, Lisp_Object doc,
2062 Lisp_Object decoding_table,
2063 Emchar ucs_min, Emchar ucs_max,
2064 Emchar code_offset, unsigned char byte_offset)
2067 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
2071 XSETCHARSET (obj, cs);
2073 CHARSET_ID (cs) = id;
2074 CHARSET_NAME (cs) = name;
2075 CHARSET_SHORT_NAME (cs) = short_name;
2076 CHARSET_LONG_NAME (cs) = long_name;
2077 CHARSET_CHARS (cs) = chars;
2078 CHARSET_DIMENSION (cs) = dimension;
2079 CHARSET_DIRECTION (cs) = direction;
2080 CHARSET_COLUMNS (cs) = columns;
2081 CHARSET_GRAPHIC (cs) = graphic;
2082 CHARSET_FINAL (cs) = final;
2083 CHARSET_DOC_STRING (cs) = doc;
2084 CHARSET_REGISTRY (cs) = reg;
2085 CHARSET_CCL_PROGRAM (cs) = Qnil;
2086 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
/* NOTE(review): the DECODING_TABLE argument is not stored in any
   visible line; both tables start out as Qnil -- confirm that ignoring
   the argument is intended.  */
2088 CHARSET_DECODING_TABLE(cs) = Qnil;
2089 CHARSET_ENCODING_TABLE(cs) = Qnil;
2090 CHARSET_UCS_MIN(cs) = ucs_min;
2091 CHARSET_UCS_MAX(cs) = ucs_max;
2092 CHARSET_CODE_OFFSET(cs) = code_offset;
2093 CHARSET_BYTE_OFFSET(cs) = byte_offset;
/* Bytes per character in the internal representation: 1 for ASCII.
   The conditions choosing between dimension + 1 and dimension + 2 are
   on lines not visible here.  */
2097 if (id == LEADING_BYTE_ASCII)
2098 CHARSET_REP_BYTES (cs) = 1;
2100 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
2102 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
2107 /* some charsets do not have final characters. This includes
2108 ASCII, Control-1, Composite, and the two faux private
2110 unsigned char iso2022_type
2111 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
2113 if (code_offset == 0)
2115 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
2116 chlook->charset_by_attributes[iso2022_type][final] = obj;
2120 (chlook->charset_by_attributes[iso2022_type][final][direction]));
2121 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
2125 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
2126 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
2128 /* Some charsets are "faux" and don't have names or really exist at
2129 all except in the leading-byte table. */
/* Register the charset under NAME so that find-charset can return it.  */
2131 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused leading byte for a new charset of DIMENSION.
   Three counters in chlook are visible: a general one and one each for
   private 1-byte and 2-byte leading bytes; each is compared with its
   maximum before being post-incremented.  When nothing is left, an
   error carrying DIMENSION is signalled.  */
2136 get_unallocated_leading_byte (int dimension)
2141 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
2144 lb = chlook->next_allocated_leading_byte++;
2148 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
2151 lb = chlook->next_allocated_1_byte_leading_byte++;
2155 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
2158 lb = chlook->next_allocated_2_byte_leading_byte++;
2164 ("No more character sets free for this dimension",
2165 make_int (dimension));
2171 /* Number of Big5 characters which have the same code in 1st byte. */
/* Evaluates to 157: the 94 second bytes 0xA1-0xFE plus the 63 second
   bytes 0x40-0x7E accepted by decode_builtin_char below.  */
2173 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* Map CODE_POINT of CHARSET to a character without consulting a
   decoding table.  Visible cases: Big5 is first re-expressed as a code
   point of one of the two 94x94 charsets big5-1 / big5-2; charsets with
   a final byte >= '0' map into the MIN_CHAR_94 / 96 / 94x94 / 96x96
   blocks; charsets with a UCS maximum map linearly into their UCS
   range.  */
2176 decode_builtin_char (Lisp_Object charset, int code_point)
2180 if (EQ (charset, Vcharset_chinese_big5))
2182 int c1 = code_point >> 8;
2183 int c2 = code_point & 0xFF;
/* Accept only first bytes 0xA1-0xFE with second bytes 0x40-0x7E or
   0xA1-0xFE.  */
2186 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
2188 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
2189 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
/* Linear index: the two second-byte ranges are packed together, so
   0x40.. maps to 0.. and 0xA1.. maps to 63.. within a row.  */
2191 I = (c1 - 0xA1) * BIG5_SAME_ROW
2192 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
2196 charset = Vcharset_chinese_big5_1;
/* For big5-2 the index is rebased to the row of first byte 0xC9.  */
2200 charset = Vcharset_chinese_big5_2;
2201 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
/* Re-express the index as a two-byte 94x94 code point (bytes start at
   33).  */
2203 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
2206 if ((final = XCHARSET_FINAL (charset)) >= '0')
2208 if (XCHARSET_DIMENSION (charset) == 1)
2210 switch (XCHARSET_CHARS (charset))
2214 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
2217 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
2225 switch (XCHARSET_CHARS (charset))
2228 return MIN_CHAR_94x94
2229 + (final - '0') * 94 * 94
2230 + (((code_point >> 8) & 0x7F) - 33) * 94
2231 + ((code_point & 0x7F) - 33);
2233 return MIN_CHAR_96x96
2234 + (final - '0') * 96 * 96
2235 + (((code_point >> 8) & 0x7F) - 32) * 96
2236 + ((code_point & 0x7F) - 32);
/* Range-mapped charsets: turn the code point into a linear offset,
   shift it into [UCS_MIN, UCS_MAX] and range-check the result.  */
2243 else if (XCHARSET_UCS_MAX (charset))
2246 = (XCHARSET_DIMENSION (charset) == 1
2248 code_point - XCHARSET_BYTE_OFFSET (charset)
2250 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
2251 * XCHARSET_CHARS (charset)
2252 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
2253 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
2254 if ((cid < XCHARSET_UCS_MIN (charset))
2255 || (XCHARSET_UCS_MAX (charset) < cid))
/* Compute the code point of character CH in CHARSET arithmetically.
   Visible cases: (1) CH lies in the charset's [UCS_MIN, UCS_MAX] range;
   (2) the charset has code offset 0 and CH lies in the block reserved
   for its final byte; (3) CHARSET is mojikyo-2022-1.  */
2264 range_charset_code_point (Lisp_Object charset, Emchar ch)
2268 if ((XCHARSET_UCS_MIN (charset) <= ch)
2269 && (ch <= XCHARSET_UCS_MAX (charset)))
/* D is the linear index of CH inside the charset.  */
2271 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
/* Split D into one byte per dimension, base XCHARSET_CHARS, adding the
   byte offset to every byte.  */
2273 if (XCHARSET_CHARS (charset) == 256)
2275 else if (XCHARSET_DIMENSION (charset) == 1)
2276 return d + XCHARSET_BYTE_OFFSET (charset);
2277 else if (XCHARSET_DIMENSION (charset) == 2)
2279 ((d / XCHARSET_CHARS (charset)
2280 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2281 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2282 else if (XCHARSET_DIMENSION (charset) == 3)
2284 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2285 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2286 | ((d / XCHARSET_CHARS (charset)
2287 % XCHARSET_CHARS (charset)
2288 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2289 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2290 else /* if (XCHARSET_DIMENSION (charset) == 4) */
2292 ((d / (XCHARSET_CHARS (charset)
2293 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2294 + XCHARSET_BYTE_OFFSET (charset)) << 24)
2295 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2296 % XCHARSET_CHARS (charset)
2297 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2298 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
2299 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2300 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
/* Charsets without a code offset: the final byte selects a block of
   94, 96, 94*94 or 96*96 characters starting at the corresponding
   MIN_CHAR_* constant; D is CH's offset inside that block.  */
2302 else if (XCHARSET_CODE_OFFSET (charset) == 0)
2304 if (XCHARSET_DIMENSION (charset) == 1)
2306 if (XCHARSET_CHARS (charset) == 94)
2308 if (((d = ch - (MIN_CHAR_94
2309 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
2313 else if (XCHARSET_CHARS (charset) == 96)
2315 if (((d = ch - (MIN_CHAR_96
2316 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
2323 else if (XCHARSET_DIMENSION (charset) == 2)
2325 if (XCHARSET_CHARS (charset) == 94)
2327 if (((d = ch - (MIN_CHAR_94x94
2328 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
2331 return (((d / 94) + 33) << 8) | (d % 94 + 33);
2333 else if (XCHARSET_CHARS (charset) == 96)
2335 if (((d = ch - (MIN_CHAR_96x96
2336 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
2339 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* mojikyo-2022-1: a three-byte code point derived from the offset of CH
   above MIN_CHAR_MOJIKYO (exclusive).  BYTE2 is adjusted further on
   lines not visible here.  */
2345 if (EQ (charset, Vcharset_mojikyo_2022_1)
2346 && (MIN_CHAR_MOJIKYO < ch) && (ch < MIN_CHAR_MOJIKYO + 94 * 60 * 94))
2348 int m = ch - MIN_CHAR_MOJIKYO - 1;
2349 int byte1 = m / (94 * 60) + 33;
2350 int byte2 = (m % (94 * 60)) / 94;
2351 int byte3 = m % 94 + 33;
2357 return (byte1 << 16) | (byte2 << 8) | byte3;
/* Map character C to a built-in charset and a code point in it.  The
   charset is stored through CHARSET and the code point is the return
   value.  The ranges are tested in ascending order of C.  In the
   94 / 96 / 94x94 / 96x96 blocks the charset is looked up by its
   attributes, and Vcharset_ucs is used when no such charset exists.  */
2363 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
2365 if (c <= MAX_CHAR_BASIC_LATIN)
2367 *charset = Vcharset_ascii;
2372 *charset = Vcharset_control_1;
2377 *charset = Vcharset_latin_iso8859_1;
2381 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
2383 *charset = Vcharset_hebrew_iso8859_8;
2384 return c - MIN_CHAR_HEBREW + 0x20;
2387 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
2389 *charset = Vcharset_thai_tis620;
2390 return c - MIN_CHAR_THAI + 0x20;
/* NOTE(review): unlike every other visible branch, this one returns a
   Lisp list and never stores through CHARSET.  Presumably it sits in a
   disabled region whose delimiters are on lines not visible here --
   confirm.  */
2393 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
2394 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
2396 return list2 (Vcharset_katakana_jisx0201,
2397 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
2400 else if (c <= MAX_CHAR_BMP)
2402 *charset = Vcharset_ucs_bmp;
2405 else if (c < MIN_CHAR_DAIKANWA)
2407 *charset = Vcharset_ucs;
2410 else if (c <= MAX_CHAR_DAIKANWA)
2412 *charset = Vcharset_ideograph_daikanwa;
2413 return c - MIN_CHAR_DAIKANWA;
2415 else if (c <= MAX_CHAR_MOJIKYO_0)
2417 *charset = Vcharset_mojikyo;
2418 return c - MIN_CHAR_MOJIKYO_0;
2420 else if (c < MIN_CHAR_94)
2422 *charset = Vcharset_ucs;
/* One-dimensional 94-character sets: the block index gives the final
   byte (counted from '0'), the remainder gives the position (from
   33).  */
2425 else if (c <= MAX_CHAR_94)
2427 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
2428 ((c - MIN_CHAR_94) / 94) + '0',
2429 CHARSET_LEFT_TO_RIGHT);
2430 if (!NILP (*charset))
2431 return ((c - MIN_CHAR_94) % 94) + 33;
2434 *charset = Vcharset_ucs;
/* One-dimensional 96-character sets: same scheme, positions from 32.  */
2438 else if (c <= MAX_CHAR_96)
2440 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
2441 ((c - MIN_CHAR_96) / 96) + '0',
2442 CHARSET_LEFT_TO_RIGHT);
2443 if (!NILP (*charset))
2444 return ((c - MIN_CHAR_96) % 96) + 32;
2447 *charset = Vcharset_ucs;
/* Two-dimensional sets: the result packs row and column into two
   bytes.  */
2451 else if (c <= MAX_CHAR_94x94)
2454 = CHARSET_BY_ATTRIBUTES (94, 2,
2455 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
2456 CHARSET_LEFT_TO_RIGHT);
2457 if (!NILP (*charset))
2458 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
2459 | (((c - MIN_CHAR_94x94) % 94) + 33);
2462 *charset = Vcharset_ucs;
2466 else if (c <= MAX_CHAR_96x96)
2469 = CHARSET_BY_ATTRIBUTES (96, 2,
2470 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
2471 CHARSET_LEFT_TO_RIGHT);
2472 if (!NILP (*charset))
2473 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
2474 | (((c - MIN_CHAR_96x96) % 96) + 32);
2477 *charset = Vcharset_ucs;
2481 else if (c < MIN_CHAR_MOJIKYO)
2483 *charset = Vcharset_ucs;
2486 else if (c <= MAX_CHAR_MOJIKYO)
2488 *charset = Vcharset_mojikyo;
2489 return c - MIN_CHAR_MOJIKYO;
2493 *charset = Vcharset_ucs;
2498 Lisp_Object Vdefault_coded_charset_priority_list;
2502 /************************************************************************/
2503 /* Basic charset Lisp functions */
2504 /************************************************************************/
/* Lisp-visible wrapper around the CHARSETP type test; the result is Qt
   or Qnil.  */
2506 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
2507 Return non-nil if OBJECT is a charset.
2511 return CHARSETP (object) ? Qt : Qnil;
/* C-level note: names are resolved through Vcharset_hash_table, so a
   symbol installed by define-charset-alias is found exactly like a
   primary name.  A non-charset, non-symbol argument is rejected by
   CHECK_SYMBOL.  */
2514 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
2515 Retrieve the charset of the given name.
2516 If CHARSET-OR-NAME is a charset object, it is simply returned.
2517 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
2518 nil is returned. Otherwise the associated charset object is returned.
2522 if (CHARSETP (charset_or_name))
2523 return charset_or_name;
2525 CHECK_SYMBOL (charset_or_name);
2526 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Strict variant of Ffind_charset: the lookup is delegated to it and a
   "No such charset" error carrying NAME is signalled (the guarding test
   is on a line not visible here).  */
2529 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
2530 Retrieve the charset of the given name.
2531 Same as `find-charset' except an error is signalled if there is no such
2532 charset instead of returning nil.
2536 Lisp_Object charset = Ffind_charset (name);
2539 signal_simple_error ("No such charset", name);
2543 /* We store the charsets in hash tables with the names as the key and the
2544 actual charset object as the value. Occasionally we need to use them
2545 in a list format. These routines provide us with that. */
/* Closure passed through elisp_maphash: it carries the address of the
   list being accumulated.  */
2546 struct charset_list_closure
2548 Lisp_Object *charset_list;
/* Hash-table mapper: conses KEY (the name symbol under which the
   charset is registered) onto the front of the list the closure points
   at.  VALUE, the charset object, is not used by the visible code.  */
2552 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
2553 void *charset_list_closure)
2555 /* This function can GC */
2556 struct charset_list_closure *chcl =
2557 (struct charset_list_closure*) charset_list_closure;
2558 Lisp_Object *charset_list = chcl->charset_list;
2560 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
/* C-level note: the list is built by mapping add_charset_to_list_mapper
   over Vcharset_hash_table, so it contains every key of that table;
   names added by define-charset-alias are keys too.  The accumulating
   list is GC-protected while the mapping runs.  */
2564 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
2565 Return a list of the names of all defined charsets.
2569 Lisp_Object charset_list = Qnil;
2570 struct gcpro gcpro1;
2571 struct charset_list_closure charset_list_closure;
2573 GCPRO1 (charset_list);
2574 charset_list_closure.charset_list = &charset_list;
2575 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
2576 &charset_list_closure);
2579 return charset_list;
/* CHARSET is resolved with Fget_charset first, so an unknown name
   signals an error; the result is the charset's own name slot.  */
2582 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
2583 Return the name of charset CHARSET.
2587 return XCHARSET_NAME (Fget_charset (charset));
2590 /* #### SJT Should generic properties be allowed? */
2591 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
2592 Define a new character set.
2593 This function is for use with Mule support.
2594 NAME is a symbol, the name by which the character set is normally referred.
2595 DOC-STRING is a string describing the character set.
2596 PROPS is a property list, describing the specific nature of the
2597 character set. Recognized properties are:
2599 'short-name Short version of the charset name (ex: Latin-1)
2600 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
2601 'registry A regular expression matching the font registry field for
2603 'dimension Number of octets used to index a character in this charset.
2604 Either 1 or 2. Defaults to 1.
2605 'columns Number of columns used to display a character in this charset.
2606 Only used in TTY mode. (Under X, the actual width of a
2607 character can be derived from the font used to display the
2608 characters.) If unspecified, defaults to the dimension
2609 (this is almost always the correct value).
2610 'chars Number of characters in each dimension (94 or 96).
2611 Defaults to 94. Note that if the dimension is 2, the
2612 character set thus described is 94x94 or 96x96.
2613 'final Final byte of ISO 2022 escape sequence. Must be
2614 supplied. Each combination of (DIMENSION, CHARS) defines a
2615 separate namespace for final bytes. Note that ISO
2616 2022 restricts the final byte to the range
2617 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
2618 dimension == 2. Note also that final bytes in the range
2619 0x30 - 0x3F are reserved for user-defined (not official)
2621 'graphic 0 (use left half of font on output) or 1 (use right half
2622 of font on output). Defaults to 0. For example, for
2623 a font whose registry is ISO8859-1, the left half
2624 (octets 0x20 - 0x7F) is the `ascii' character set, while
2625 the right half (octets 0xA0 - 0xFF) is the `latin-1'
2626 character set. With 'graphic set to 0, the octets
2627 will have their high bit cleared; with it set to 1,
2628 the octets will have their high bit set.
2629 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
2631 'ccl-program A compiled CCL program used to convert a character in
2632 this charset into an index into the font. This is in
2633 addition to the 'graphic property. The CCL program
2634 is passed the octets of the character, with the high
2635 bit cleared and set depending upon whether the value
2636 of the 'graphic property is 0 or 1.
2638 (name, doc_string, props))
2640 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
2641 int direction = CHARSET_LEFT_TO_RIGHT;
2642 Lisp_Object registry = Qnil;
2643 Lisp_Object charset;
2644 Lisp_Object ccl_program = Qnil;
2645 Lisp_Object short_name = Qnil, long_name = Qnil;
2646 int byte_offset = -1;
2648 CHECK_SYMBOL (name);
2649 if (!NILP (doc_string))
2650 CHECK_STRING (doc_string);
2652 charset = Ffind_charset (name);
2653 if (!NILP (charset))
2654 signal_simple_error ("Cannot redefine existing charset", name);
2657 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
2659 if (EQ (keyword, Qshort_name))
2661 CHECK_STRING (value);
2665 if (EQ (keyword, Qlong_name))
2667 CHECK_STRING (value);
2671 else if (EQ (keyword, Qdimension))
2674 dimension = XINT (value);
2675 if (dimension < 1 || dimension > 2)
2676 signal_simple_error ("Invalid value for 'dimension", value);
2679 else if (EQ (keyword, Qchars))
2682 chars = XINT (value);
2683 if (chars != 94 && chars != 96)
2684 signal_simple_error ("Invalid value for 'chars", value);
2687 else if (EQ (keyword, Qcolumns))
2690 columns = XINT (value);
2691 if (columns != 1 && columns != 2)
2692 signal_simple_error ("Invalid value for 'columns", value);
2695 else if (EQ (keyword, Qgraphic))
2698 graphic = XINT (value);
2700 if (graphic < 0 || graphic > 2)
2702 if (graphic < 0 || graphic > 1)
2704 signal_simple_error ("Invalid value for 'graphic", value);
2707 else if (EQ (keyword, Qregistry))
2709 CHECK_STRING (value);
2713 else if (EQ (keyword, Qdirection))
2715 if (EQ (value, Ql2r))
2716 direction = CHARSET_LEFT_TO_RIGHT;
2717 else if (EQ (value, Qr2l))
2718 direction = CHARSET_RIGHT_TO_LEFT;
2720 signal_simple_error ("Invalid value for 'direction", value);
2723 else if (EQ (keyword, Qfinal))
2725 CHECK_CHAR_COERCE_INT (value);
2726 final = XCHAR (value);
2727 if (final < '0' || final > '~')
2728 signal_simple_error ("Invalid value for 'final", value);
2731 else if (EQ (keyword, Qccl_program))
2733 struct ccl_program test_ccl;
2735 if (setup_ccl_program (&test_ccl, value) < 0)
2736 signal_simple_error ("Invalid value for 'ccl-program", value);
2737 ccl_program = value;
2741 signal_simple_error ("Unrecognized property", keyword);
2746 error ("'final must be specified");
2747 if (dimension == 2 && final > 0x5F)
2749 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2752 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2753 CHARSET_LEFT_TO_RIGHT)) ||
2754 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2755 CHARSET_RIGHT_TO_LEFT)))
2757 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2759 id = get_unallocated_leading_byte (dimension);
2761 if (NILP (doc_string))
2762 doc_string = build_string ("");
2764 if (NILP (registry))
2765 registry = build_string ("");
2767 if (NILP (short_name))
2768 XSETSTRING (short_name, XSYMBOL (name)->name);
2770 if (NILP (long_name))
2771 long_name = doc_string;
2774 columns = dimension;
2776 if (byte_offset < 0)
2780 else if (chars == 96)
2786 charset = make_charset (id, name, chars, dimension, columns, graphic,
2787 final, direction, short_name, long_name,
2788 doc_string, registry,
2789 Qnil, 0, 0, 0, byte_offset);
2790 if (!NILP (ccl_program))
2791 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* C-level overview: CHARSET is resolved with Fget_charset and must not
   already have a reverse-direction charset; NEW-NAME must be a symbol
   that does not yet name a charset.  The new charset copies CHARSET's
   properties, gets a freshly allocated leading byte and the opposite
   direction, and the two charsets are then linked to each other through
   their reverse-direction slots.  */
2795 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2797 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2798 NEW-NAME is the name of the new charset. Return the new charset.
2800 (charset, new_name))
2802 Lisp_Object new_charset = Qnil;
2803 int id, chars, dimension, columns, graphic, final;
2805 Lisp_Object registry, doc_string, short_name, long_name;
2808 charset = Fget_charset (charset);
2809 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2810 signal_simple_error ("Charset already has reverse-direction charset",
2813 CHECK_SYMBOL (new_name);
2814 if (!NILP (Ffind_charset (new_name)))
2815 signal_simple_error ("Cannot redefine existing charset", new_name);
2817 cs = XCHARSET (charset);
2819 chars = CHARSET_CHARS (cs);
2820 dimension = CHARSET_DIMENSION (cs);
2821 columns = CHARSET_COLUMNS (cs);
2822 id = get_unallocated_leading_byte (dimension);
2824 graphic = CHARSET_GRAPHIC (cs);
2825 final = CHARSET_FINAL (cs);
/* Default to right-to-left and flip to left-to-right when the source
   charset is itself right-to-left.  */
2826 direction = CHARSET_RIGHT_TO_LEFT;
2827 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2828 direction = CHARSET_LEFT_TO_RIGHT;
2829 doc_string = CHARSET_DOC_STRING (cs);
2830 short_name = CHARSET_SHORT_NAME (cs);
2831 long_name = CHARSET_LONG_NAME (cs);
2832 registry = CHARSET_REGISTRY (cs);
2834 new_charset = make_charset (id, new_name, chars, dimension, columns,
2835 graphic, final, direction, short_name, long_name,
2836 doc_string, registry,
2838 CHARSET_DECODING_TABLE(cs),
2839 CHARSET_UCS_MIN(cs),
2840 CHARSET_UCS_MAX(cs),
2841 CHARSET_CODE_OFFSET(cs),
2842 CHARSET_BYTE_OFFSET(cs)
2848 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2849 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* C-level note: the alias is simply a further key in
   Vcharset_hash_table mapping to the same charset object.  CHARSET is
   resolved with Fget_charset, so it must already exist.  */
2854 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2855 Define symbol ALIAS as an alias for CHARSET.
2859 CHECK_SYMBOL (alias);
2860 charset = Fget_charset (charset);
2861 return Fputhash (alias, charset, Vcharset_hash_table);
2864 /* #### Reverse direction charsets not yet implemented. */
/* Returns the reverse-direction slot of CHARSET after resolving it with
   Fget_charset.  make_charset initialises that slot to Qnil and
   make-reverse-direction-charset fills it in.  */
2866 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2868 Return the reverse-direction charset parallel to CHARSET, if any.
2869 This is the charset with the same properties (in particular, the same
2870 dimension, number of characters per dimension, and final byte) as
2871 CHARSET but whose characters are displayed in the opposite direction.
2875 charset = Fget_charset (charset);
2876 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* C-level overview: DIMENSION, CHARS and FINAL are validated against
   the same ranges that make-charset enforces, DIRECTION is translated
   to a CHARSET_* constant (or left at -1 when nil), and the charset is
   looked up with CHARSET_BY_ATTRIBUTES: left-to-right first, then
   right-to-left, when no direction was given.
   NOTE(review): the visible return statement hands back the name of
   the charset found (XCHARSET_NAME), not the charset object the doc
   string mentions -- confirm which is intended.  */
2880 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2881 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2882 If DIRECTION is omitted, both directions will be checked (left-to-right
2883 will be returned if character sets exist for both directions).
2885 (dimension, chars, final, direction))
2887 int dm, ch, fi, di = -1;
2888 Lisp_Object obj = Qnil;
2890 CHECK_INT (dimension);
2891 dm = XINT (dimension);
2892 if (dm < 1 || dm > 2)
2893 signal_simple_error ("Invalid value for DIMENSION", dimension);
2897 if (ch != 94 && ch != 96)
2898 signal_simple_error ("Invalid value for CHARS", chars);
2900 CHECK_CHAR_COERCE_INT (final);
2902 if (fi < '0' || fi > '~')
2903 signal_simple_error ("Invalid value for FINAL", final);
2905 if (EQ (direction, Ql2r))
2906 di = CHARSET_LEFT_TO_RIGHT;
2907 else if (EQ (direction, Qr2l))
2908 di = CHARSET_RIGHT_TO_LEFT;
2909 else if (!NILP (direction))
2910 signal_simple_error ("Invalid value for DIRECTION", direction);
2912 if (dm == 2 && fi > 0x5F)
2914 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2918 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
2920 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
2923 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
2926 return XCHARSET_NAME (obj);
2930 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2931 Return short name of CHARSET.
CHARSET is first resolved to a charset (via Fget_charset); the value is
the contents of that charset's short-name slot.
2935 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2938 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2939 Return long name of CHARSET.
CHARSET is first resolved to a charset (via Fget_charset); the value is
the contents of that charset's long-name slot.
2943 return XCHARSET_LONG_NAME (Fget_charset (charset));
2946 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2947 Return description of CHARSET.
CHARSET is first resolved to a charset (via Fget_charset); the
description is the contents of that charset's doc-string slot.
2951 return XCHARSET_DOC_STRING (Fget_charset (charset));
2954 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2955 Return dimension of CHARSET.
CHARSET is first resolved to a charset (via Fget_charset); the
dimension is returned as an integer.
2959 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2962 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2963 Return property PROP of CHARSET, a charset object or symbol naming a charset.
2964 Recognized properties are those listed in `make-charset', as well as
2965 'name and 'doc-string.
PROP must be a symbol.  The 'direction property is returned as the
symbol `l2r' or `r2l'.  For 'reverse-direction-charset the name of that
charset is returned when the slot holds a charset; otherwise the slot
value is returned unchanged.  An error is signaled if PROP is not one of
the recognized properties.
2971 charset = Fget_charset (charset);
2972 cs = XCHARSET (charset);
2974 CHECK_SYMBOL (prop);
2975 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2976 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2977 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2978 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2979 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2980 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2981 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2982 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2983 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2984 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2985 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2986 if (EQ (prop, Qdirection))
2987 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2988 if (EQ (prop, Qreverse_direction_charset))
2990 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2991 /* #### Is this translation OK? If so, error checking sufficient? */
2992 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
2994 signal_simple_error ("Unrecognized charset property name", prop);
2995 return Qnil; /* not reached */
/* Lisp primitive: resolve CHARSET with Fget_charset and return its
   XCHARSET_LEADING_BYTE value as a Lisp integer. */
2998 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2999 Return charset identification number of CHARSET.
3003 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
3006 /* #### We need to figure out which properties we really want to
3009 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
3010 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
CCL-PROGRAM is first checked by setting it up as a CCL program in a
scratch structure; if that fails an error is signaled before the
property of CHARSET is assigned.
3012 (charset, ccl_program))
3014 struct ccl_program test_ccl;
3016 charset = Fget_charset (charset);
3017 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
3018 signal_simple_error ("Invalid ccl-program", ccl_program);
3019 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
3024 invalidate_charset_font_caches (Lisp_Object charset)
3026 /* Invalidate font cache entries for charset on all devices.
For every device visited by DEVICE_LOOP_NO_BREAK, CHARSET is looked up
in that device's charset_font_cache; when an entry exists, the hash
table stored there is emptied with Fclrhash.  Devices without an entry
for CHARSET are left alone. */
3027 Lisp_Object devcons, concons, hash_table;
3028 DEVICE_LOOP_NO_BREAK (devcons, concons)
3030 struct device *d = XDEVICE (XCAR (devcons));
3031 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
3032 if (!UNBOUNDP (hash_table))
3033 Fclrhash (hash_table);
/* Lisp primitive: store REGISTRY (which must be a string) in the
   registry slot of CHARSET, then clear the per-device font cache
   entries for that charset (invalidate_charset_font_caches) and call
   face_property_was_changed for the font property of the default face
   with Qglobal. */
3037 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
3038 Set the 'registry property of CHARSET to REGISTRY.
3040 (charset, registry))
3042 charset = Fget_charset (charset);
3043 CHECK_STRING (registry);
3044 XCHARSET_REGISTRY (charset) = registry;
3045 invalidate_charset_font_caches (charset);
3046 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
3051 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
3052 Return mapping-table of CHARSET.
CHARSET is first resolved to a charset (via Fget_charset); the value is
the contents of that charset's decoding-table slot.
3056 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
3059 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
3060 Set mapping-table of CHARSET to TABLE.
TABLE must be a vector or nil; any other value signals a
wrong-type-argument error.  A vector TABLE is checked against the
dimension of CHARSET, and an error is signaled for a table that is too
big or that contains an invalid element.  The entries of TABLE are then
registered as code points of CHARSET, each code point being derived from
the entry's index (or pair of indices, for nested vectors) plus the byte
offset of CHARSET.
3064 struct Lisp_Charset *cs;
3068 charset = Fget_charset (charset);
3069 cs = XCHARSET (charset);
3073 if (VECTORP (CHARSET_DECODING_TABLE(cs)))
3074 make_vector_newer (CHARSET_DECODING_TABLE(cs));
3075 CHARSET_DECODING_TABLE(cs) = Qnil;
3078 else if (VECTORP (table))
3080 int ccs_len = CHARSET_BYTE_SIZE (cs);
3081 int ret = decoding_table_check_elements (table,
3082 CHARSET_DIMENSION (cs),
3087 signal_simple_error ("Too big table", table);
3089 signal_simple_error ("Invalid element is found", table);
3091 signal_simple_error ("Something wrong", table);
3093 CHARSET_DECODING_TABLE(cs) = Qnil;
3096 signal_error (Qwrong_type_argument,
3097 list2 (build_translated_string ("vector-or-nil-p"),
3100 byte_offset = CHARSET_BYTE_OFFSET (cs);
3101 switch (CHARSET_DIMENSION (cs))
3104 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3106 Lisp_Object c = XVECTOR_DATA(table)[i];
3109 put_char_ccs_code_point (c, charset,
3110 make_int (i + byte_offset));
3114 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3116 Lisp_Object v = XVECTOR_DATA(table)[i];
3122 for (j = 0; j < XVECTOR_LENGTH (v); j++)
3124 Lisp_Object c = XVECTOR_DATA(v)[j];
3127 put_char_ccs_code_point
3129 make_int ( ( (i + byte_offset) << 8 )
3135 put_char_ccs_code_point (v, charset,
3136 make_int (i + byte_offset));
3145 /************************************************************************/
3146 /* Lisp primitives for working with characters */
3147 /************************************************************************/
/* Lisp primitive: resolve CHARSET with Fget_charset, run the code point
   through DECODE_CHAR, and return the resulting character.  A negative
   DECODE_CHAR result is reported as nil.  Charsets whose graphic
   property equals 1 are special-cased before decoding. */
3150 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
3151 Make a character from CHARSET and code-point CODE.
3157 charset = Fget_charset (charset);
3160 if (XCHARSET_GRAPHIC (charset) == 1)
3162 c = DECODE_CHAR (charset, c);
3163 return c >= 0 ? make_char (c) : Qnil;
3166 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
3167 Make a builtin character from CHARSET and code-point CODE.
The `latin-viscii' charset is special-cased: the character is first
obtained with `decode-char' and the lookup may be redirected to the
latin-viscii-lower or latin-viscii-upper charset.
If the builtin decoder yields no character (a negative value), the
result of `decode-char' for CHARSET and CODE is returned instead.
3173 charset = Fget_charset (charset);
3175 if (EQ (charset, Vcharset_latin_viscii))
3177 Lisp_Object chr = Fdecode_char (charset, code);
3183 (ret = Fget_char_attribute (chr,
3184 Vcharset_latin_viscii_lower,
3187 charset = Vcharset_latin_viscii_lower;
3191 (ret = Fget_char_attribute (chr,
3192 Vcharset_latin_viscii_upper,
3195 charset = Vcharset_latin_viscii_upper;
3202 if (XCHARSET_GRAPHIC (charset) == 1)
3205 c = decode_builtin_char (charset, c);
3206 return c >= 0 ? make_char (c) : Fdecode_char (charset, code);
3210 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
3211 Make a character from CHARSET and octets ARG1 and ARG2.
3212 ARG2 is required only for characters from two-dimensional charsets.
3213 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
3214 character s with caron.
3216 (charset, arg1, arg2))
3220 int lowlim, highlim;
3222 charset = Fget_charset (charset);
3223 cs = XCHARSET (charset);
3225 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
3226 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
3228 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
3230 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
3231 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
3234 /* It is useful (and safe, according to Olivier Galibert) to strip
3235 the 8th bit off ARG1 and ARG2 because it allows programmers to
3236 write (make-char 'latin-iso8859-2 CODE) where code is the actual
3237 Latin 2 code of the character. */
/* The first octet must lie within [lowlim, highlim], the limits picked
   for CHARSET by the if/else chain at the top of this function. */
3245 if (a1 < lowlim || a1 > highlim)
3246 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
/* One-dimensional charsets are built from the first octet alone; the
   error message below states that a second octet must be nil. */
3248 if (CHARSET_DIMENSION (cs) == 1)
3252 ("Charset is of dimension one; second octet must be nil", arg2);
3253 return make_char (MAKE_CHAR (charset, a1, 0));
/* Only the low seven bits of ARG2 are kept, and the result is range
   checked against the same limits as the first octet. */
3262 a2 = XINT (arg2) & 0x7f;
3264 if (a2 < lowlim || a2 > highlim)
3265 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
3267 return make_char (MAKE_CHAR (charset, a1, a2));
/* Lisp primitive: validate CHARACTER with CHECK_CHAR_COERCE_INT
   (presumably also accepting an integer -- confirm), find its charset
   with CHAR_CHARSET, and return that charset's name rather than the
   charset object itself. */
3270 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
3271 Return the character set of CHARACTER.
3275 CHECK_CHAR_COERCE_INT (character);
3277 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
3280 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
3281 Return the octet numbered N (should be 0 or 1) of CHARACTER.
3282 N defaults to 0 if omitted.
The octets are those produced by splitting CHARACTER with BREAKUP_CHAR.
Any value of N other than nil, 0 or 1 signals an error.
3286 Lisp_Object charset;
3289 CHECK_CHAR_COERCE_INT (character);
3291 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
3293 if (NILP (n) || EQ (n, Qzero))
3294 return make_int (octet0);
3295 else if (EQ (n, make_int (1)))
3296 return make_int (octet1);
3298 signal_simple_error ("Octet number must be 0 or 1", n);
3301 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
3302 Return list of charset and one or two position-codes of CHARACTER.
3306 /* This function can GC */
3307 struct gcpro gcpro1, gcpro2;
3308 Lisp_Object charset = Qnil;
3309 Lisp_Object rc = Qnil;
3317 GCPRO2 (charset, rc);
3318 CHECK_CHAR_COERCE_INT (character);
/* Code-point based variant: the low 8 bits of the encoded code point
   are consed onto the front of RC inside the loop (presumably once per
   dimension -- confirm), and the charset name is consed on last so that
   it ends up first in the list. */
3321 code_point = ENCODE_CHAR (XCHAR (character), charset);
3322 dimension = XCHARSET_DIMENSION (charset);
3323 while (dimension > 0)
3325 rc = Fcons (make_int (code_point & 255), rc);
3329 rc = Fcons (XCHARSET_NAME (charset), rc);
/* Octet based variant: BREAKUP_CHAR yields the charset and the octets
   C1 and C2; the result is (NAME C1 C2) for a two-dimensional charset
   and (NAME C1) otherwise.
   NOTE(review): both variants appear back to back here; presumably a
   preprocessor conditional selects exactly one of them -- confirm. */
3331 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
3333 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
3335 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
3339 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
3348 #ifdef ENABLE_COMPOSITE_CHARS
3349 /************************************************************************/
3350 /* composite character functions */
3351 /************************************************************************/
/* Find the composite character registered for the LEN bytes at STR.
   The bytes are turned into a Lisp string, which is searched in
   Vcomposite_char_string2char_hash_table.  The code below allocates a
   new character in Vcharset_composite at (composite_char_row_next,
   composite_char_col_next), records it in both the char->string and the
   string->char hash tables, and advances the column counter; when the
   column reaches 128 it restarts at 32 and the row counter is
   incremented.  An error is signaled once the row counter has reached
   128.
   NOTE(review): presumably the allocation only happens when the lookup
   finds nothing -- confirm against the complete function. */
3354 lookup_composite_char (Bufbyte *str, int len)
3356 Lisp_Object lispstr = make_string (str, len);
3357 Lisp_Object ch = Fgethash (lispstr,
3358 Vcomposite_char_string2char_hash_table,
3364 if (composite_char_row_next >= 128)
3365 signal_simple_error ("No more composite chars available", lispstr);
3366 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
3367 composite_char_col_next);
3368 Fputhash (make_char (emch), lispstr,
3369 Vcomposite_char_char2string_hash_table);
3370 Fputhash (lispstr, make_char (emch),
3371 Vcomposite_char_string2char_hash_table);
3372 composite_char_col_next++;
3373 if (composite_char_col_next >= 128)
3375 composite_char_col_next = 32;
3376 composite_char_row_next++;
/* Look up the string recorded for composite character CH in
   Vcomposite_char_char2string_hash_table.  The entry is expected to
   exist: the assert fires if the lookup comes back unbound. */
3385 composite_char_string (Emchar ch)
3387 Lisp_Object str = Fgethash (make_char (ch),
3388 Vcomposite_char_char2string_hash_table,
3390 assert (!UNBOUNDP (str));
/* Lisp primitive (declared with xxDEFUN rather than DEFUN): STRING must
   be a string; its data and length are handed to lookup_composite_char
   and the resulting character is returned. */
3394 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
3395 Convert a string into a single composite character.
3396 The character is the result of overstriking all the characters in
3401 CHECK_STRING (string);
3402 return make_char (lookup_composite_char (XSTRING_DATA (string),
3403 XSTRING_LENGTH (string)));
3406 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
3407 Return a string of the characters comprising a composite character.
An error is signaled if CH is not a composite character, that is, if
its leading byte is not LEADING_BYTE_COMPOSITE.
3415 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
3416 signal_simple_error ("Must be composite char", ch);
3417 return composite_char_string (emch);
3419 #endif /* ENABLE_COMPOSITE_CHARS */
3422 /************************************************************************/
3423 /* initialization */
3424 /************************************************************************/
/* Symbol and subr initialization for this file: registers the lrecord
   implementations, declares the Lisp primitives with DEFSUBR, and
   creates the symbols (property names, direction names, charset names
   and decomposition tags) with defsymbol. */
3427 syms_of_mule_charset (void)
3430 INIT_LRECORD_IMPLEMENTATION (uint8_byte_table);
3431 INIT_LRECORD_IMPLEMENTATION (uint16_byte_table);
3432 INIT_LRECORD_IMPLEMENTATION (byte_table);
3433 INIT_LRECORD_IMPLEMENTATION (char_id_table);
3435 INIT_LRECORD_IMPLEMENTATION (charset);
3437 DEFSUBR (Fcharsetp);
3438 DEFSUBR (Ffind_charset);
3439 DEFSUBR (Fget_charset);
3440 DEFSUBR (Fcharset_list);
3441 DEFSUBR (Fcharset_name);
3442 DEFSUBR (Fmake_charset);
3443 DEFSUBR (Fmake_reverse_direction_charset);
3444 /* DEFSUBR (Freverse_direction_charset); */
3445 DEFSUBR (Fdefine_charset_alias);
3446 DEFSUBR (Fcharset_from_attributes);
3447 DEFSUBR (Fcharset_short_name);
3448 DEFSUBR (Fcharset_long_name);
3449 DEFSUBR (Fcharset_description);
3450 DEFSUBR (Fcharset_dimension);
3451 DEFSUBR (Fcharset_property);
3452 DEFSUBR (Fcharset_id);
3453 DEFSUBR (Fset_charset_ccl_program);
3454 DEFSUBR (Fset_charset_registry);
3456 DEFSUBR (Fchar_attribute_list);
3457 DEFSUBR (Ffind_char_attribute_table);
3458 DEFSUBR (Fchar_attribute_alist);
3459 DEFSUBR (Fget_char_attribute);
3460 DEFSUBR (Fput_char_attribute);
3461 DEFSUBR (Fremove_char_attribute);
3462 DEFSUBR (Fdefine_char);
3463 DEFSUBR (Fchar_variants);
3464 DEFSUBR (Fget_composite_char);
3465 DEFSUBR (Fcharset_mapping_table);
3466 DEFSUBR (Fset_charset_mapping_table);
3470 DEFSUBR (Fdecode_char);
3471 DEFSUBR (Fdecode_builtin_char);
3473 DEFSUBR (Fmake_char);
3474 DEFSUBR (Fchar_charset);
3475 DEFSUBR (Fchar_octet);
3476 DEFSUBR (Fsplit_char);
3478 #ifdef ENABLE_COMPOSITE_CHARS
/* NOTE(review): both primitives below are declared with xxDEFUN, not
   DEFUN, in this file -- confirm that these DEFSUBR calls still build
   when ENABLE_COMPOSITE_CHARS is defined. */
3479 DEFSUBR (Fmake_composite_char);
3480 DEFSUBR (Fcomposite_char_string);
3483 defsymbol (&Qcharsetp, "charsetp");
3484 defsymbol (&Qregistry, "registry");
3485 defsymbol (&Qfinal, "final");
3486 defsymbol (&Qgraphic, "graphic");
3487 defsymbol (&Qdirection, "direction");
3488 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
3489 defsymbol (&Qshort_name, "short-name");
3490 defsymbol (&Qlong_name, "long-name");
3492 defsymbol (&Ql2r, "l2r");
3493 defsymbol (&Qr2l, "r2l");
3495 /* Charsets, compatible with FSF 20.3
3496 Naming convention is Script-Charset[-Edition] */
3497 defsymbol (&Qascii, "ascii");
3498 defsymbol (&Qcontrol_1, "control-1");
3499 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
3500 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
3501 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
3502 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
3503 defsymbol (&Qthai_tis620, "thai-tis620");
3504 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
3505 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
3506 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
3507 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
3508 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
3509 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
3510 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
3511 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
3512 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
3513 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
3514 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
3515 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
3516 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
3517 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
3518 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
3519 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
3521 defsymbol (&Qto_ucs, "=>ucs");
3522 defsymbol (&Q_ucs, "->ucs");
3523 defsymbol (&Q_decomposition, "->decomposition");
3524 defsymbol (&Qcompat, "compat");
3525 defsymbol (&Qisolated, "isolated");
3526 defsymbol (&Qinitial, "initial");
3527 defsymbol (&Qmedial, "medial");
/* NOTE(review): Qfinal is also passed to defsymbol earlier in this
   function (with the same name "final") -- confirm that the two calls
   cannot both run in one build, or drop one of them. */
3528 defsymbol (&Qfinal, "final");
3529 defsymbol (&Qvertical, "vertical");
3530 defsymbol (&QnoBreak, "noBreak");
3531 defsymbol (&Qfraction, "fraction");
3532 defsymbol (&Qsuper, "super");
3533 defsymbol (&Qsub, "sub");
3534 defsymbol (&Qcircle, "circle");
3535 defsymbol (&Qsquare, "square");
3536 defsymbol (&Qwide, "wide");
3537 defsymbol (&Qnarrow, "narrow");
3538 defsymbol (&Qsmall, "small");
3539 defsymbol (&Qfont, "font");
3540 defsymbol (&Qucs, "ucs");
3541 defsymbol (&Qucs_bmp, "ucs-bmp");
3542 defsymbol (&Qucs_cns, "ucs-cns");
3543 defsymbol (&Qucs_big5, "ucs-big5");
3544 defsymbol (&Qlatin_viscii, "latin-viscii");
3545 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
3546 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
3547 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
3548 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
3549 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
3550 defsymbol (&Qideograph_gt, "ideograph-gt");
3551 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
3552 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
3553 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
3554 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
3555 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
3556 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
3557 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
3558 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
3559 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
3560 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
3561 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
3562 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
3563 defsymbol (&Qchinese_big5, "chinese-big5");
3564 defsymbol (&Qmojikyo, "mojikyo");
3565 defsymbol (&Qmojikyo_2022_1, "mojikyo-2022-1");
3566 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
3567 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
3568 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
3569 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
3570 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
3571 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
3572 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
3573 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
3574 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
3575 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
3576 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
3577 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
3578 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
3579 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
3580 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
3581 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
3582 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
3583 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
3584 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
3585 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
3586 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
3587 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
3589 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
3590 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
3592 defsymbol (&Qcomposite, "composite");
/* Variable initialization for this file: allocates the charset lookup
   structure (chlook) and registers it with dumpstruct, resets its
   lookup tables to Qnil, seeds the leading-byte allocation counters,
   and sets up the Lisp-visible variables and the staticpro'd character
   composition and variant tables. */
3596 vars_of_mule_charset (void)
3603 chlook = xnew (struct charset_lookup);
3604 dumpstruct (&chlook, &charset_lookup_description);
3606 /* Table of charsets indexed by leading byte. */
3607 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
3608 chlook->charset_by_leading_byte[i] = Qnil;
/* NOTE(review): charset_by_attributes is cleared twice below, once as
   a two-level and once as a three-level array; presumably these are
   alternative layouts selected by a preprocessor conditional --
   confirm. */
3611 /* Table of charsets indexed by type/final-byte. */
3612 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3613 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3614 chlook->charset_by_attributes[i][j] = Qnil;
3616 /* Table of charsets indexed by type/final-byte/direction. */
3617 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3618 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3619 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
3620 chlook->charset_by_attributes[i][j][k] = Qnil;
3624 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
3626 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
3627 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
/* NOTE(review): leading_code_private_11 receives the same value again
   right after the DEFVAR_INT that follows; one of the two assignments
   looks redundant -- confirm before removing either. */
3631 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3632 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
3633 Leading-code of private TYPE9N charset of column-width 1.
3635 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3639 Vutf_2000_version = build_string("0.17 (Hōryūji)");
3640 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
3641 Version number of UTF-2000.
3644 staticpro (&Vcharacter_composition_table);
3645 Vcharacter_composition_table = make_char_id_table (Qnil);
3647 staticpro (&Vcharacter_variant_table);
3648 Vcharacter_variant_table = make_char_id_table (Qnil);
3650 Vdefault_coded_charset_priority_list = Qnil;
3651 DEFVAR_LISP ("default-coded-charset-priority-list",
3652 &Vdefault_coded_charset_priority_list /*
3653 Default order of preferred coded-character-sets.
3659 complex_vars_of_mule_charset (void)
3661 staticpro (&Vcharset_hash_table);
3662 Vcharset_hash_table =
3663 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3665 /* Predefined character sets. We store them into variables for
3669 staticpro (&Vchar_attribute_hash_table);
3670 Vchar_attribute_hash_table
3671 = make_lisp_hash_table (16, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3673 staticpro (&Vcharset_ucs);
3675 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
3676 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3677 build_string ("UCS"),
3678 build_string ("UCS"),
3679 build_string ("ISO/IEC 10646"),
3681 Qnil, 0, 0xFFFFFFF, 0, 0);
3682 staticpro (&Vcharset_ucs_bmp);
3684 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
3685 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3686 build_string ("BMP"),
3687 build_string ("BMP"),
3688 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
3689 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
3690 Qnil, 0, 0xFFFF, 0, 0);
3691 staticpro (&Vcharset_ucs_cns);
3693 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
3694 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3695 build_string ("UCS for CNS"),
3696 build_string ("UCS for CNS 11643"),
3697 build_string ("ISO/IEC 10646 for CNS 11643"),
3700 staticpro (&Vcharset_ucs_big5);
3702 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
3703 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3704 build_string ("UCS for Big5"),
3705 build_string ("UCS for Big5"),
3706 build_string ("ISO/IEC 10646 for Big5"),
3710 # define MIN_CHAR_THAI 0
3711 # define MAX_CHAR_THAI 0
3712 /* # define MIN_CHAR_HEBREW 0 */
3713 /* # define MAX_CHAR_HEBREW 0 */
3714 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3715 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
3717 staticpro (&Vcharset_ascii);
3719 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3720 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3721 build_string ("ASCII"),
3722 build_string ("ASCII)"),
3723 build_string ("ASCII (ISO646 IRV)"),
3724 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3725 Qnil, 0, 0x7F, 0, 0);
3726 staticpro (&Vcharset_control_1);
3727 Vcharset_control_1 =
3728 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3729 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3730 build_string ("C1"),
3731 build_string ("Control characters"),
3732 build_string ("Control characters 128-191"),
3734 Qnil, 0x80, 0x9F, 0, 0);
3735 staticpro (&Vcharset_latin_iso8859_1);
3736 Vcharset_latin_iso8859_1 =
3737 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3738 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3739 build_string ("Latin-1"),
3740 build_string ("ISO8859-1 (Latin-1)"),
3741 build_string ("ISO8859-1 (Latin-1)"),
3742 build_string ("iso8859-1"),
3743 Qnil, 0xA0, 0xFF, 0, 32);
3744 staticpro (&Vcharset_latin_iso8859_2);
3745 Vcharset_latin_iso8859_2 =
3746 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3747 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3748 build_string ("Latin-2"),
3749 build_string ("ISO8859-2 (Latin-2)"),
3750 build_string ("ISO8859-2 (Latin-2)"),
3751 build_string ("iso8859-2"),
3753 staticpro (&Vcharset_latin_iso8859_3);
3754 Vcharset_latin_iso8859_3 =
3755 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3756 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3757 build_string ("Latin-3"),
3758 build_string ("ISO8859-3 (Latin-3)"),
3759 build_string ("ISO8859-3 (Latin-3)"),
3760 build_string ("iso8859-3"),
3762 staticpro (&Vcharset_latin_iso8859_4);
3763 Vcharset_latin_iso8859_4 =
3764 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3765 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3766 build_string ("Latin-4"),
3767 build_string ("ISO8859-4 (Latin-4)"),
3768 build_string ("ISO8859-4 (Latin-4)"),
3769 build_string ("iso8859-4"),
3771 staticpro (&Vcharset_thai_tis620);
3772 Vcharset_thai_tis620 =
3773 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3774 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3775 build_string ("TIS620"),
3776 build_string ("TIS620 (Thai)"),
3777 build_string ("TIS620.2529 (Thai)"),
3778 build_string ("tis620"),
3779 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
3780 staticpro (&Vcharset_greek_iso8859_7);
3781 Vcharset_greek_iso8859_7 =
3782 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3783 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3784 build_string ("ISO8859-7"),
3785 build_string ("ISO8859-7 (Greek)"),
3786 build_string ("ISO8859-7 (Greek)"),
3787 build_string ("iso8859-7"),
3789 staticpro (&Vcharset_arabic_iso8859_6);
3790 Vcharset_arabic_iso8859_6 =
3791 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3792 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3793 build_string ("ISO8859-6"),
3794 build_string ("ISO8859-6 (Arabic)"),
3795 build_string ("ISO8859-6 (Arabic)"),
3796 build_string ("iso8859-6"),
3798 staticpro (&Vcharset_hebrew_iso8859_8);
3799 Vcharset_hebrew_iso8859_8 =
3800 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3801 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3802 build_string ("ISO8859-8"),
3803 build_string ("ISO8859-8 (Hebrew)"),
3804 build_string ("ISO8859-8 (Hebrew)"),
3805 build_string ("iso8859-8"),
3807 0 /* MIN_CHAR_HEBREW */,
3808 0 /* MAX_CHAR_HEBREW */, 0, 32);
3809 staticpro (&Vcharset_katakana_jisx0201);
3810 Vcharset_katakana_jisx0201 =
3811 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3812 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3813 build_string ("JISX0201 Kana"),
3814 build_string ("JISX0201.1976 (Japanese Kana)"),
3815 build_string ("JISX0201.1976 Japanese Kana"),
3816 build_string ("jisx0201\\.1976"),
3818 staticpro (&Vcharset_latin_jisx0201);
3819 Vcharset_latin_jisx0201 =
3820 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3821 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3822 build_string ("JISX0201 Roman"),
3823 build_string ("JISX0201.1976 (Japanese Roman)"),
3824 build_string ("JISX0201.1976 Japanese Roman"),
3825 build_string ("jisx0201\\.1976"),
3827 staticpro (&Vcharset_cyrillic_iso8859_5);
3828 Vcharset_cyrillic_iso8859_5 =
3829 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3830 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3831 build_string ("ISO8859-5"),
3832 build_string ("ISO8859-5 (Cyrillic)"),
3833 build_string ("ISO8859-5 (Cyrillic)"),
3834 build_string ("iso8859-5"),
3836 staticpro (&Vcharset_latin_iso8859_9);
3837 Vcharset_latin_iso8859_9 =
3838 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3839 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3840 build_string ("Latin-5"),
3841 build_string ("ISO8859-9 (Latin-5)"),
3842 build_string ("ISO8859-9 (Latin-5)"),
3843 build_string ("iso8859-9"),
3845 staticpro (&Vcharset_japanese_jisx0208_1978);
3846 Vcharset_japanese_jisx0208_1978 =
3847 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3848 Qjapanese_jisx0208_1978, 94, 2,
3849 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3850 build_string ("JIS X0208:1978"),
3851 build_string ("JIS X0208:1978 (Japanese)"),
3853 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3854 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3856 staticpro (&Vcharset_chinese_gb2312);
3857 Vcharset_chinese_gb2312 =
3858 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3859 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3860 build_string ("GB2312"),
3861 build_string ("GB2312)"),
3862 build_string ("GB2312 Chinese simplified"),
3863 build_string ("gb2312"),
3865 staticpro (&Vcharset_chinese_gb12345);
3866 Vcharset_chinese_gb12345 =
3867 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
3868 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3869 build_string ("G1"),
3870 build_string ("GB 12345)"),
3871 build_string ("GB 12345-1990"),
3872 build_string ("GB12345\\(\\.1990\\)?-0"),
3874 staticpro (&Vcharset_japanese_jisx0208);
3875 Vcharset_japanese_jisx0208 =
3876 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3877 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3878 build_string ("JISX0208"),
3879 build_string ("JIS X0208:1983 (Japanese)"),
3880 build_string ("JIS X0208:1983 Japanese Kanji"),
3881 build_string ("jisx0208\\.1983"),
3884 staticpro (&Vcharset_japanese_jisx0208_1990);
3885 Vcharset_japanese_jisx0208_1990 =
3886 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3887 Qjapanese_jisx0208_1990, 94, 2,
3888 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3889 build_string ("JISX0208-1990"),
3890 build_string ("JIS X0208:1990 (Japanese)"),
3891 build_string ("JIS X0208:1990 Japanese Kanji"),
3892 build_string ("jisx0208\\.1990"),
3894 MIN_CHAR_JIS_X0208_1990,
3895 MAX_CHAR_JIS_X0208_1990, 0, 33);
3897 staticpro (&Vcharset_korean_ksc5601);
3898 Vcharset_korean_ksc5601 =
3899 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3900 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3901 build_string ("KSC5601"),
3902 build_string ("KSC5601 (Korean"),
3903 build_string ("KSC5601 Korean Hangul and Hanja"),
3904 build_string ("ksc5601"),
3906 staticpro (&Vcharset_japanese_jisx0212);
3907 Vcharset_japanese_jisx0212 =
3908 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3909 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3910 build_string ("JISX0212"),
3911 build_string ("JISX0212 (Japanese)"),
3912 build_string ("JISX0212 Japanese Supplement"),
3913 build_string ("jisx0212"),
3916 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3917 staticpro (&Vcharset_chinese_cns11643_1);
3918 Vcharset_chinese_cns11643_1 =
3919 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3920 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3921 build_string ("CNS11643-1"),
3922 build_string ("CNS11643-1 (Chinese traditional)"),
3924 ("CNS 11643 Plane 1 Chinese traditional"),
3925 build_string (CHINESE_CNS_PLANE_RE("1")),
3927 staticpro (&Vcharset_chinese_cns11643_2);
3928 Vcharset_chinese_cns11643_2 =
3929 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3930 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3931 build_string ("CNS11643-2"),
3932 build_string ("CNS11643-2 (Chinese traditional)"),
3934 ("CNS 11643 Plane 2 Chinese traditional"),
3935 build_string (CHINESE_CNS_PLANE_RE("2")),
3938 staticpro (&Vcharset_latin_tcvn5712);
3939 Vcharset_latin_tcvn5712 =
3940 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3941 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3942 build_string ("TCVN 5712"),
3943 build_string ("TCVN 5712 (VSCII-2)"),
3944 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3945 build_string ("tcvn5712\\(\\.1993\\)?-1"),
3947 staticpro (&Vcharset_latin_viscii_lower);
3948 Vcharset_latin_viscii_lower =
3949 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3950 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3951 build_string ("VISCII lower"),
3952 build_string ("VISCII lower (Vietnamese)"),
3953 build_string ("VISCII lower (Vietnamese)"),
3954 build_string ("MULEVISCII-LOWER"),
3956 staticpro (&Vcharset_latin_viscii_upper);
3957 Vcharset_latin_viscii_upper =
3958 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3959 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3960 build_string ("VISCII upper"),
3961 build_string ("VISCII upper (Vietnamese)"),
3962 build_string ("VISCII upper (Vietnamese)"),
3963 build_string ("MULEVISCII-UPPER"),
3965 staticpro (&Vcharset_latin_viscii);
3966 Vcharset_latin_viscii =
3967 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3968 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3969 build_string ("VISCII"),
3970 build_string ("VISCII 1.1 (Vietnamese)"),
3971 build_string ("VISCII 1.1 (Vietnamese)"),
3972 build_string ("VISCII1\\.1"),
3974 staticpro (&Vcharset_chinese_big5);
3975 Vcharset_chinese_big5 =
3976 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
3977 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3978 build_string ("Big5"),
3979 build_string ("Big5"),
3980 build_string ("Big5 Chinese traditional"),
3981 build_string ("big5"),
3983 staticpro (&Vcharset_ideograph_gt);
3984 Vcharset_ideograph_gt =
3985 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
3986 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3987 build_string ("GT"),
3988 build_string ("GT"),
3989 build_string ("GT"),
3991 Qnil, MIN_CHAR_GT, MAX_CHAR_GT, 0, 0);
3992 #define DEF_GT_PJ(n) \
3993 staticpro (&Vcharset_ideograph_gt_pj_##n); \
3994 Vcharset_ideograph_gt_pj_##n = \
3995 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
3996 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
3997 build_string ("GT-PJ-"#n), \
3998 build_string ("GT (pseudo JIS encoding) part "#n), \
3999 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
4001 ("\\(GT2000PJ-"#n "\\|jisx0208\\.GT2000-"#n "\\)$"), \
4015 staticpro (&Vcharset_ideograph_daikanwa);
4016 Vcharset_ideograph_daikanwa =
4017 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
4018 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4019 build_string ("Daikanwa"),
4020 build_string ("Morohashi's Daikanwa"),
4021 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
4022 build_string ("Daikanwa"),
4023 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
4024 staticpro (&Vcharset_mojikyo);
4026 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
4027 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4028 build_string ("Mojikyo"),
4029 build_string ("Mojikyo"),
4030 build_string ("Konjaku-Mojikyo"),
4032 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
4033 staticpro (&Vcharset_mojikyo_2022_1);
4034 Vcharset_mojikyo_2022_1 =
4035 make_charset (LEADING_BYTE_MOJIKYO_2022_1, Qmojikyo_2022_1, 94, 3,
4036 2, 2, ':', CHARSET_LEFT_TO_RIGHT,
4037 build_string ("Mojikyo-2022-1"),
4038 build_string ("Mojikyo ISO-2022 Part 1"),
4039 build_string ("Konjaku-Mojikyo for ISO/IEC 2022 Part 1"),
4043 #define DEF_MOJIKYO_PJ(n) \
4044 staticpro (&Vcharset_mojikyo_pj_##n); \
4045 Vcharset_mojikyo_pj_##n = \
4046 make_charset (LEADING_BYTE_MOJIKYO_PJ_##n, Qmojikyo_pj_##n, 94, 2, \
4047 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
4048 build_string ("Mojikyo-PJ-"#n), \
4049 build_string ("Mojikyo (pseudo JIS encoding) part "#n), \
4051 ("Konjaku-Mojikyo (pseudo JIS encoding) part "#n), \
4053 ("\\(MojikyoPJ-"#n "\\|jisx0208\\.Mojikyo-"#n "\\)$"), \
4065 DEF_MOJIKYO_PJ (10);
4066 DEF_MOJIKYO_PJ (11);
4067 DEF_MOJIKYO_PJ (12);
4068 DEF_MOJIKYO_PJ (13);
4069 DEF_MOJIKYO_PJ (14);
4070 DEF_MOJIKYO_PJ (15);
4071 DEF_MOJIKYO_PJ (16);
4072 DEF_MOJIKYO_PJ (17);
4073 DEF_MOJIKYO_PJ (18);
4074 DEF_MOJIKYO_PJ (19);
4075 DEF_MOJIKYO_PJ (20);
4076 DEF_MOJIKYO_PJ (21);
4078 staticpro (&Vcharset_ethiopic_ucs);
4079 Vcharset_ethiopic_ucs =
4080 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
4081 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4082 build_string ("Ethiopic (UCS)"),
4083 build_string ("Ethiopic (UCS)"),
4084 build_string ("Ethiopic of UCS"),
4085 build_string ("Ethiopic-Unicode"),
4086 Qnil, 0x1200, 0x137F, 0x1200, 0);
4088 staticpro (&Vcharset_chinese_big5_1);
4089 Vcharset_chinese_big5_1 =
4090 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
4091 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
4092 build_string ("Big5"),
4093 build_string ("Big5 (Level-1)"),
4095 ("Big5 Level-1 Chinese traditional"),
4096 build_string ("big5"),
4098 staticpro (&Vcharset_chinese_big5_2);
4099 Vcharset_chinese_big5_2 =
4100 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
4101 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
4102 build_string ("Big5"),
4103 build_string ("Big5 (Level-2)"),
4105 ("Big5 Level-2 Chinese traditional"),
4106 build_string ("big5"),
4109 #ifdef ENABLE_COMPOSITE_CHARS
/* Everything up to the matching #endif is compiled only when
   ENABLE_COMPOSITE_CHARS is defined.  It creates the `composite'
   charset object, resets the two composite_char_*_next counters, and
   creates and registers the two composite-character hash tables. */
4110 /* #### For simplicity, we put composite chars into a 96x96 charset.
4111 This is going to lead to problems because you can run out of
4112 room, esp. as we don't yet recycle numbers. */
4113 staticpro (&Vcharset_composite);
4114 Vcharset_composite =
4115 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
4116 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4117 build_string ("Composite"),
4118 build_string ("Composite characters"),
4119 build_string ("Composite characters"),
4122 /* #### not dumped properly */
/* Both counters start at 32.  NOTE(review): presumably these hold the
   next unused row/column position inside the 96x96 charset -- confirm
   against the code that allocates composite characters. */
4123 composite_char_row_next = 32;
4124 composite_char_col_next = 32;
/* Two tables, one per lookup direction.  Both are created with a first
   argument of 500 (presumably the initial size -- confirm) and
   HASH_TABLE_NON_WEAK; the string->char table is created with
   HASH_TABLE_EQUAL, the char->string table with HASH_TABLE_EQ. */
4126 Vcomposite_char_string2char_hash_table =
4127 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
4128 Vcomposite_char_char2string_hash_table =
4129 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
/* Unlike Vcharset_composite above, which is passed to staticpro before
   it is assigned, these two variables are passed to staticpro only
   after the tables have been created. */
4130 staticpro (&Vcomposite_char_string2char_hash_table);
4131 staticpro (&Vcomposite_char_char2string_hash_table);
4132 #endif /* ENABLE_COMPOSITE_CHARS */