1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000,2001 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 /* Rewritten by MORIOKA Tomohiko <tomo@m17n.org> for XEmacs UTF-2000. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_chinese_gb12345;
60 Lisp_Object Vcharset_japanese_jisx0208;
61 Lisp_Object Vcharset_japanese_jisx0208_1990;
62 Lisp_Object Vcharset_korean_ksc5601;
63 Lisp_Object Vcharset_japanese_jisx0212;
64 Lisp_Object Vcharset_chinese_cns11643_1;
65 Lisp_Object Vcharset_chinese_cns11643_2;
67 Lisp_Object Vcharset_ucs;
68 Lisp_Object Vcharset_ucs_bmp;
69 Lisp_Object Vcharset_ucs_cns;
70 Lisp_Object Vcharset_ucs_big5;
71 Lisp_Object Vcharset_latin_viscii;
72 Lisp_Object Vcharset_latin_tcvn5712;
73 Lisp_Object Vcharset_latin_viscii_lower;
74 Lisp_Object Vcharset_latin_viscii_upper;
75 Lisp_Object Vcharset_chinese_big5;
76 Lisp_Object Vcharset_ideograph_gt;
77 Lisp_Object Vcharset_ideograph_gt_pj_1;
78 Lisp_Object Vcharset_ideograph_gt_pj_2;
79 Lisp_Object Vcharset_ideograph_gt_pj_3;
80 Lisp_Object Vcharset_ideograph_gt_pj_4;
81 Lisp_Object Vcharset_ideograph_gt_pj_5;
82 Lisp_Object Vcharset_ideograph_gt_pj_6;
83 Lisp_Object Vcharset_ideograph_gt_pj_7;
84 Lisp_Object Vcharset_ideograph_gt_pj_8;
85 Lisp_Object Vcharset_ideograph_gt_pj_9;
86 Lisp_Object Vcharset_ideograph_gt_pj_10;
87 Lisp_Object Vcharset_ideograph_gt_pj_11;
88 Lisp_Object Vcharset_ideograph_daikanwa;
89 Lisp_Object Vcharset_mojikyo;
90 Lisp_Object Vcharset_mojikyo_2022_1;
91 Lisp_Object Vcharset_mojikyo_pj_1;
92 Lisp_Object Vcharset_mojikyo_pj_2;
93 Lisp_Object Vcharset_mojikyo_pj_3;
94 Lisp_Object Vcharset_mojikyo_pj_4;
95 Lisp_Object Vcharset_mojikyo_pj_5;
96 Lisp_Object Vcharset_mojikyo_pj_6;
97 Lisp_Object Vcharset_mojikyo_pj_7;
98 Lisp_Object Vcharset_mojikyo_pj_8;
99 Lisp_Object Vcharset_mojikyo_pj_9;
100 Lisp_Object Vcharset_mojikyo_pj_10;
101 Lisp_Object Vcharset_mojikyo_pj_11;
102 Lisp_Object Vcharset_mojikyo_pj_12;
103 Lisp_Object Vcharset_mojikyo_pj_13;
104 Lisp_Object Vcharset_mojikyo_pj_14;
105 Lisp_Object Vcharset_mojikyo_pj_15;
106 Lisp_Object Vcharset_mojikyo_pj_16;
107 Lisp_Object Vcharset_mojikyo_pj_17;
108 Lisp_Object Vcharset_mojikyo_pj_18;
109 Lisp_Object Vcharset_mojikyo_pj_19;
110 Lisp_Object Vcharset_mojikyo_pj_20;
111 Lisp_Object Vcharset_mojikyo_pj_21;
112 Lisp_Object Vcharset_ethiopic_ucs;
114 Lisp_Object Vcharset_chinese_big5_1;
115 Lisp_Object Vcharset_chinese_big5_2;
117 #ifdef ENABLE_COMPOSITE_CHARS
118 Lisp_Object Vcharset_composite;
120 /* Hash tables for composite chars. One maps string representing
121 composed chars to their equivalent chars; one goes the
123 Lisp_Object Vcomposite_char_char2string_hash_table;
124 Lisp_Object Vcomposite_char_string2char_hash_table;
126 static int composite_char_row_next;
127 static int composite_char_col_next;
129 #endif /* ENABLE_COMPOSITE_CHARS */
131 struct charset_lookup *chlook;
133 static const struct lrecord_description charset_lookup_description_1[] = {
134 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
143 static const struct struct_description charset_lookup_description = {
144 sizeof (struct charset_lookup),
145 charset_lookup_description_1
149 /* Table of number of bytes in the string representation of a character
150 indexed by the first byte of that representation.
152 rep_bytes_by_first_byte(c) is more efficient than the equivalent
153 canonical computation:
155 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
157 const Bytecount rep_bytes_by_first_byte[0xA0] =
158 { /* 0x00 - 0x7f are for straight ASCII */
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
163 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
164 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
165 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
166 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
167 /* 0x80 - 0x8f are for Dimension-1 official charsets */
169 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
171 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
173 /* 0x90 - 0x9d are for Dimension-2 official charsets */
174 /* 0x9e is for Dimension-1 private charsets */
175 /* 0x9f is for Dimension-2 private charsets */
176 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Cell encoding used by uint8 byte tables.  BT_UINT8_MIN..BT_UINT8_MAX is
   the integer range accepted by INT_UINT8_P; the three highest unsigned
   char values are reserved as sentinels that stand for t, nil and the
   unbound marker (see UINT8_ENCODE / UINT8_DECODE). */
182 #define BT_UINT8_MIN 0
183 #define BT_UINT8_MAX (UCHAR_MAX - 3)
184 #define BT_UINT8_t (UCHAR_MAX - 2)
185 #define BT_UINT8_nil (UCHAR_MAX - 1)
186 #define BT_UINT8_unbound UCHAR_MAX
188 INLINE_HEADER int INT_UINT8_P (Lisp_Object obj);
189 INLINE_HEADER int UINT8_VALUE_P (Lisp_Object obj);
190 INLINE_HEADER unsigned char UINT8_ENCODE (Lisp_Object obj);
191 INLINE_HEADER Lisp_Object UINT8_DECODE (unsigned char n);
192 INLINE_HEADER unsigned short UINT8_TO_UINT16 (unsigned char n);
195 INT_UINT8_P (Lisp_Object obj)
199 int num = XINT (obj);
201 return (BT_UINT8_MIN <= num) && (num <= BT_UINT8_MAX);
208 UINT8_VALUE_P (Lisp_Object obj)
210 return EQ (obj, Qunbound)
211 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT8_P (obj);
214 INLINE_HEADER unsigned char
215 UINT8_ENCODE (Lisp_Object obj)
217 if (EQ (obj, Qunbound))
218 return BT_UINT8_unbound;
219 else if (EQ (obj, Qnil))
221 else if (EQ (obj, Qt))
227 INLINE_HEADER Lisp_Object
228 UINT8_DECODE (unsigned char n)
230 if (n == BT_UINT8_unbound)
232 else if (n == BT_UINT8_nil)
234 else if (n == BT_UINT8_t)
241 mark_uint8_byte_table (Lisp_Object obj)
247 print_uint8_byte_table (Lisp_Object obj,
248 Lisp_Object printcharfun, int escapeflag)
250 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
252 struct gcpro gcpro1, gcpro2;
253 GCPRO2 (obj, printcharfun);
255 write_c_string ("\n#<uint8-byte-table", printcharfun);
256 for (i = 0; i < 256; i++)
258 unsigned char n = bte->property[i];
260 write_c_string ("\n ", printcharfun);
261 write_c_string (" ", printcharfun);
262 if (n == BT_UINT8_unbound)
263 write_c_string ("void", printcharfun);
264 else if (n == BT_UINT8_nil)
265 write_c_string ("nil", printcharfun);
266 else if (n == BT_UINT8_t)
267 write_c_string ("t", printcharfun);
272 sprintf (buf, "%hd", n);
273 write_c_string (buf, printcharfun);
277 write_c_string (">", printcharfun);
281 uint8_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
283 Lisp_Uint8_Byte_Table *te1 = XUINT8_BYTE_TABLE (obj1);
284 Lisp_Uint8_Byte_Table *te2 = XUINT8_BYTE_TABLE (obj2);
287 for (i = 0; i < 256; i++)
288 if (te1->property[i] != te2->property[i])
294 uint8_byte_table_hash (Lisp_Object obj, int depth)
296 Lisp_Uint8_Byte_Table *te = XUINT8_BYTE_TABLE (obj);
300 for (i = 0; i < 256; i++)
301 hash = HASH2 (hash, te->property[i]);
305 DEFINE_LRECORD_IMPLEMENTATION ("uint8-byte-table", uint8_byte_table,
306 mark_uint8_byte_table,
307 print_uint8_byte_table,
308 0, uint8_byte_table_equal,
309 uint8_byte_table_hash,
310 0 /* uint8_byte_table_description */,
311 Lisp_Uint8_Byte_Table);
314 make_uint8_byte_table (unsigned char initval)
318 Lisp_Uint8_Byte_Table *cte;
320 cte = alloc_lcrecord_type (Lisp_Uint8_Byte_Table,
321 &lrecord_uint8_byte_table);
323 for (i = 0; i < 256; i++)
324 cte->property[i] = initval;
326 XSETUINT8_BYTE_TABLE (obj, cte);
331 uint8_byte_table_same_value_p (Lisp_Object obj)
333 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
334 unsigned char v0 = bte->property[0];
337 for (i = 1; i < 256; i++)
339 if (bte->property[i] != v0)
/* Cell encoding used by uint16 byte tables.  BT_UINT16_MIN..BT_UINT16_MAX
   is the integer range accepted by INT_UINT16_P; the three highest
   unsigned short values are reserved as sentinels that stand for t, nil
   and the unbound marker (see UINT16_ENCODE / UINT16_DECODE). */
346 #define BT_UINT16_MIN 0
347 #define BT_UINT16_MAX (USHRT_MAX - 3)
348 #define BT_UINT16_t (USHRT_MAX - 2)
349 #define BT_UINT16_nil (USHRT_MAX - 1)
350 #define BT_UINT16_unbound USHRT_MAX
352 INLINE_HEADER int INT_UINT16_P (Lisp_Object obj);
353 INLINE_HEADER int UINT16_VALUE_P (Lisp_Object obj);
354 INLINE_HEADER unsigned short UINT16_ENCODE (Lisp_Object obj);
355 INLINE_HEADER Lisp_Object UINT16_DECODE (unsigned short us);
358 INT_UINT16_P (Lisp_Object obj)
362 int num = XINT (obj);
364 return (BT_UINT16_MIN <= num) && (num <= BT_UINT16_MAX);
371 UINT16_VALUE_P (Lisp_Object obj)
373 return EQ (obj, Qunbound)
374 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT16_P (obj);
377 INLINE_HEADER unsigned short
378 UINT16_ENCODE (Lisp_Object obj)
380 if (EQ (obj, Qunbound))
381 return BT_UINT16_unbound;
382 else if (EQ (obj, Qnil))
383 return BT_UINT16_nil;
384 else if (EQ (obj, Qt))
390 INLINE_HEADER Lisp_Object
391 UINT16_DECODE (unsigned short n)
393 if (n == BT_UINT16_unbound)
395 else if (n == BT_UINT16_nil)
397 else if (n == BT_UINT16_t)
403 INLINE_HEADER unsigned short
404 UINT8_TO_UINT16 (unsigned char n)
406 if (n == BT_UINT8_unbound)
407 return BT_UINT16_unbound;
408 else if (n == BT_UINT8_nil)
409 return BT_UINT16_nil;
410 else if (n == BT_UINT8_t)
417 mark_uint16_byte_table (Lisp_Object obj)
/* Print method for uint16 byte tables.  Writes "#<uint16-byte-table",
   then one item per cell for all 256 cells, then ">".  The sentinel
   cells are written as "void" (unbound), "nil" and "t"; every other
   cell is written as a decimal number. */
423 print_uint16_byte_table (Lisp_Object obj,
424 Lisp_Object printcharfun, int escapeflag)
426 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
428 struct gcpro gcpro1, gcpro2;
429 GCPRO2 (obj, printcharfun);
431 write_c_string ("\n#<uint16-byte-table", printcharfun);
432 for (i = 0; i < 256; i++)
434 unsigned short n = bte->property[i];
436 write_c_string ("\n ", printcharfun);
437 write_c_string (" ", printcharfun);
438 if (n == BT_UINT16_unbound)
439 write_c_string ("void", printcharfun);
440 else if (n == BT_UINT16_nil)
441 write_c_string ("nil", printcharfun);
442 else if (n == BT_UINT16_t)
443 write_c_string ("t", printcharfun);
/* n is an unsigned short, so it must be formatted with the unsigned
   conversion: "%hd" reinterprets it as a signed short and would print
   every cell above SHRT_MAX as a negative number.  "%hu" never produces
   more characters than "%hd" could, so the buffer requirement does not
   grow. */
448 sprintf (buf, "%hu", n);
449 write_c_string (buf, printcharfun);
453 write_c_string (">", printcharfun);
457 uint16_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
459 Lisp_Uint16_Byte_Table *te1 = XUINT16_BYTE_TABLE (obj1);
460 Lisp_Uint16_Byte_Table *te2 = XUINT16_BYTE_TABLE (obj2);
463 for (i = 0; i < 256; i++)
464 if (te1->property[i] != te2->property[i])
470 uint16_byte_table_hash (Lisp_Object obj, int depth)
472 Lisp_Uint16_Byte_Table *te = XUINT16_BYTE_TABLE (obj);
476 for (i = 0; i < 256; i++)
477 hash = HASH2 (hash, te->property[i]);
481 DEFINE_LRECORD_IMPLEMENTATION ("uint16-byte-table", uint16_byte_table,
482 mark_uint16_byte_table,
483 print_uint16_byte_table,
484 0, uint16_byte_table_equal,
485 uint16_byte_table_hash,
486 0 /* uint16_byte_table_description */,
487 Lisp_Uint16_Byte_Table);
490 make_uint16_byte_table (unsigned short initval)
494 Lisp_Uint16_Byte_Table *cte;
496 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
497 &lrecord_uint16_byte_table);
499 for (i = 0; i < 256; i++)
500 cte->property[i] = initval;
502 XSETUINT16_BYTE_TABLE (obj, cte);
507 expand_uint8_byte_table_to_uint16 (Lisp_Object table)
511 Lisp_Uint8_Byte_Table* bte = XUINT8_BYTE_TABLE(table);
512 Lisp_Uint16_Byte_Table* cte;
514 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
515 &lrecord_uint16_byte_table);
516 for (i = 0; i < 256; i++)
518 cte->property[i] = UINT8_TO_UINT16 (bte->property[i]);
520 XSETUINT16_BYTE_TABLE (obj, cte);
525 uint16_byte_table_same_value_p (Lisp_Object obj)
527 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
528 unsigned short v0 = bte->property[0];
531 for (i = 1; i < 256; i++)
533 if (bte->property[i] != v0)
541 mark_byte_table (Lisp_Object obj)
543 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
546 for (i = 0; i < 256; i++)
548 mark_object (cte->property[i]);
554 print_byte_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
556 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
558 struct gcpro gcpro1, gcpro2;
559 GCPRO2 (obj, printcharfun);
561 write_c_string ("\n#<byte-table", printcharfun);
562 for (i = 0; i < 256; i++)
564 Lisp_Object elt = bte->property[i];
566 write_c_string ("\n ", printcharfun);
567 write_c_string (" ", printcharfun);
568 if (EQ (elt, Qunbound))
569 write_c_string ("void", printcharfun);
571 print_internal (elt, printcharfun, escapeflag);
574 write_c_string (">", printcharfun);
578 byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
580 Lisp_Byte_Table *cte1 = XBYTE_TABLE (obj1);
581 Lisp_Byte_Table *cte2 = XBYTE_TABLE (obj2);
584 for (i = 0; i < 256; i++)
585 if (BYTE_TABLE_P (cte1->property[i]))
587 if (BYTE_TABLE_P (cte2->property[i]))
589 if (!byte_table_equal (cte1->property[i],
590 cte2->property[i], depth + 1))
597 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
603 byte_table_hash (Lisp_Object obj, int depth)
605 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
607 return internal_array_hash (cte->property, 256, depth);
610 static const struct lrecord_description byte_table_description[] = {
611 { XD_LISP_OBJECT_ARRAY, offsetof(Lisp_Byte_Table, property), 256 },
615 DEFINE_LRECORD_IMPLEMENTATION ("byte-table", byte_table,
620 byte_table_description,
624 make_byte_table (Lisp_Object initval)
628 Lisp_Byte_Table *cte;
630 cte = alloc_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
632 for (i = 0; i < 256; i++)
633 cte->property[i] = initval;
635 XSETBYTE_TABLE (obj, cte);
640 byte_table_same_value_p (Lisp_Object obj)
642 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
643 Lisp_Object v0 = bte->property[0];
646 for (i = 1; i < 256; i++)
648 if (!internal_equal (bte->property[i], v0, 0))
655 Lisp_Object get_byte_table (Lisp_Object table, unsigned char idx);
656 Lisp_Object put_byte_table (Lisp_Object table, unsigned char idx,
/* Fetch cell IDX of TABLE, whichever of the three byte-table
   representations TABLE uses.  Cells of the compact uint8 / uint16
   tables are converted back to Lisp objects with UINT8_DECODE /
   UINT16_DECODE; cells of a generic byte table already hold Lisp
   objects and are returned as they are. */
660 get_byte_table (Lisp_Object table, unsigned char idx)
662 if (UINT8_BYTE_TABLE_P (table))
663 return UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[idx]);
664 else if (UINT16_BYTE_TABLE_P (table))
665 return UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[idx]);
666 else if (BYTE_TABLE_P (table))
667 return XBYTE_TABLE(table)->property[idx];
673 put_byte_table (Lisp_Object table, unsigned char idx, Lisp_Object value)
675 if (UINT8_BYTE_TABLE_P (table))
677 if (UINT8_VALUE_P (value))
679 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
680 if (!UINT8_BYTE_TABLE_P (value) &&
681 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
682 && uint8_byte_table_same_value_p (table))
687 else if (UINT16_VALUE_P (value))
689 Lisp_Object new = expand_uint8_byte_table_to_uint16 (table);
691 XUINT16_BYTE_TABLE(new)->property[idx] = UINT16_ENCODE (value);
696 Lisp_Object new = make_byte_table (Qnil);
699 for (i = 0; i < 256; i++)
701 XBYTE_TABLE(new)->property[i]
702 = UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[i]);
704 XBYTE_TABLE(new)->property[idx] = value;
708 else if (UINT16_BYTE_TABLE_P (table))
710 if (UINT16_VALUE_P (value))
712 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
713 if (!UINT8_BYTE_TABLE_P (value) &&
714 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
715 && uint16_byte_table_same_value_p (table))
722 Lisp_Object new = make_byte_table (Qnil);
725 for (i = 0; i < 256; i++)
727 XBYTE_TABLE(new)->property[i]
728 = UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[i]);
730 XBYTE_TABLE(new)->property[idx] = value;
734 else if (BYTE_TABLE_P (table))
736 XBYTE_TABLE(table)->property[idx] = value;
737 if (!UINT8_BYTE_TABLE_P (value) &&
738 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
739 && byte_table_same_value_p (table))
744 else if (!internal_equal (table, value, 0))
746 if (UINT8_VALUE_P (table) && UINT8_VALUE_P (value))
748 table = make_uint8_byte_table (UINT8_ENCODE (table));
749 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
751 else if (UINT16_VALUE_P (table) && UINT16_VALUE_P (value))
753 table = make_uint16_byte_table (UINT16_ENCODE (table));
754 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
758 table = make_byte_table (table);
759 XBYTE_TABLE(table)->property[idx] = value;
766 mark_char_id_table (Lisp_Object obj)
768 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
774 print_char_id_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
776 Lisp_Object table = XCHAR_ID_TABLE (obj)->table;
778 struct gcpro gcpro1, gcpro2;
779 GCPRO2 (obj, printcharfun);
781 write_c_string ("#<char-id-table ", printcharfun);
782 for (i = 0; i < 256; i++)
784 Lisp_Object elt = get_byte_table (table, i);
785 if (i != 0) write_c_string ("\n ", printcharfun);
786 if (EQ (elt, Qunbound))
787 write_c_string ("void", printcharfun);
789 print_internal (elt, printcharfun, escapeflag);
792 write_c_string (">", printcharfun);
/* Equality method for char-id-table objects.  Compares the underlying
   tables of OBJ1 and OBJ2 cell by cell over the 256 top-level indices,
   using internal_equal on the values returned by get_byte_table. */
796 char_id_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
798 Lisp_Object table1 = XCHAR_ID_TABLE (obj1)->table;
799 Lisp_Object table2 = XCHAR_ID_TABLE (obj2)->table;
802 for (i = 0; i < 256; i++)
/* Hand the caller's recursion depth on (as byte_table_equal does)
   instead of restarting the count at 0 for every nested comparison. */
804 if (!internal_equal (get_byte_table (table1, i),
805 get_byte_table (table2, i), depth + 1))
/* Hash method for char-id-table objects: the hash of the wrapper is the
   hash of the table it holds.

   cte->table is not a char-id-table itself (make_char_id_table fills it
   with the result of make_byte_table, and put_char_id_table with the
   result of put_byte_table), so it must be hashed through the generic
   internal_hash dispatcher.  Calling char_id_table_hash on it again
   would apply XCHAR_ID_TABLE to an object of the wrong type and recurse
   without end. */
812 char_id_table_hash (Lisp_Object obj, int depth)
814 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
816 return internal_hash (cte->table, depth + 1);
819 static const struct lrecord_description char_id_table_description[] = {
820 { XD_LISP_OBJECT, offsetof(Lisp_Char_ID_Table, table) },
824 DEFINE_LRECORD_IMPLEMENTATION ("char-id-table", char_id_table,
827 0, char_id_table_equal,
829 char_id_table_description,
833 make_char_id_table (Lisp_Object initval)
836 Lisp_Char_ID_Table *cte;
838 cte = alloc_lcrecord_type (Lisp_Char_ID_Table, &lrecord_char_id_table);
840 cte->table = make_byte_table (initval);
842 XSETCHAR_ID_TABLE (obj, cte);
848 get_char_id_table (Emchar ch, Lisp_Object table)
850 unsigned int code = ch;
857 (XCHAR_ID_TABLE (table)->table,
858 (unsigned char)(code >> 24)),
859 (unsigned char) (code >> 16)),
860 (unsigned char) (code >> 8)),
861 (unsigned char) code);
864 void put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table);
/* Store VALUE for character CH in the char-id-table TABLE.

   The character code is split into four bytes, most significant first,
   and each byte indexes one level of nested byte tables.  The function
   first walks down the three upper levels with get_byte_table, then
   stores VALUE at the lowest level and writes each level back into its
   parent on the way up.  The write-back uses the return value of
   put_byte_table at every level, because put_byte_table hands back the
   table object to keep, which need not be the one that was passed in. */
866 put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table)
868 unsigned int code = ch;
869 Lisp_Object table1, table2, table3, table4;
871 table1 = XCHAR_ID_TABLE (table)->table;
872 table2 = get_byte_table (table1, (unsigned char)(code >> 24));
873 table3 = get_byte_table (table2, (unsigned char)(code >> 16));
874 table4 = get_byte_table (table3, (unsigned char)(code >> 8));
876 table4 = put_byte_table (table4, (unsigned char)code, value);
877 table3 = put_byte_table (table3, (unsigned char)(code >> 8), table4);
878 table2 = put_byte_table (table2, (unsigned char)(code >> 16), table3);
879 XCHAR_ID_TABLE (table)->table
880 = put_byte_table (table1, (unsigned char)(code >> 24), table2);
884 Lisp_Object Vchar_attribute_hash_table;
885 Lisp_Object Vcharacter_composition_table;
886 Lisp_Object Vcharacter_variant_table;
888 Lisp_Object Qideograph_daikanwa;
889 Lisp_Object Q_decomposition;
893 Lisp_Object Qisolated;
894 Lisp_Object Qinitial;
897 Lisp_Object Qvertical;
898 Lisp_Object QnoBreak;
899 Lisp_Object Qfraction;
909 Emchar to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg);
911 Lisp_Object put_char_ccs_code_point (Lisp_Object character,
912 Lisp_Object ccs, Lisp_Object value);
913 Lisp_Object remove_char_ccs (Lisp_Object character, Lisp_Object ccs);
916 to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
922 else if (EQ (v, Qcompat))
924 else if (EQ (v, Qisolated))
926 else if (EQ (v, Qinitial))
928 else if (EQ (v, Qmedial))
930 else if (EQ (v, Qfinal))
932 else if (EQ (v, Qvertical))
934 else if (EQ (v, QnoBreak))
936 else if (EQ (v, Qfraction))
938 else if (EQ (v, Qsuper))
940 else if (EQ (v, Qsub))
942 else if (EQ (v, Qcircle))
944 else if (EQ (v, Qsquare))
946 else if (EQ (v, Qwide))
948 else if (EQ (v, Qnarrow))
950 else if (EQ (v, Qsmall))
952 else if (EQ (v, Qfont))
955 signal_simple_error (err_msg, err_arg);
958 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
959 Return character corresponding with list.
963 Lisp_Object table = Vcharacter_composition_table;
964 Lisp_Object rest = list;
968 Lisp_Object v = Fcar (rest);
970 Emchar c = to_char_id (v, "Invalid value for composition", list);
972 ret = get_char_id_table (c, table);
977 if (!CHAR_ID_TABLE_P (ret))
982 else if (!CONSP (rest))
984 else if (CHAR_ID_TABLE_P (ret))
987 signal_simple_error ("Invalid table is found with", list);
989 signal_simple_error ("Invalid value for composition", list);
992 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
993 Return variants of CHARACTER.
997 CHECK_CHAR (character);
998 return Fcopy_list (get_char_id_table (XCHAR (character),
999 Vcharacter_variant_table));
1003 /* We store the char-attributes in hash tables with the names as the
1004 key and the actual char-id-table object as the value. Occasionally
1005 we need to use them in a list format. These routines provide us
1007 struct char_attribute_list_closure
1009 Lisp_Object *char_attribute_list;
1013 add_char_attribute_to_list_mapper (Lisp_Object key, Lisp_Object value,
1014 void *char_attribute_list_closure)
1016 /* This function can GC */
1017 struct char_attribute_list_closure *calcl
1018 = (struct char_attribute_list_closure*) char_attribute_list_closure;
1019 Lisp_Object *char_attribute_list = calcl->char_attribute_list;
1021 *char_attribute_list = Fcons (key, *char_attribute_list);
1025 DEFUN ("char-attribute-list", Fchar_attribute_list, 0, 0, 0, /*
1026 Return the list of all existing character attributes except coded-charsets.
1030 Lisp_Object char_attribute_list = Qnil;
1031 struct gcpro gcpro1;
1032 struct char_attribute_list_closure char_attribute_list_closure;
1034 GCPRO1 (char_attribute_list);
1035 char_attribute_list_closure.char_attribute_list = &char_attribute_list;
1036 elisp_maphash (add_char_attribute_to_list_mapper,
1037 Vchar_attribute_hash_table,
1038 &char_attribute_list_closure);
1040 return char_attribute_list;
1043 DEFUN ("find-char-attribute-table", Ffind_char_attribute_table, 1, 1, 0, /*
1044 Return char-id-table corresponding to ATTRIBUTE.
1048 return Fgethash (attribute, Vchar_attribute_hash_table, Qnil);
1052 /* We store the char-id-tables in hash tables with the attributes as
1053 the key and the actual char-id-table object as the value. Each
1054 char-id-table stores values of an attribute corresponding with
1055 characters. Occasionally we need to get attributes of a character
1056 in a association-list format. These routines provide us with
1058 struct char_attribute_alist_closure
1061 Lisp_Object *char_attribute_alist;
1065 add_char_attribute_alist_mapper (Lisp_Object key, Lisp_Object value,
1066 void *char_attribute_alist_closure)
1068 /* This function can GC */
1069 struct char_attribute_alist_closure *caacl =
1070 (struct char_attribute_alist_closure*) char_attribute_alist_closure;
1071 Lisp_Object ret = get_char_id_table (caacl->char_id, value);
1072 if (!UNBOUNDP (ret))
1074 Lisp_Object *char_attribute_alist = caacl->char_attribute_alist;
1075 *char_attribute_alist
1076 = Fcons (Fcons (key, ret), *char_attribute_alist);
1081 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
1082 Return the alist of attributes of CHARACTER.
1086 Lisp_Object alist = Qnil;
1089 CHECK_CHAR (character);
1091 struct gcpro gcpro1;
1092 struct char_attribute_alist_closure char_attribute_alist_closure;
1095 char_attribute_alist_closure.char_id = XCHAR (character);
1096 char_attribute_alist_closure.char_attribute_alist = &alist;
1097 elisp_maphash (add_char_attribute_alist_mapper,
1098 Vchar_attribute_hash_table,
1099 &char_attribute_alist_closure);
1103 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
1105 Lisp_Object ccs = chlook->charset_by_leading_byte[i];
1109 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1112 if ( CHAR_ID_TABLE_P (encoding_table)
1113 && INTP (cpos = get_char_id_table (XCHAR (character),
1116 alist = Fcons (Fcons (ccs, cpos), alist);
1123 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 3, 0, /*
1124 Return the value of CHARACTER's ATTRIBUTE.
1125 Return DEFAULT-VALUE if the value does not exist.
1127 (character, attribute, default_value))
1131 CHECK_CHAR (character);
1132 if (!NILP (ccs = Ffind_charset (attribute)))
1134 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1136 if (CHAR_ID_TABLE_P (encoding_table))
1137 return get_char_id_table (XCHAR (character), encoding_table);
1141 Lisp_Object table = Fgethash (attribute,
1142 Vchar_attribute_hash_table,
1144 if (!UNBOUNDP (table))
1146 Lisp_Object ret = get_char_id_table (XCHAR (character), table);
1147 if (!UNBOUNDP (ret))
1151 return default_value;
1154 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
1155 Store CHARACTER's ATTRIBUTE with VALUE.
1157 (character, attribute, value))
1161 CHECK_CHAR (character);
1162 ccs = Ffind_charset (attribute);
1165 return put_char_ccs_code_point (character, ccs, value);
1167 else if (EQ (attribute, Q_decomposition))
1172 signal_simple_error ("Invalid value for ->decomposition",
1175 if (CONSP (Fcdr (value)))
1177 Lisp_Object rest = value;
1178 Lisp_Object table = Vcharacter_composition_table;
1182 GET_EXTERNAL_LIST_LENGTH (rest, len);
1183 seq = make_vector (len, Qnil);
1185 while (CONSP (rest))
1187 Lisp_Object v = Fcar (rest);
1190 = to_char_id (v, "Invalid value for ->decomposition", value);
1193 XVECTOR_DATA(seq)[i++] = v;
1195 XVECTOR_DATA(seq)[i++] = make_char (c);
1199 put_char_id_table (c, character, table);
1204 ntable = get_char_id_table (c, table);
1205 if (!CHAR_ID_TABLE_P (ntable))
1207 ntable = make_char_id_table (Qnil);
1208 put_char_id_table (c, ntable, table);
1216 Lisp_Object v = Fcar (value);
1220 Emchar c = XINT (v);
1222 = get_char_id_table (c, Vcharacter_variant_table);
1224 if (NILP (Fmemq (v, ret)))
1226 put_char_id_table (c, Fcons (character, ret),
1227 Vcharacter_variant_table);
1230 seq = make_vector (1, v);
1234 else if (EQ (attribute, Q_ucs))
1240 signal_simple_error ("Invalid value for ->ucs", value);
1244 ret = get_char_id_table (c, Vcharacter_variant_table);
1245 if (NILP (Fmemq (character, ret)))
1247 put_char_id_table (c, Fcons (character, ret),
1248 Vcharacter_variant_table);
1252 Lisp_Object table = Fgethash (attribute,
1253 Vchar_attribute_hash_table,
1258 table = make_char_id_table (Qunbound);
1259 Fputhash (attribute, table, Vchar_attribute_hash_table);
1261 put_char_id_table (XCHAR (character), value, table);
1266 DEFUN ("remove-char-attribute", Fremove_char_attribute, 2, 2, 0, /*
1267 Remove CHARACTER's ATTRIBUTE.
1269 (character, attribute))
1273 CHECK_CHAR (character);
1274 ccs = Ffind_charset (attribute);
1277 return remove_char_ccs (character, ccs);
1281 Lisp_Object table = Fgethash (attribute,
1282 Vchar_attribute_hash_table,
1284 if (!UNBOUNDP (table))
1286 put_char_id_table (XCHAR (character), Qunbound, table);
1293 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
/* Return the per-byte size used for charset CS.  This is normally
   CHARSET_CHARS (cs); the one exception is a 94-character set whose
   byte offset is not 33, for which 128 - CHARSET_BYTE_OFFSET (cs) is
   returned instead. */
1295 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
1297 /* ad-hoc method for `ascii' */
1298 if ((CHARSET_CHARS (cs) == 94) &&
1299 (CHARSET_BYTE_OFFSET (cs) != 33))
1300 return 128 - CHARSET_BYTE_OFFSET (cs);
1302 return CHARSET_CHARS (cs);
1305 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
1307 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
1309 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
1313 if (XVECTOR_LENGTH (v) > ccs_len)
1316 for (i = 0; i < XVECTOR_LENGTH (v); i++)
1318 Lisp_Object c = XVECTOR_DATA(v)[i];
1320 if (!NILP (c) && !CHARP (c))
1324 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
1336 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1339 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1349 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1350 nv = XVECTOR_DATA(v)[i];
1356 XVECTOR_DATA(v)[i] = Qnil;
1360 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1361 int code_point, Lisp_Object character);
1363 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1364 int code_point, Lisp_Object character)
1368 int ccs_len = XVECTOR_LENGTH (v);
1373 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1374 nv = XVECTOR_DATA(v)[i];
1378 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
1384 XVECTOR_DATA(v)[i] = character;
1388 put_char_ccs_code_point (Lisp_Object character,
1389 Lisp_Object ccs, Lisp_Object value)
1391 Lisp_Object encoding_table;
1393 if (!EQ (XCHARSET_NAME (ccs), Qucs)
1394 || (XCHAR (character) != XINT (value)))
1396 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
1397 int dim = XCHARSET_DIMENSION (ccs);
1398 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
1399 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
1403 { /* obsolete representation: value must be a list of bytes */
1404 Lisp_Object ret = Fcar (value);
1408 signal_simple_error ("Invalid value for coded-charset", value);
1409 code_point = XINT (ret);
1410 if (XCHARSET_GRAPHIC (ccs) == 1)
1412 rest = Fcdr (value);
1413 while (!NILP (rest))
1418 signal_simple_error ("Invalid value for coded-charset",
1422 signal_simple_error ("Invalid value for coded-charset",
1425 if (XCHARSET_GRAPHIC (ccs) == 1)
1427 code_point = (code_point << 8) | j;
1430 value = make_int (code_point);
1432 else if (INTP (value))
1434 code_point = XINT (value);
1435 if (XCHARSET_GRAPHIC (ccs) == 1)
1437 code_point &= 0x7F7F7F7F;
1438 value = make_int (code_point);
1442 signal_simple_error ("Invalid value for coded-charset", value);
1446 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
1449 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
1454 XCHARSET_DECODING_TABLE (ccs)
1455 = v = make_older_vector (ccs_len, Qnil);
1458 decoding_table_put_char (v, dim, byte_offset, code_point, character);
1460 if (NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
1462 XCHARSET_ENCODING_TABLE (ccs)
1463 = encoding_table = make_char_id_table (Qnil);
1465 put_char_id_table (XCHAR (character), value, encoding_table);
1470 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
1472 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
1473 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1475 if (VECTORP (decoding_table))
1477 Lisp_Object cpos = Fget_char_attribute (character, ccs, Qnil);
1481 decoding_table_remove_char (decoding_table,
1482 XCHARSET_DIMENSION (ccs),
1483 XCHARSET_BYTE_OFFSET (ccs),
1487 if (CHAR_ID_TABLE_P (encoding_table))
1489 put_char_id_table (XCHAR (character), Qnil, encoding_table);
1494 EXFUN (Fmake_char, 3);
1495 EXFUN (Fdecode_char, 2);
/* Lisp primitive `define-char', operating on the alist ATTRIBUTES.
   The visible code obtains a character in one of three ways:
   - it walks ATTRIBUTES for a cell whose car names a charset (via
     Ffind_charset) that has a nonzero final byte or a positive UCS
     maximum, and builds the character with Fmake_char or Fdecode_char;
   - it uses the Q_ucs entry, building make_char (code + 0x100000);
   - it uses the Qucs entry fetched at the top, building make_char (code).
   Once a character is available (label setup_attributes is the jump
   target), ATTRIBUTES is walked again and each cell's car/cdr pair is
   stored with Fput_char_attribute.  Malformed input signals
   "Invalid argument" with ATTRIBUTES as the datum.
   NOTE(review): several source lines are missing from this view; the
   exact tests that select each branch and the return value should be
   confirmed against the full source. */
1497 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
1498 Store character's ATTRIBUTES.
1502 Lisp_Object rest = attributes;
1503 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
1504 Lisp_Object character;
1508 while (CONSP (rest))
1510 Lisp_Object cell = Fcar (rest);
1514 signal_simple_error ("Invalid argument", attributes);
1515 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
1516 && ((XCHARSET_FINAL (ccs) != 0) ||
1517 (XCHARSET_UCS_MAX (ccs) > 0)) )
1521 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
1523 character = Fdecode_char (ccs, cell);
1524 if (!NILP (character))
1525 goto setup_attributes;
1529 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
1532 signal_simple_error ("Invalid argument", attributes);
1534 character = make_char (XINT (code) + 0x100000);
1535 goto setup_attributes;
1539 else if (!INTP (code))
1540 signal_simple_error ("Invalid argument", attributes);
1542 character = make_char (XINT (code));
1546 while (CONSP (rest))
1548 Lisp_Object cell = Fcar (rest);
1551 signal_simple_error ("Invalid argument", attributes);
1553 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
/* File-level Lisp objects and symbols used by the charset code.
   NOTE(review): the symbol lists below are fragments -- several
   declarator lines are missing from this view, so the lines starting
   with a bare Q... name continue declarations that begin on lines not
   shown here. */
1559 Lisp_Object Vutf_2000_version;
1563 int leading_code_private_11;
1566 Lisp_Object Qcharsetp;
1568 /* Qdoc_string, Qdimension, Qchars defined in general.c */
/* Property-name symbols; Fmake_charset compares property keywords
   against Qregistry, Qfinal, Qgraphic, Qdirection, Qshort_name and
   Qlong_name. */
1569 Lisp_Object Qregistry, Qfinal, Qgraphic;
1570 Lisp_Object Qdirection;
1571 Lisp_Object Qreverse_direction_charset;
1572 Lisp_Object Qleading_byte;
1573 Lisp_Object Qshort_name, Qlong_name;
1587 Qcyrillic_iso8859_5,
1589 Qjapanese_jisx0208_1978,
1593 Qjapanese_jisx0208_1990,
1596 Qchinese_cns11643_1,
1597 Qchinese_cns11643_2,
1604 Qlatin_viscii_lower,
1605 Qlatin_viscii_upper,
1606 Qvietnamese_viscii_lower,
1607 Qvietnamese_viscii_upper,
1619 Qideograph_gt_pj_10,
1620 Qideograph_gt_pj_11,
/* Direction symbols accepted for the 'direction property. */
1650 Lisp_Object Ql2r, Qr2l;
/* Hash table from charset name (a symbol) to charset object: written by
   make_charset and Fdefine_charset_alias, read by Ffind_charset and
   Fcharset_list. */
1652 Lisp_Object Vcharset_hash_table;
1654 /* Composite characters are characters constructed by overstriking two
1655 or more regular characters.
1657 1) The old Mule implementation involves storing composite characters
1658 in a buffer as a tag followed by all of the actual characters
1659 used to make up the composite character. I think this is a bad
1660 idea; it greatly complicates code that wants to handle strings
1661 one character at a time because it has to deal with the possibility
1662 of great big ungainly characters. It's much more reasonable to
1663 simply store an index into a table of composite characters.
1665 2) The current implementation only allows for 16,384 separate
1666 composite characters over the lifetime of the XEmacs process.
1667 This could become a potential problem if the user
1668 edited lots of different files that use composite characters.
1669 Due to FSF bogosity, increasing the number of allowable
1670 composite characters under Mule would decrease the number
1671 of possible faces that can exist. Mule already has shrunk
1672 this to 2048, and further shrinkage would become uncomfortable.
1673 No such problems exist in XEmacs.
1675 Composite characters could be represented as 0x80 C1 C2 C3,
1676 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1677 for slightly under 2^20 (one million) composite characters
1678 over the XEmacs process lifetime, and you only need to
1679 increase the size of a Mule character from 19 to 21 bits.
1680 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1681 85 million (slightly over 2^26) composite characters. */
1684 /************************************************************************/
1685 /* Basic Emchar functions */
1686 /************************************************************************/
1688 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1689 string in STR. Returns the number of bytes stored.
1690 Do not call this directly. Use the macro set_charptr_emchar() instead.
/* Store the multi-byte representation of C at STR.
   Two encodings are visible in this view:
   - a UTF-8-style one that picks the sequence length from the magnitude
     of C and emits a tagged lead byte followed by continuation bytes,
     each carrying six payload bits under the 0x80 marker;
   - a leading-byte one that splits C with BREAKUP_CHAR, emits a prefix
     byte for private leading bytes and treats Vcharset_control_1
     specially.
   NOTE(review): the preprocessor lines that select between the two, the
   first branch of the length chain and the return statement are missing
   from this view. */
1694 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1700 Lisp_Object charset;
/* Up to 11 bits: lead byte tagged 0xc0, one continuation byte. */
1709 else if ( c <= 0x7ff )
1711 *p++ = (c >> 6) | 0xc0;
1712 *p++ = (c & 0x3f) | 0x80;
/* Up to 16 bits: lead byte tagged 0xe0, two continuation bytes. */
1714 else if ( c <= 0xffff )
1716 *p++ = (c >> 12) | 0xe0;
1717 *p++ = ((c >> 6) & 0x3f) | 0x80;
1718 *p++ = (c & 0x3f) | 0x80;
/* Up to 21 bits: lead byte tagged 0xf0, three continuation bytes. */
1720 else if ( c <= 0x1fffff )
1722 *p++ = (c >> 18) | 0xf0;
1723 *p++ = ((c >> 12) & 0x3f) | 0x80;
1724 *p++ = ((c >> 6) & 0x3f) | 0x80;
1725 *p++ = (c & 0x3f) | 0x80;
/* Up to 26 bits: lead byte tagged 0xf8, four continuation bytes. */
1727 else if ( c <= 0x3ffffff )
1729 *p++ = (c >> 24) | 0xf8;
1730 *p++ = ((c >> 18) & 0x3f) | 0x80;
1731 *p++ = ((c >> 12) & 0x3f) | 0x80;
1732 *p++ = ((c >> 6) & 0x3f) | 0x80;
1733 *p++ = (c & 0x3f) | 0x80;
/* Anything larger: lead byte tagged 0xfc, five continuation bytes. */
1737 *p++ = (c >> 30) | 0xfc;
1738 *p++ = ((c >> 24) & 0x3f) | 0x80;
1739 *p++ = ((c >> 18) & 0x3f) | 0x80;
1740 *p++ = ((c >> 12) & 0x3f) | 0x80;
1741 *p++ = ((c >> 6) & 0x3f) | 0x80;
1742 *p++ = (c & 0x3f) | 0x80;
/* Leading-byte encoding: split C into charset and position codes. */
1745 BREAKUP_CHAR (c, charset, c1, c2);
1746 lb = CHAR_LEADING_BYTE (c);
1747 if (LEADING_BYTE_PRIVATE_P (lb))
1748 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1750 if (EQ (charset, Vcharset_control_1))
1759 /* Return the first character from a Mule-encoded string in STR,
1760 assuming it's non-ASCII. Do not call this directly.
1761 Use the macro charptr_emchar() instead. */
/* Decode the first character stored at STR.
   Two decoders are visible in this view:
   - one classifies the lead byte B against 0xf8 / 0xf0 / 0xe0 / 0xc0
     and then folds LEN further bytes into CH, six bits at a time;
   - one returns the next byte minus 0x20 for LEADING_BYTE_CONTROL_1,
     and otherwise looks the charset up by leading byte (after handling
     a prefix byte) and builds the result with MAKE_CHAR, the second
     position code I2 staying 0 unless the charset has dimension 2.
   NOTE(review): the lines that set LEN and the initial CH for each lead
   byte class, and the preprocessor lines selecting a decoder, are
   missing from this view. */
1764 non_ascii_charptr_emchar (const Bufbyte *str)
1777 else if ( b >= 0xf8 )
1782 else if ( b >= 0xf0 )
1787 else if ( b >= 0xe0 )
1792 else if ( b >= 0xc0 )
/* Accumulate the low six bits of each following byte. */
1802 for( ; len > 0; len-- )
1805 ch = ( ch << 6 ) | ( b & 0x3f );
1809 Bufbyte i0 = *str, i1, i2 = 0;
1810 Lisp_Object charset;
1812 if (i0 == LEADING_BYTE_CONTROL_1)
1813 return (Emchar) (*++str - 0x20);
1815 if (LEADING_BYTE_PREFIX_P (i0))
1820 charset = CHARSET_BY_LEADING_BYTE (i0);
1821 if (XCHARSET_DIMENSION (charset) == 2)
1824 return MAKE_CHAR (charset, i1, i2);
1828 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1829 Do not call this directly. Use the macro valid_char_p() instead. */
/* Validity check for a non-ASCII Emchar CH.
   CH is split into CHAR_FIELD1..3 (F1, F2, F3).  Two cases are visible:
   - one that requires F2 to lie in the official or the private field-2
     range and then examines F3;
   - one that requires F1 to lie in the official or the private field-1
     range and F2, F3 to be at least 0x20, with an extra hash-table
     lookup for composite characters under ENABLE_COMPOSITE_CHARS.
   In both cases the position codes 0x20 and 0x7F get special treatment:
   the charset is looked up by leading byte, a Qnil result is tested, and
   the last visible statement returns whether the charset has 96
   characters per dimension.
   NOTE(review): the test that selects between the two cases and the
   return values of the early exits are missing from this view. */
1833 non_ascii_valid_char_p (Emchar ch)
1837 /* Must have only lowest 19 bits set */
1841 f1 = CHAR_FIELD1 (ch);
1842 f2 = CHAR_FIELD2 (ch);
1843 f3 = CHAR_FIELD3 (ch);
1847 Lisp_Object charset;
1849 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1850 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1851 f2 > MAX_CHAR_FIELD2_PRIVATE)
1856 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1857 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1861 NOTE: This takes advantage of the fact that
1862 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1863 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1865 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1866 if (EQ (charset, Qnil))
1868 return (XCHARSET_CHARS (charset) == 96);
1872 Lisp_Object charset;
1874 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1875 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1876 f1 > MAX_CHAR_FIELD1_PRIVATE)
1878 if (f2 < 0x20 || f3 < 0x20)
1881 #ifdef ENABLE_COMPOSITE_CHARS
1882 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1884 if (UNBOUNDP (Fgethash (make_int (ch),
1885 Vcomposite_char_char2string_hash_table,
1890 #endif /* ENABLE_COMPOSITE_CHARS */
1892 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1893 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1896 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1898 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1901 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1903 if (EQ (charset, Qnil))
1905 return (XCHARSET_CHARS (charset) == 96);
1911 /************************************************************************/
1912 /* Basic string functions */
1913 /************************************************************************/
1915 /* Copy the character pointed to by SRC into DST. Do not call this
1916 directly. Use the macro charptr_copy_char() instead.
1917 Return the number of bytes copied. */
/* The byte count comes from the first byte of SRC via
   REP_BYTES_BY_FIRST_BYTE; the loop then steps SRC and DST forward
   together once per byte.
   NOTE(review): the loop body and the return statement are missing from
   this view. */
1920 non_ascii_charptr_copy_char (const Bufbyte *src, Bufbyte *dst)
1922 unsigned int bytes = REP_BYTES_BY_FIRST_BYTE (*src);
1924 for (i = bytes; i; i--, dst++, src++)
1930 /************************************************************************/
1931 /* streams of Emchars */
1932 /************************************************************************/
1934 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1935 The functions below are not meant to be called directly; use
1936 the macros in insdel.h. */
/* Finish reading a multi-byte character from STREAM whose first byte CH
   has already been read.  CH is placed at the start of a local buffer,
   the remaining REP_BYTES_BY_FIRST_BYTE (ch) - 1 bytes are pulled with
   Lstream_getc (each asserted to be >= 0), and the buffer is decoded
   with charptr_emchar. */
1939 Lstream_get_emchar_1 (Lstream *stream, int ch)
1941 Bufbyte str[MAX_EMCHAR_LEN];
1942 Bufbyte *strptr = str;
1945 str[0] = (Bufbyte) ch;
1947 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
1949 int c = Lstream_getc (stream);
1950 bufpos_checking_assert (c >= 0);
1951 *++strptr = (Bufbyte) c;
1953 return charptr_emchar (str);
/* Encode CH into a local buffer with set_charptr_emchar and write the
   resulting LEN bytes to STREAM; the result of Lstream_write is
   returned unchanged. */
1957 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1959 Bufbyte str[MAX_EMCHAR_LEN];
1960 Bytecount len = set_charptr_emchar (str, ch);
1961 return Lstream_write (stream, str, len);
/* Encode CH into a local buffer with set_charptr_emchar and hand the
   resulting LEN bytes to Lstream_unread on STREAM. */
1965 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1967 Bufbyte str[MAX_EMCHAR_LEN];
1968 Bytecount len = set_charptr_emchar (str, ch);
1969 Lstream_unread (stream, str, len);
1973 /************************************************************************/
1974 /* charset object */
1975 /************************************************************************/
/* Mark method for charset objects: marks the short name, long name, doc
   string, registry, CCL program and encoding table of OBJ.
   The call that would mark the decoding table is commented out; the
   reason is not stated here.
   NOTE(review): the return statement and any preprocessor guards are
   missing from this view. */
1978 mark_charset (Lisp_Object obj)
1980 Lisp_Charset *cs = XCHARSET (obj);
1982 mark_object (cs->short_name);
1983 mark_object (cs->long_name);
1984 mark_object (cs->doc_string);
1985 mark_object (cs->registry);
1986 mark_object (cs->ccl_program);
1988 mark_object (cs->encoding_table);
1989 /* mark_object (cs->decoding_table); */
/* Print method for charset objects.
   One path signals "printing unreadable object" naming the charset
   (NOTE(review): the condition guarding it is missing from this view).
   The normal path writes, to PRINTCHARFUN:
     "#<charset " NAME " " SHORT-NAME " " LONG-NAME " " DOC-STRING,
   then a formatted summary (chars^dimension, "l2r" or "r2l", columns,
   graphic, final byte), then the registry, then " 0x<uid>>".
   The name and registry are printed with escape flag 0, the short name,
   long name and doc string with escape flag 1. */
1995 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1997 Lisp_Charset *cs = XCHARSET (obj);
2001 error ("printing unreadable object #<charset %s 0x%x>",
2002 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
2005 write_c_string ("#<charset ", printcharfun);
2006 print_internal (CHARSET_NAME (cs), printcharfun, 0);
2007 write_c_string (" ", printcharfun);
2008 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
2009 write_c_string (" ", printcharfun);
2010 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
2011 write_c_string (" ", printcharfun);
2012 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
2013 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
2015 CHARSET_DIMENSION (cs),
2016 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
2017 CHARSET_COLUMNS (cs),
2018 CHARSET_GRAPHIC (cs),
2019 CHARSET_FINAL (cs));
2020 write_c_string (buf, printcharfun);
2021 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
2022 sprintf (buf, " 0x%x>", cs->header.uid);
2023 write_c_string (buf, printcharfun);
/* Description table for Lisp_Charset: one XD_LISP_OBJECT entry, given
   as a field offset, for each Lisp_Object member listed.  It is passed
   to DEFINE_LRECORD_IMPLEMENTATION for the "charset" type.
   NOTE(review): the terminating entry and closing brace are missing
   from this view. */
2026 static const struct lrecord_description charset_description[] = {
2027 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
2028 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
2029 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
2030 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
2031 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
2032 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
2033 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
2035 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
2036 { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
/* Defines the "charset" lrecord type, wiring in mark_charset,
   print_charset and charset_description.
   NOTE(review): the final macro argument(s) are missing from this
   view. */
2041 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
2042 mark_charset, print_charset, 0, 0, 0,
2043 charset_description,
2046 /* Make a new charset. */
2047 /* #### SJT Should generic properties be allowed? */
/* Allocate a Lisp_Charset, fill in its fields and register it.
   ID is stored as the charset id and indexes the leading-byte table;
   NAME is the hash-table key.  CHARS, DIMENSION, COLUMNS, GRAPHIC,
   FINAL, DIRECTION, SHORT_NAME, LONG_NAME and DOC are copied into the
   corresponding fields, as are UCS_MIN, UCS_MAX, CODE_OFFSET and
   BYTE_OFFSET.  The CCL program, reverse-direction charset, decoding
   table and encoding table all start out as Qnil.
   Registration, as far as visible here:
   - the charset is entered in chlook->charset_by_attributes, indexed by
     an ISO 2022 type derived from DIMENSION and CHARS plus FINAL (one
     variant also indexes by DIRECTION), after asserting the slot is
     empty;
   - it is entered in chlook->charset_by_leading_byte at
     ID - MIN_LEADING_BYTE, again after asserting the slot is empty;
   - it is stored in Vcharset_hash_table under NAME.
   NOTE(review): the DECODING_TABLE argument is never stored by the
   visible code -- the field is set to Qnil regardless; confirm that is
   intended.
   NOTE(review): lines are missing from this view, including the
   declaration of the `reg' parameter, the conditions guarding each
   registration step and the preprocessor lines separating variants. */
2049 make_charset (Charset_ID id, Lisp_Object name,
2050 unsigned short chars, unsigned char dimension,
2051 unsigned char columns, unsigned char graphic,
2052 Bufbyte final, unsigned char direction, Lisp_Object short_name,
2053 Lisp_Object long_name, Lisp_Object doc,
2055 Lisp_Object decoding_table,
2056 Emchar ucs_min, Emchar ucs_max,
2057 Emchar code_offset, unsigned char byte_offset)
2060 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
2064 XSETCHARSET (obj, cs);
2066 CHARSET_ID (cs) = id;
2067 CHARSET_NAME (cs) = name;
2068 CHARSET_SHORT_NAME (cs) = short_name;
2069 CHARSET_LONG_NAME (cs) = long_name;
2070 CHARSET_CHARS (cs) = chars;
2071 CHARSET_DIMENSION (cs) = dimension;
2072 CHARSET_DIRECTION (cs) = direction;
2073 CHARSET_COLUMNS (cs) = columns;
2074 CHARSET_GRAPHIC (cs) = graphic;
2075 CHARSET_FINAL (cs) = final;
2076 CHARSET_DOC_STRING (cs) = doc;
2077 CHARSET_REGISTRY (cs) = reg;
2078 CHARSET_CCL_PROGRAM (cs) = Qnil;
2079 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
2081 CHARSET_DECODING_TABLE(cs) = Qnil;
2082 CHARSET_ENCODING_TABLE(cs) = Qnil;
2083 CHARSET_UCS_MIN(cs) = ucs_min;
2084 CHARSET_UCS_MAX(cs) = ucs_max;
2085 CHARSET_CODE_OFFSET(cs) = code_offset;
2086 CHARSET_BYTE_OFFSET(cs) = byte_offset;
/* Representation length: 1 byte for ASCII, otherwise the dimension plus
   one or two extra bytes (the test choosing between the last two is
   missing from this view). */
2090 if (id == LEADING_BYTE_ASCII)
2091 CHARSET_REP_BYTES (cs) = 1;
2093 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
2095 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
2100 /* some charsets do not have final characters. This includes
2101 ASCII, Control-1, Composite, and the two faux private
2103 unsigned char iso2022_type
2104 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
2106 if (code_offset == 0)
2108 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
2109 chlook->charset_by_attributes[iso2022_type][final] = obj;
2113 (chlook->charset_by_attributes[iso2022_type][final][direction]));
2114 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
2118 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
2119 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
2121 /* Some charsets are "faux" and don't have names or really exist at
2122 all except in the leading-byte table. */
2124 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused leading byte for a charset of DIMENSION.
   Three allocation counters in chlook are visible: a general one bounded
   by MAX_LEADING_BYTE_PRIVATE and two dimension-specific ones bounded by
   MAX_LEADING_BYTE_PRIVATE_1 and MAX_LEADING_BYTE_PRIVATE_2.  Each is
   post-incremented when a byte is taken.  The error
   "No more character sets free for this dimension" carries DIMENSION as
   its datum.
   NOTE(review): the statements executed when a counter exceeds its
   bound, the test selecting a counter and the return statement are
   missing from this view. */
2129 get_unallocated_leading_byte (int dimension)
2134 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
2137 lb = chlook->next_allocated_leading_byte++;
2141 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
2144 lb = chlook->next_allocated_1_byte_leading_byte++;
2148 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
2151 lb = chlook->next_allocated_2_byte_leading_byte++;
2157 ("No more character sets free for this dimension",
2158 make_int (dimension));
2164 /* Number of Big5 characters which have the same code in 1st byte. */
/* (0xFF - 0xA1) + (0x7F - 0x40) = 94 + 63 = 157, matching the two
   second-byte ranges 0xA1..0xFE and 0x40..0x7E that decode_builtin_char
   accepts. */
2166 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
/* Compute the character for CODE_POINT in CHARSET arithmetically.
   Cases visible in this view:
   - Vcharset_chinese_big5: a two-byte code with first byte 0xA1..0xFE
     and second byte 0x40..0x7E or 0xA1..0xFE is turned into a linear
     index I (BIG5_SAME_ROW codes per first byte).  CHARSET is replaced
     by Vcharset_chinese_big5_1 or Vcharset_chinese_big5_2 (with I
     rebased by the rows from 0xA1 up to 0xC9 for the latter) and
     CODE_POINT is rewritten as a two-byte code with both bytes offset
     by 33.
   - Final byte >= '0': the result is a base plus an offset built from
     (final - '0') and the 7-bit position codes minus 33 (94-char sets)
     or minus 32 (96-char sets); MIN_CHAR_94x94 and MIN_CHAR_96x96 are
     the visible bases for dimension 2.
   - Nonzero UCS maximum: the code point is mapped linearly using the
     charset's byte offset, chars per dimension, code offset and UCS
     minimum, and the result CID is tested against
     [UCS_MIN, UCS_MAX].
   NOTE(review): the dimension-1 bases, the case labels and every
   failure-path return value are missing from this view. */
2169 decode_builtin_char (Lisp_Object charset, int code_point)
2173 if (EQ (charset, Vcharset_chinese_big5))
2175 int c1 = code_point >> 8;
2176 int c2 = code_point & 0xFF;
2179 if ( ( (0xA1 <= c1) && (c1 <= 0xFE) )
2181 ( ((0x40 <= c2) && (c2 <= 0x7E)) ||
2182 ((0xA1 <= c2) && (c2 <= 0xFE)) ) )
2184 I = (c1 - 0xA1) * BIG5_SAME_ROW
2185 + c2 - (c2 < 0x7F ? 0x40 : 0x62);
2189 charset = Vcharset_chinese_big5_1;
2193 charset = Vcharset_chinese_big5_2;
2194 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);
2196 code_point = ((I / 94 + 33) << 8) | (I % 94 + 33);
2199 if ((final = XCHARSET_FINAL (charset)) >= '0')
2201 if (XCHARSET_DIMENSION (charset) == 1)
2203 switch (XCHARSET_CHARS (charset))
2207 + (final - '0') * 94 + ((code_point & 0x7F) - 33);
2210 + (final - '0') * 96 + ((code_point & 0x7F) - 32);
2218 switch (XCHARSET_CHARS (charset))
2221 return MIN_CHAR_94x94
2222 + (final - '0') * 94 * 94
2223 + (((code_point >> 8) & 0x7F) - 33) * 94
2224 + ((code_point & 0x7F) - 33);
2226 return MIN_CHAR_96x96
2227 + (final - '0') * 96 * 96
2228 + (((code_point >> 8) & 0x7F) - 32) * 96
2229 + ((code_point & 0x7F) - 32);
2236 else if (XCHARSET_UCS_MAX (charset))
2239 = (XCHARSET_DIMENSION (charset) == 1
2241 code_point - XCHARSET_BYTE_OFFSET (charset)
2243 ((code_point >> 8) - XCHARSET_BYTE_OFFSET (charset))
2244 * XCHARSET_CHARS (charset)
2245 + (code_point & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
2246 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
2247 if ((cid < XCHARSET_UCS_MIN (charset))
2248 || (XCHARSET_UCS_MAX (charset) < cid))
/* Compute the code point of character CH in CHARSET arithmetically.
   Cases visible in this view:
   - CH within [UCS_MIN, UCS_MAX] of CHARSET: the offset
     D = CH - UCS_MIN + CODE_OFFSET is split into one byte per dimension
     in base XCHARSET_CHARS, BYTE_OFFSET is added to each byte, and the
     bytes are packed most significant first (8 bits each).  Charsets
     with 256 chars per dimension take a separate path.
   - CODE_OFFSET == 0: for 94 / 96 and 94x94 / 96x96 charsets, D is CH
     minus the block base for the charset's final byte; two-byte results
     are packed with both bytes offset by 33 (94) or 32 (96).
   - Vcharset_mojikyo_2022_1: characters strictly between
     MIN_CHAR_MOJIKYO and MIN_CHAR_MOJIKYO + 94 * 60 * 94 are split into
     three bytes and packed into a 24-bit value.
   NOTE(review): the upper-bound halves of several range tests, the
   adjustment applied to BYTE2 in the Mojikyo case, and every
   failure-path return value are missing from this view. */
2257 range_charset_code_point (Lisp_Object charset, Emchar ch)
2261 if ((XCHARSET_UCS_MIN (charset) <= ch)
2262 && (ch <= XCHARSET_UCS_MAX (charset)))
2264 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
2266 if (XCHARSET_CHARS (charset) == 256)
2268 else if (XCHARSET_DIMENSION (charset) == 1)
2269 return d + XCHARSET_BYTE_OFFSET (charset);
2270 else if (XCHARSET_DIMENSION (charset) == 2)
2272 ((d / XCHARSET_CHARS (charset)
2273 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2274 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2275 else if (XCHARSET_DIMENSION (charset) == 3)
2277 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2278 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2279 | ((d / XCHARSET_CHARS (charset)
2280 % XCHARSET_CHARS (charset)
2281 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2282 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2283 else /* if (XCHARSET_DIMENSION (charset) == 4) */
2285 ((d / (XCHARSET_CHARS (charset)
2286 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2287 + XCHARSET_BYTE_OFFSET (charset)) << 24)
2288 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2289 % XCHARSET_CHARS (charset)
2290 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2291 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
2292 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2293 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2295 else if (XCHARSET_CODE_OFFSET (charset) == 0)
2297 if (XCHARSET_DIMENSION (charset) == 1)
2299 if (XCHARSET_CHARS (charset) == 94)
2301 if (((d = ch - (MIN_CHAR_94
2302 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
2306 else if (XCHARSET_CHARS (charset) == 96)
2308 if (((d = ch - (MIN_CHAR_96
2309 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
2316 else if (XCHARSET_DIMENSION (charset) == 2)
2318 if (XCHARSET_CHARS (charset) == 94)
2320 if (((d = ch - (MIN_CHAR_94x94
2321 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
2324 return (((d / 94) + 33) << 8) | (d % 94 + 33);
2326 else if (XCHARSET_CHARS (charset) == 96)
2328 if (((d = ch - (MIN_CHAR_96x96
2329 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
2332 return (((d / 96) + 32) << 8) | (d % 96 + 32);
2338 if (EQ (charset, Vcharset_mojikyo_2022_1)
2339 && (MIN_CHAR_MOJIKYO < ch) && (ch < MIN_CHAR_MOJIKYO + 94 * 60 * 94))
2341 int m = ch - MIN_CHAR_MOJIKYO - 1;
2342 int byte1 = m / (94 * 60) + 33;
2343 int byte2 = (m % (94 * 60)) / 94;
2344 int byte3 = m % 94 + 33;
2350 return (byte1 << 16) | (byte2 << 8) | byte3;
/* Classify character C by numeric range, store the matching charset in
   *CHARSET and return C's code point in that charset.
   The visible chain tests, in order: basic Latin (Vcharset_ascii), two
   branches storing Vcharset_control_1 and Vcharset_latin_iso8859_1,
   Greek, Cyrillic, Hebrew, Thai (each returning C minus the block's
   minimum plus 0x20), the BMP (Vcharset_ucs_bmp), Daikanwa, Mojikyo, and
   the 94 / 96 / 94x94 / 96x96 blocks, where the charset is found with
   CHARSET_BY_ATTRIBUTES from the block index plus '0' as final byte.
   When no charset is registered for that final byte, and for every gap
   between blocks, *CHARSET becomes Vcharset_ucs.
   NOTE(review): the half-width katakana branch returns the result of
   list2 (...) although every other branch returns an integer and stores
   into *CHARSET; it may be disabled by preprocessor lines that are
   missing from this view -- confirm.
   NOTE(review): the range tests of the second and third branches and
   the return statements of the Vcharset_ucs / Vcharset_ucs_bmp branches
   are also missing from this view. */
2356 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
2358 if (c <= MAX_CHAR_BASIC_LATIN)
2360 *charset = Vcharset_ascii;
2365 *charset = Vcharset_control_1;
2370 *charset = Vcharset_latin_iso8859_1;
2374 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
2376 *charset = Vcharset_greek_iso8859_7;
2377 return c - MIN_CHAR_GREEK + 0x20;
2379 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
2381 *charset = Vcharset_cyrillic_iso8859_5;
2382 return c - MIN_CHAR_CYRILLIC + 0x20;
2385 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
2387 *charset = Vcharset_hebrew_iso8859_8;
2388 return c - MIN_CHAR_HEBREW + 0x20;
2390 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
2392 *charset = Vcharset_thai_tis620;
2393 return c - MIN_CHAR_THAI + 0x20;
2396 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
2397 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
2399 return list2 (Vcharset_katakana_jisx0201,
2400 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
2403 else if (c <= MAX_CHAR_BMP)
2405 *charset = Vcharset_ucs_bmp;
2408 else if (c < MIN_CHAR_DAIKANWA)
2410 *charset = Vcharset_ucs;
2413 else if (c <= MAX_CHAR_DAIKANWA)
2415 *charset = Vcharset_ideograph_daikanwa;
2416 return c - MIN_CHAR_DAIKANWA;
2418 else if (c <= MAX_CHAR_MOJIKYO_0)
2420 *charset = Vcharset_mojikyo;
2421 return c - MIN_CHAR_MOJIKYO_0;
2423 else if (c < MIN_CHAR_94)
2425 *charset = Vcharset_ucs;
2428 else if (c <= MAX_CHAR_94)
2430 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
2431 ((c - MIN_CHAR_94) / 94) + '0',
2432 CHARSET_LEFT_TO_RIGHT);
2433 if (!NILP (*charset))
2434 return ((c - MIN_CHAR_94) % 94) + 33;
2437 *charset = Vcharset_ucs;
2441 else if (c <= MAX_CHAR_96)
2443 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
2444 ((c - MIN_CHAR_96) / 96) + '0',
2445 CHARSET_LEFT_TO_RIGHT);
2446 if (!NILP (*charset))
2447 return ((c - MIN_CHAR_96) % 96) + 32;
2450 *charset = Vcharset_ucs;
2454 else if (c <= MAX_CHAR_94x94)
2457 = CHARSET_BY_ATTRIBUTES (94, 2,
2458 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
2459 CHARSET_LEFT_TO_RIGHT);
2460 if (!NILP (*charset))
2461 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
2462 | (((c - MIN_CHAR_94x94) % 94) + 33);
2465 *charset = Vcharset_ucs;
2469 else if (c <= MAX_CHAR_96x96)
2472 = CHARSET_BY_ATTRIBUTES (96, 2,
2473 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
2474 CHARSET_LEFT_TO_RIGHT);
2475 if (!NILP (*charset))
2476 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
2477 | (((c - MIN_CHAR_96x96) % 96) + 32);
2480 *charset = Vcharset_ucs;
2484 else if (c < MIN_CHAR_MOJIKYO)
2486 *charset = Vcharset_ucs;
2489 else if (c <= MAX_CHAR_MOJIKYO)
2491 *charset = Vcharset_mojikyo;
2492 return c - MIN_CHAR_MOJIKYO;
2496 *charset = Vcharset_ucs;
2501 Lisp_Object Vdefault_coded_charset_priority_list;
2505 /************************************************************************/
2506 /* Basic charset Lisp functions */
2507 /************************************************************************/
/* Lisp primitive `charsetp': yields Qt when OBJECT satisfies CHARSETP
   and Qnil otherwise. */
2509 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
2510 Return non-nil if OBJECT is a charset.
2514 return CHARSETP (object) ? Qt : Qnil;
/* Lisp primitive `find-charset': a charset object is returned as is;
   anything else must pass CHECK_SYMBOL and is looked up in
   Vcharset_hash_table, with Qnil as the result when there is no
   entry. */
2517 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
2518 Retrieve the charset of the given name.
2519 If CHARSET-OR-NAME is a charset object, it is simply returned.
2520 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
2521 nil is returned. Otherwise the associated charset object is returned.
2525 if (CHARSETP (charset_or_name))
2526 return charset_or_name;
2528 CHECK_SYMBOL (charset_or_name);
2529 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Lisp primitive `get-charset': resolves NAME with Ffind_charset and
   signals "No such charset" with NAME as the datum.
   NOTE(review): the test guarding the error and the return statement
   are missing from this view. */
2532 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
2533 Retrieve the charset of the given name.
2534 Same as `find-charset' except an error is signalled if there is no such
2535 charset instead of returning nil.
2539 Lisp_Object charset = Ffind_charset (name);
2542 signal_simple_error ("No such charset", name);
2546 /* We store the charsets in hash tables with the names as the key and the
2547 actual charset object as the value. Occasionally we need to use them
2548 in a list format. These routines provide us with that. */
/* Closure passed through elisp_maphash by Fcharset_list;
   add_charset_to_list_mapper conses onto the list that CHARSET_LIST
   points at. */
2549 struct charset_list_closure
2551 Lisp_Object *charset_list;
/* Hash-table mapping callback used by Fcharset_list: conses KEY onto the
   list addressed by the closure.  VALUE is not used by the visible code
   (the alternative XCHARSET_NAME (value) is left as a comment).
   NOTE(review): the return statement is missing from this view. */
2555 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
2556 void *charset_list_closure)
2558 /* This function can GC */
2559 struct charset_list_closure *chcl =
2560 (struct charset_list_closure*) charset_list_closure;
2561 Lisp_Object *charset_list = chcl->charset_list;
2563 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
/* Lisp primitive `charset-list': maps add_charset_to_list_mapper over
   Vcharset_hash_table, collecting every key into a list that is
   GC-protected with GCPRO1 while the table is walked.
   NOTE(review): because keys rather than charset names are collected,
   symbols registered by Fdefine_charset_alias presumably appear in the
   result as well -- confirm whether that is intended. */
2567 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
2568 Return a list of the names of all defined charsets.
2572 Lisp_Object charset_list = Qnil;
2573 struct gcpro gcpro1;
2574 struct charset_list_closure charset_list_closure;
2576 GCPRO1 (charset_list);
2577 charset_list_closure.charset_list = &charset_list;
2578 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
2579 &charset_list_closure);
2582 return charset_list;
/* Lisp primitive `charset-name': resolves CHARSET with Fget_charset and
   returns its XCHARSET_NAME. */
2585 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
2586 Return the name of charset CHARSET.
2590 return XCHARSET_NAME (Fget_charset (charset));
2593 /* #### SJT Should generic properties be allowed? */
2594 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
2595 Define a new character set.
2596 This function is for use with Mule support.
2597 NAME is a symbol, the name by which the character set is normally referred.
2598 DOC-STRING is a string describing the character set.
2599 PROPS is a property list, describing the specific nature of the
2600 character set. Recognized properties are:
2602 'short-name Short version of the charset name (ex: Latin-1)
2603 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
2604 'registry A regular expression matching the font registry field for
2606 'dimension Number of octets used to index a character in this charset.
2607 Either 1 or 2. Defaults to 1.
2608 'columns Number of columns used to display a character in this charset.
2609 Only used in TTY mode. (Under X, the actual width of a
2610 character can be derived from the font used to display the
2611 characters.) If unspecified, defaults to the dimension
2612 (this is almost always the correct value).
2613 'chars Number of characters in each dimension (94 or 96).
2614 Defaults to 94. Note that if the dimension is 2, the
2615 character set thus described is 94x94 or 96x96.
2616 'final Final byte of ISO 2022 escape sequence. Must be
2617 supplied. Each combination of (DIMENSION, CHARS) defines a
2618 separate namespace for final bytes. Note that ISO
2619 2022 restricts the final byte to the range
2620 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
2621 dimension == 2. Note also that final bytes in the range
2622 0x30 - 0x3F are reserved for user-defined (not official)
2624 'graphic 0 (use left half of font on output) or 1 (use right half
2625 of font on output). Defaults to 0. For example, for
2626 a font whose registry is ISO8859-1, the left half
2627 (octets 0x20 - 0x7F) is the `ascii' character set, while
2628 the right half (octets 0xA0 - 0xFF) is the `latin-1'
2629 character set. With 'graphic set to 0, the octets
2630 will have their high bit cleared; with it set to 1,
2631 the octets will have their high bit set.
2632 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
2634 'ccl-program A compiled CCL program used to convert a character in
2635 this charset into an index into the font. This is in
2636 addition to the 'graphic property. The CCL program
2637 is passed the octets of the character, with the high
2638 bit cleared and set depending upon whether the value
2639 of the 'graphic property is 0 or 1.
2641 (name, doc_string, props))
2643 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
2644 int direction = CHARSET_LEFT_TO_RIGHT;
2645 Lisp_Object registry = Qnil;
2646 Lisp_Object charset;
2647 Lisp_Object ccl_program = Qnil;
2648 Lisp_Object short_name = Qnil, long_name = Qnil;
2649 int byte_offset = -1;
2651 CHECK_SYMBOL (name);
2652 if (!NILP (doc_string))
2653 CHECK_STRING (doc_string);
2655 charset = Ffind_charset (name);
2656 if (!NILP (charset))
2657 signal_simple_error ("Cannot redefine existing charset", name);
2660 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
2662 if (EQ (keyword, Qshort_name))
2664 CHECK_STRING (value);
2668 if (EQ (keyword, Qlong_name))
2670 CHECK_STRING (value);
2674 else if (EQ (keyword, Qdimension))
2677 dimension = XINT (value);
2678 if (dimension < 1 || dimension > 2)
2679 signal_simple_error ("Invalid value for 'dimension", value);
2682 else if (EQ (keyword, Qchars))
2685 chars = XINT (value);
2686 if (chars != 94 && chars != 96)
2687 signal_simple_error ("Invalid value for 'chars", value);
2690 else if (EQ (keyword, Qcolumns))
2693 columns = XINT (value);
2694 if (columns != 1 && columns != 2)
2695 signal_simple_error ("Invalid value for 'columns", value);
2698 else if (EQ (keyword, Qgraphic))
2701 graphic = XINT (value);
2703 if (graphic < 0 || graphic > 2)
2705 if (graphic < 0 || graphic > 1)
2707 signal_simple_error ("Invalid value for 'graphic", value);
2710 else if (EQ (keyword, Qregistry))
2712 CHECK_STRING (value);
2716 else if (EQ (keyword, Qdirection))
2718 if (EQ (value, Ql2r))
2719 direction = CHARSET_LEFT_TO_RIGHT;
2720 else if (EQ (value, Qr2l))
2721 direction = CHARSET_RIGHT_TO_LEFT;
2723 signal_simple_error ("Invalid value for 'direction", value);
2726 else if (EQ (keyword, Qfinal))
2728 CHECK_CHAR_COERCE_INT (value);
2729 final = XCHAR (value);
2730 if (final < '0' || final > '~')
2731 signal_simple_error ("Invalid value for 'final", value);
2734 else if (EQ (keyword, Qccl_program))
2736 struct ccl_program test_ccl;
2738 if (setup_ccl_program (&test_ccl, value) < 0)
2739 signal_simple_error ("Invalid value for 'ccl-program", value);
2740 ccl_program = value;
2744 signal_simple_error ("Unrecognized property", keyword);
2749 error ("'final must be specified");
2750 if (dimension == 2 && final > 0x5F)
2752 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2755 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2756 CHARSET_LEFT_TO_RIGHT)) ||
2757 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2758 CHARSET_RIGHT_TO_LEFT)))
2760 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2762 id = get_unallocated_leading_byte (dimension);
2764 if (NILP (doc_string))
2765 doc_string = build_string ("");
2767 if (NILP (registry))
2768 registry = build_string ("");
2770 if (NILP (short_name))
2771 XSETSTRING (short_name, XSYMBOL (name)->name);
2773 if (NILP (long_name))
2774 long_name = doc_string;
2777 columns = dimension;
2779 if (byte_offset < 0)
2783 else if (chars == 96)
2789 charset = make_charset (id, name, chars, dimension, columns, graphic,
2790 final, direction, short_name, long_name,
2791 doc_string, registry,
2792 Qnil, 0, 0, 0, byte_offset);
2793 if (!NILP (ccl_program))
2794 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp primitive `make-reverse-direction-charset' (CHARSET NEW-NAME).
   CHARSET is resolved with Fget_charset and must not already have a
   reverse-direction charset; NEW-NAME must be a symbol that does not
   name an existing charset.  The new charset copies CHARSET's chars,
   dimension, columns, graphic, final byte, doc string, short name, long
   name and registry, gets a freshly allocated leading byte, and takes
   the opposite direction.  The two charsets are then linked to each
   other through their reverse-direction fields.
   The decoding table, UCS range, code offset and byte offset of CHARSET
   are passed on to make_charset as well.
   NOTE(review): the argument-count part of the DEFUN header, some
   declarations and the return statement are missing from this view. */
2798 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2800 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2801 NEW-NAME is the name of the new charset. Return the new charset.
2803 (charset, new_name))
2805 Lisp_Object new_charset = Qnil;
2806 int id, chars, dimension, columns, graphic, final;
2808 Lisp_Object registry, doc_string, short_name, long_name;
2811 charset = Fget_charset (charset);
2812 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2813 signal_simple_error ("Charset already has reverse-direction charset",
2816 CHECK_SYMBOL (new_name);
2817 if (!NILP (Ffind_charset (new_name)))
2818 signal_simple_error ("Cannot redefine existing charset", new_name);
2820 cs = XCHARSET (charset);
2822 chars = CHARSET_CHARS (cs);
2823 dimension = CHARSET_DIMENSION (cs);
2824 columns = CHARSET_COLUMNS (cs);
2825 id = get_unallocated_leading_byte (dimension);
2827 graphic = CHARSET_GRAPHIC (cs);
2828 final = CHARSET_FINAL (cs);
2829 direction = CHARSET_RIGHT_TO_LEFT;
2830 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2831 direction = CHARSET_LEFT_TO_RIGHT;
2832 doc_string = CHARSET_DOC_STRING (cs);
2833 short_name = CHARSET_SHORT_NAME (cs);
2834 long_name = CHARSET_LONG_NAME (cs);
2835 registry = CHARSET_REGISTRY (cs);
2837 new_charset = make_charset (id, new_name, chars, dimension, columns,
2838 graphic, final, direction, short_name, long_name,
2839 doc_string, registry,
2841 CHARSET_DECODING_TABLE(cs),
2842 CHARSET_UCS_MIN(cs),
2843 CHARSET_UCS_MAX(cs),
2844 CHARSET_CODE_OFFSET(cs),
2845 CHARSET_BYTE_OFFSET(cs)
2851 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2852 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Lisp primitive `define-charset-alias' (ALIAS CHARSET): ALIAS must be a
   symbol; CHARSET is resolved with Fget_charset (which signals when no
   such charset exists) and stored in Vcharset_hash_table under ALIAS.
   The value of Fputhash is returned. */
2857 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2858 Define symbol ALIAS as an alias for CHARSET.
2862 CHECK_SYMBOL (alias);
2863 charset = Fget_charset (charset);
2864 return Fputhash (alias, charset, Vcharset_hash_table);
2867 /* #### Reverse direction charsets not yet implemented. */
/* Lisp primitive `charset-reverse-direction-charset': resolves CHARSET
   with Fget_charset and returns its reverse-direction field.  That field
   starts out as Qnil in make_charset and is set by
   Fmake_reverse_direction_charset. */
2869 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2871 Return the reverse-direction charset parallel to CHARSET, if any.
2872 This is the charset with the same properties (in particular, the same
2873 dimension, number of characters per dimension, and final byte) as
2874 CHARSET but whose characters are displayed in the opposite direction.
2878 charset = Fget_charset (charset);
2879 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Lisp primitive `charset-from-attributes'
   (DIMENSION CHARS FINAL &optional DIRECTION).
   Validation visible here: DIMENSION must be an integer 1 or 2, CHARS
   must be 94 or 96, FINAL must be a character in '0' .. '~' (and at most
   0x5F when DIMENSION is 2), DIRECTION must be 'l2r, 'r2l or nil.
   Lookup: with no DIRECTION (DI stays -1) the left-to-right and
   right-to-left slots of CHARSET_BY_ATTRIBUTES are both consulted;
   otherwise only the requested direction is.  The name of the charset
   found is returned.
   NOTE(review): the assignments to CH and FI, the tests that sequence
   the two lookups, and the result when nothing is found are missing from
   this view. */
2883 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2884 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2885 If DIRECTION is omitted, both directions will be checked (left-to-right
2886 will be returned if character sets exist for both directions).
2888 (dimension, chars, final, direction))
2890 int dm, ch, fi, di = -1;
2891 Lisp_Object obj = Qnil;
2893 CHECK_INT (dimension);
2894 dm = XINT (dimension);
2895 if (dm < 1 || dm > 2)
2896 signal_simple_error ("Invalid value for DIMENSION", dimension);
2900 if (ch != 94 && ch != 96)
2901 signal_simple_error ("Invalid value for CHARS", chars);
2903 CHECK_CHAR_COERCE_INT (final);
2905 if (fi < '0' || fi > '~')
2906 signal_simple_error ("Invalid value for FINAL", final);
2908 if (EQ (direction, Ql2r))
2909 di = CHARSET_LEFT_TO_RIGHT;
2910 else if (EQ (direction, Qr2l))
2911 di = CHARSET_RIGHT_TO_LEFT;
2912 else if (!NILP (direction))
2913 signal_simple_error ("Invalid value for DIRECTION", direction);
2915 if (dm == 2 && fi > 0x5F)
2917 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2921 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
2923 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
2926 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
2929 return XCHARSET_NAME (obj);
2933 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2934 Return short name of CHARSET.
2938 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2941 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2942 Return long name of CHARSET.
2946 return XCHARSET_LONG_NAME (Fget_charset (charset));
2949 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2950 Return description of CHARSET.
2954 return XCHARSET_DOC_STRING (Fget_charset (charset));
2957 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2958 Return dimension of CHARSET.
2962 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2965 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2966 Return property PROP of CHARSET, a charset object or symbol naming a charset.
2967 Recognized properties are those listed in `make-charset', as well as
2968 'name and 'doc-string.
2974 charset = Fget_charset (charset);
2975 cs = XCHARSET (charset);
2977 CHECK_SYMBOL (prop);
2978 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2979 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2980 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2981 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2982 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2983 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2984 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2985 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2986 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2987 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2988 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2989 if (EQ (prop, Qdirection))
2990 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2991 if (EQ (prop, Qreverse_direction_charset))
2993 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2994 /* #### Is this translation OK? If so, error checking sufficient? */
2995 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
2997 signal_simple_error ("Unrecognized charset property name", prop);
2998 return Qnil; /* not reached */
3001 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
3002 Return charset identification number of CHARSET.
3006 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
/* #### We need to figure out which properties we really want to
   allow to be set. */
3012 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
3013 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
3015 (charset, ccl_program))
3017 struct ccl_program test_ccl;
3019 charset = Fget_charset (charset);
3020 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
3021 signal_simple_error ("Invalid ccl-program", ccl_program);
3022 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
3027 invalidate_charset_font_caches (Lisp_Object charset)
3029 /* Invalidate font cache entries for charset on all devices. */
3030 Lisp_Object devcons, concons, hash_table;
3031 DEVICE_LOOP_NO_BREAK (devcons, concons)
3033 struct device *d = XDEVICE (XCAR (devcons));
3034 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
3035 if (!UNBOUNDP (hash_table))
3036 Fclrhash (hash_table);
3040 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
3041 Set the 'registry property of CHARSET to REGISTRY.
3043 (charset, registry))
3045 charset = Fget_charset (charset);
3046 CHECK_STRING (registry);
3047 XCHARSET_REGISTRY (charset) = registry;
3048 invalidate_charset_font_caches (charset);
3049 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
3054 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
3055 Return mapping-table of CHARSET.
3059 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Lisp primitive `set-charset-mapping-table'.
   Visible behavior:
   - CHARSET is normalized through Fget_charset.
   - In the first branch (its condition is not shown in this listing;
     presumably TABLE is nil -- confirm) an existing vector decoding
     table is passed to make_vector_newer and the slot is set to Qnil.
   - When TABLE is a vector it is validated with
     decoding_table_check_elements; failures signal "Too big table",
     "Invalid element is found" or "Something wrong" (the result codes
     that select each message are not shown).  The decoding-table slot
     is then reset to Qnil.
   - Any other TABLE signals a wrong-type-argument error.
   - Finally the table is walked according to CHARSET_DIMENSION and
     put_char_ccs_code_point is called for entries, with code points
     built from the index plus CHARSET_BYTE_OFFSET; for nested vectors
     the outer index is shifted left by 8 bits.
   NOTE(review): several declarations, conditions and case labels are
   absent from this listing; verify the description against the full
   source before relying on it. */
3062 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
3063 Set mapping-table of CHARSET to TABLE.
3067 struct Lisp_Charset *cs;
3071 charset = Fget_charset (charset);
3072 cs = XCHARSET (charset);
3076 if (VECTORP (CHARSET_DECODING_TABLE(cs)))
3077 make_vector_newer (CHARSET_DECODING_TABLE(cs));
3078 CHARSET_DECODING_TABLE(cs) = Qnil;
3081 else if (VECTORP (table))
3083 int ccs_len = CHARSET_BYTE_SIZE (cs);
3084 int ret = decoding_table_check_elements (table,
3085 CHARSET_DIMENSION (cs),
3090 signal_simple_error ("Too big table", table);
3092 signal_simple_error ("Invalid element is found", table);
3094 signal_simple_error ("Something wrong", table);
3096 CHARSET_DECODING_TABLE(cs) = Qnil;
3099 signal_error (Qwrong_type_argument,
3100 list2 (build_translated_string ("vector-or-nil-p"),
3103 byte_offset = CHARSET_BYTE_OFFSET (cs);
3104 switch (CHARSET_DIMENSION (cs))
3107 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3109 Lisp_Object c = XVECTOR_DATA(table)[i];
3112 put_char_ccs_code_point (c, charset,
3113 make_int (i + byte_offset));
3117 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3119 Lisp_Object v = XVECTOR_DATA(table)[i];
3125 for (j = 0; j < XVECTOR_LENGTH (v); j++)
3127 Lisp_Object c = XVECTOR_DATA(v)[j];
3130 put_char_ccs_code_point
3132 make_int ( ( (i + byte_offset) << 8 )
3138 put_char_ccs_code_point (v, charset,
3139 make_int (i + byte_offset));
3148 /************************************************************************/
3149 /* Lisp primitives for working with characters */
3150 /************************************************************************/
/* Lisp primitive `decode-char': normalizes CHARSET through Fget_charset,
   converts the code point with DECODE_CHAR and returns the resulting
   character, or nil when DECODE_CHAR yields 0.
   NOTE(review): the declaration of `c', the extraction of CODE into it,
   and the statement guarded by the XCHARSET_GRAPHIC == 1 test are not
   present in this listing -- confirm what adjustment is applied to the
   code point for such charsets. */
3153 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
3154 Make a character from CHARSET and code-point CODE.
3160 charset = Fget_charset (charset);
3163 if (XCHARSET_GRAPHIC (charset) == 1)
3165 c = DECODE_CHAR (charset, c);
3166 return c ? make_char (c) : Qnil;
/* Lisp primitive `decode-builtin-char'.
   Visible behavior:
   - CHARSET is normalized through Fget_charset.
   - For the latin-viscii charset the code is first decoded with
     Fdecode_char, and the resulting character's attributes for
     latin-viscii-lower and latin-viscii-upper are queried; CHARSET is
     switched to whichever of those applies (the exact conditions and
     how the code is updated are not shown in this listing).
   - The code point is then passed to decode_builtin_char; a
     non-negative result is returned as a character, otherwise the
     function falls back to Fdecode_char.
   NOTE(review): the statement guarded by the XCHARSET_GRAPHIC == 1
   test is missing here -- confirm. */
3169 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
3170 Make a builtin character from CHARSET and code-point CODE.
3176 charset = Fget_charset (charset);
3178 if (EQ (charset, Vcharset_latin_viscii))
3180 Lisp_Object chr = Fdecode_char (charset, code);
3186 (ret = Fget_char_attribute (chr,
3187 Vcharset_latin_viscii_lower,
3190 charset = Vcharset_latin_viscii_lower;
3194 (ret = Fget_char_attribute (chr,
3195 Vcharset_latin_viscii_upper,
3198 charset = Vcharset_latin_viscii_upper;
3205 if (XCHARSET_GRAPHIC (charset) == 1)
3208 c = decode_builtin_char (charset, c);
3209 return c >= 0 ? make_char (c) : Fdecode_char (charset, code);
/* Lisp primitive `make-char'.
   Visible behavior:
   - CHARSET is normalized through Fget_charset.
   - The permitted octet range [lowlim, highlim] depends on the charset:
     0..127 for ascii, 0..31 for control-1, 0..255 for 256-character
     sets, 33..126 for 94-character sets and 32..127 otherwise.
   - ARG1 (as a1) outside that range signals args-out-of-range.
   - For a dimension-one charset a non-nil ARG2 is rejected and the
     character is built from a1 alone.
   - Otherwise ARG2 is masked with 0x7f, range-checked the same way, and
     the character is built from both octets.
   NOTE(review): the lines that type-check ARG1/ARG2 and derive a1 are
   not present in this listing; the comment in the body says the 8th bit
   is stripped from both arguments -- confirm for a1. */
3213 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
3214 Make a character from CHARSET and octets ARG1 and ARG2.
3215 ARG2 is required only for characters from two-dimensional charsets.
3216 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
3217 character s with caron.
3219 (charset, arg1, arg2))
3223 int lowlim, highlim;
3225 charset = Fget_charset (charset);
3226 cs = XCHARSET (charset);
3228 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
3229 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
3231 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
3233 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
3234 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
3237 /* It is useful (and safe, according to Olivier Galibert) to strip
3238 the 8th bit off ARG1 and ARG2 because it allows programmers to
3239 write (make-char 'latin-iso8859-2 CODE) where code is the actual
3240 Latin 2 code of the character. */
3248 if (a1 < lowlim || a1 > highlim)
3249 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
3251 if (CHARSET_DIMENSION (cs) == 1)
3255 ("Charset is of dimension one; second octet must be nil", arg2);
3256 return make_char (MAKE_CHAR (charset, a1, 0));
3265 a2 = XINT (arg2) & 0x7f;
3267 if (a2 < lowlim || a2 > highlim)
3268 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
3270 return make_char (MAKE_CHAR (charset, a1, a2));
3273 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
3274 Return the character set of CHARACTER.
3278 CHECK_CHAR_COERCE_INT (character);
3280 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (character)));
3283 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
3284 Return the octet numbered N (should be 0 or 1) of CHARACTER.
3285 N defaults to 0 if omitted.
3289 Lisp_Object charset;
3292 CHECK_CHAR_COERCE_INT (character);
3294 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
3296 if (NILP (n) || EQ (n, Qzero))
3297 return make_int (octet0);
3298 else if (EQ (n, make_int (1)))
3299 return make_int (octet1);
3301 signal_simple_error ("Octet number must be 0 or 1", n);
/* Lisp primitive `split-char'.
   Two alternative bodies are visible below:
   - one obtains a code point with ENCODE_CHAR, then conses its low
     bytes (code_point & 255) onto RC once per charset dimension and
     finally conses the charset name on the front;
   - the other splits the character with BREAKUP_CHAR and builds a
     three-element list (name, c1, c2) for dimension-two charsets or a
     two-element list (name, c1) otherwise.
   NOTE(review): the conditionals that choose between the two bodies,
   the loop's update statements, the matching UNGCPRO and the return
   are not present in this listing -- confirm against the full source.
   CHARSET and RC are GC-protected because, as noted in the body, this
   function can GC. */
3304 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
3305 Return list of charset and one or two position-codes of CHARACTER.
3309 /* This function can GC */
3310 struct gcpro gcpro1, gcpro2;
3311 Lisp_Object charset = Qnil;
3312 Lisp_Object rc = Qnil;
3320 GCPRO2 (charset, rc);
3321 CHECK_CHAR_COERCE_INT (character);
3324 code_point = ENCODE_CHAR (XCHAR (character), charset);
3325 dimension = XCHARSET_DIMENSION (charset);
3326 while (dimension > 0)
3328 rc = Fcons (make_int (code_point & 255), rc);
3332 rc = Fcons (XCHARSET_NAME (charset), rc);
3334 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
3336 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
3338 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
3342 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
3351 #ifdef ENABLE_COMPOSITE_CHARS
3352 /************************************************************************/
3353 /* composite character functions */
3354 /************************************************************************/
3357 lookup_composite_char (Bufbyte *str, int len)
3359 Lisp_Object lispstr = make_string (str, len);
3360 Lisp_Object ch = Fgethash (lispstr,
3361 Vcomposite_char_string2char_hash_table,
3367 if (composite_char_row_next >= 128)
3368 signal_simple_error ("No more composite chars available", lispstr);
3369 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
3370 composite_char_col_next);
3371 Fputhash (make_char (emch), lispstr,
3372 Vcomposite_char_char2string_hash_table);
3373 Fputhash (lispstr, make_char (emch),
3374 Vcomposite_char_string2char_hash_table);
3375 composite_char_col_next++;
3376 if (composite_char_col_next >= 128)
3378 composite_char_col_next = 32;
3379 composite_char_row_next++;
3388 composite_char_string (Emchar ch)
3390 Lisp_Object str = Fgethash (make_char (ch),
3391 Vcomposite_char_char2string_hash_table,
3393 assert (!UNBOUNDP (str));
/* `make-composite-char': checks that STRING is a string and returns the
   character that lookup_composite_char yields for its data and length.
   NOTE(review): this is introduced with xxDEFUN rather than DEFUN, and
   part of the doc text, the parameter list and the braces are not
   present in this listing -- confirm against the full source. */
3397 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
3398 Convert a string into a single composite character.
3399 The character is the result of overstriking all the characters in
3404 CHECK_STRING (string);
3405 return make_char (lookup_composite_char (XSTRING_DATA (string),
3406 XSTRING_LENGTH (string)));
/* `composite-char-string': signals "Must be composite char" unless the
   character's leading byte is LEADING_BYTE_COMPOSITE, then returns the
   string composite_char_string yields for it.
   NOTE(review): this is introduced with xxDEFUN rather than DEFUN, and
   the lines that validate CH and derive `emch' from it are not present
   in this listing -- confirm against the full source. */
3409 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
3410 Return a string of the characters comprising a composite character.
3418 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
3419 signal_simple_error ("Must be composite char", ch);
3420 return composite_char_string (emch);
3422 #endif /* ENABLE_COMPOSITE_CHARS */
3425 /************************************************************************/
3426 /* initialization */
3427 /************************************************************************/
/* Symbol-level initialization for this file: sets up the lrecord
   implementations, registers the Lisp primitives with DEFSUBR, and
   interns the symbols used for charset properties, directions, charset
   names and decomposition tags.
   NOTE(review): the return type, braces and most preprocessor
   conditionals are not present in this listing, so which groups of
   lines are conditional cannot be determined from here. */
3430 syms_of_mule_charset (void)
3433 INIT_LRECORD_IMPLEMENTATION (uint8_byte_table);
3434 INIT_LRECORD_IMPLEMENTATION (uint16_byte_table);
3435 INIT_LRECORD_IMPLEMENTATION (byte_table);
3436 INIT_LRECORD_IMPLEMENTATION (char_id_table);
3438 INIT_LRECORD_IMPLEMENTATION (charset);
/* Lisp primitives operating on charsets. */
3440 DEFSUBR (Fcharsetp);
3441 DEFSUBR (Ffind_charset);
3442 DEFSUBR (Fget_charset);
3443 DEFSUBR (Fcharset_list);
3444 DEFSUBR (Fcharset_name);
3445 DEFSUBR (Fmake_charset);
3446 DEFSUBR (Fmake_reverse_direction_charset);
3447 /* DEFSUBR (Freverse_direction_charset); */
3448 DEFSUBR (Fdefine_charset_alias);
3449 DEFSUBR (Fcharset_from_attributes);
3450 DEFSUBR (Fcharset_short_name);
3451 DEFSUBR (Fcharset_long_name);
3452 DEFSUBR (Fcharset_description);
3453 DEFSUBR (Fcharset_dimension);
3454 DEFSUBR (Fcharset_property);
3455 DEFSUBR (Fcharset_id);
3456 DEFSUBR (Fset_charset_ccl_program);
3457 DEFSUBR (Fset_charset_registry);
/* Lisp primitives for character attributes and mapping tables. */
3459 DEFSUBR (Fchar_attribute_list);
3460 DEFSUBR (Ffind_char_attribute_table);
3461 DEFSUBR (Fchar_attribute_alist);
3462 DEFSUBR (Fget_char_attribute);
3463 DEFSUBR (Fput_char_attribute);
3464 DEFSUBR (Fremove_char_attribute);
3465 DEFSUBR (Fdefine_char);
3466 DEFSUBR (Fchar_variants);
3467 DEFSUBR (Fget_composite_char);
3468 DEFSUBR (Fcharset_mapping_table);
3469 DEFSUBR (Fset_charset_mapping_table);
/* Lisp primitives operating on characters. */
3473 DEFSUBR (Fdecode_char);
3474 DEFSUBR (Fdecode_builtin_char);
3476 DEFSUBR (Fmake_char);
3477 DEFSUBR (Fchar_charset);
3478 DEFSUBR (Fchar_octet);
3479 DEFSUBR (Fsplit_char);
3481 #ifdef ENABLE_COMPOSITE_CHARS
3482 DEFSUBR (Fmake_composite_char);
3483 DEFSUBR (Fcomposite_char_string);
/* Symbols naming charset properties. */
3486 defsymbol (&Qcharsetp, "charsetp");
3487 defsymbol (&Qregistry, "registry");
3488 defsymbol (&Qfinal, "final");
3489 defsymbol (&Qgraphic, "graphic");
3490 defsymbol (&Qdirection, "direction");
3491 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
3492 defsymbol (&Qshort_name, "short-name");
3493 defsymbol (&Qlong_name, "long-name");
/* Symbols naming the two display directions. */
3495 defsymbol (&Ql2r, "l2r");
3496 defsymbol (&Qr2l, "r2l");
3498 /* Charsets, compatible with FSF 20.3
3499 Naming convention is Script-Charset[-Edition] */
3500 defsymbol (&Qascii, "ascii");
3501 defsymbol (&Qcontrol_1, "control-1");
3502 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
3503 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
3504 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
3505 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
3506 defsymbol (&Qthai_tis620, "thai-tis620");
3507 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
3508 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
3509 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
3510 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
3511 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
3512 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
3513 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
3514 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
3515 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
3516 defsymbol (&Qchinese_gb12345, "chinese-gb12345");
3517 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
3518 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
3519 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
3520 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
3521 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
3522 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
/* Symbols for the ->ucs and ->decomposition attributes and for the
   tags that follow them. */
3524 defsymbol (&Q_ucs, "->ucs");
3525 defsymbol (&Q_decomposition, "->decomposition");
3526 defsymbol (&Qcompat, "compat");
3527 defsymbol (&Qisolated, "isolated");
3528 defsymbol (&Qinitial, "initial");
3529 defsymbol (&Qmedial, "medial");
/* NOTE(review): Qfinal is already interned earlier in this function;
   confirm whether this second call is needed. */
3530 defsymbol (&Qfinal, "final");
3531 defsymbol (&Qvertical, "vertical");
3532 defsymbol (&QnoBreak, "noBreak");
3533 defsymbol (&Qfraction, "fraction");
3534 defsymbol (&Qsuper, "super");
3535 defsymbol (&Qsub, "sub");
3536 defsymbol (&Qcircle, "circle");
3537 defsymbol (&Qsquare, "square");
3538 defsymbol (&Qwide, "wide");
3539 defsymbol (&Qnarrow, "narrow");
3540 defsymbol (&Qsmall, "small");
3541 defsymbol (&Qfont, "font");
/* Further charset name symbols. */
3542 defsymbol (&Qucs, "ucs");
3543 defsymbol (&Qucs_bmp, "ucs-bmp");
3544 defsymbol (&Qucs_cns, "ucs-cns");
3545 defsymbol (&Qucs_big5, "ucs-big5");
3546 defsymbol (&Qlatin_viscii, "latin-viscii");
3547 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
3548 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
3549 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
3550 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
3551 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
3552 defsymbol (&Qideograph_gt, "ideograph-gt");
3553 defsymbol (&Qideograph_gt_pj_1, "ideograph-gt-pj-1");
3554 defsymbol (&Qideograph_gt_pj_2, "ideograph-gt-pj-2");
3555 defsymbol (&Qideograph_gt_pj_3, "ideograph-gt-pj-3");
3556 defsymbol (&Qideograph_gt_pj_4, "ideograph-gt-pj-4");
3557 defsymbol (&Qideograph_gt_pj_5, "ideograph-gt-pj-5");
3558 defsymbol (&Qideograph_gt_pj_6, "ideograph-gt-pj-6");
3559 defsymbol (&Qideograph_gt_pj_7, "ideograph-gt-pj-7");
3560 defsymbol (&Qideograph_gt_pj_8, "ideograph-gt-pj-8");
3561 defsymbol (&Qideograph_gt_pj_9, "ideograph-gt-pj-9");
3562 defsymbol (&Qideograph_gt_pj_10, "ideograph-gt-pj-10");
3563 defsymbol (&Qideograph_gt_pj_11, "ideograph-gt-pj-11");
3564 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
3565 defsymbol (&Qchinese_big5, "chinese-big5");
3566 defsymbol (&Qmojikyo, "mojikyo");
3567 defsymbol (&Qmojikyo_2022_1, "mojikyo-2022-1");
3568 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
3569 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
3570 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
3571 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
3572 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
3573 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
3574 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
3575 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
3576 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
3577 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
3578 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
3579 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
3580 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
3581 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
3582 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
3583 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
3584 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
3585 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
3586 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
3587 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
3588 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
3589 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
3591 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
3592 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
3594 defsymbol (&Qcomposite, "composite");
/* Variable-level initialization for this file: allocates the charset
   lookup structure and registers it for dumping, clears its lookup
   tables to Qnil, seeds the counters used to allocate private leading
   bytes, and sets up the Lisp-visible variables
   `leading-code-private-11', `utf-2000-version' and
   `default-coded-charset-priority-list' together with the
   GC-protected composition and variant tables.
   NOTE(review): the return type, local declarations, braces and
   preprocessor conditionals are not present in this listing.  The two
   loops over charset_by_attributes (two and three indices) and the two
   groups of next_allocated_* assignments look like alternative
   configurations -- confirm.  leading_code_private_11 is also assigned
   the same value both before and after its DEFVAR_INT. */
3598 vars_of_mule_charset (void)
3605 chlook = xnew (struct charset_lookup);
3606 dumpstruct (&chlook, &charset_lookup_description);
3608 /* Table of charsets indexed by leading byte. */
3609 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
3610 chlook->charset_by_leading_byte[i] = Qnil;
3613 /* Table of charsets indexed by type/final-byte. */
3614 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3615 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3616 chlook->charset_by_attributes[i][j] = Qnil;
3618 /* Table of charsets indexed by type/final-byte/direction. */
3619 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3620 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3621 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
3622 chlook->charset_by_attributes[i][j][k] = Qnil;
3626 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
3628 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
3629 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
3633 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3634 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
3635 Leading-code of private TYPE9N charset of column-width 1.
3637 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3641 Vutf_2000_version = build_string("0.17 (Hōryūji)");
3642 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
3643 Version number of UTF-2000.
3646 staticpro (&Vcharacter_composition_table);
3647 Vcharacter_composition_table = make_char_id_table (Qnil);
3649 staticpro (&Vcharacter_variant_table);
3650 Vcharacter_variant_table = make_char_id_table (Qnil);
3652 Vdefault_coded_charset_priority_list = Qnil;
3653 DEFVAR_LISP ("default-coded-charset-priority-list",
3654 &Vdefault_coded_charset_priority_list /*
3655 Default order of preferred coded-character-sets.
3661 complex_vars_of_mule_charset (void)
3663 staticpro (&Vcharset_hash_table);
3664 Vcharset_hash_table =
3665 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3667 /* Predefined character sets. We store them into variables for
3671 staticpro (&Vchar_attribute_hash_table);
3672 Vchar_attribute_hash_table
3673 = make_lisp_hash_table (16, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3675 staticpro (&Vcharset_ucs);
3677 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
3678 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3679 build_string ("UCS"),
3680 build_string ("UCS"),
3681 build_string ("ISO/IEC 10646"),
3683 Qnil, 0, 0xFFFFFFF, 0, 0);
3684 staticpro (&Vcharset_ucs_bmp);
3686 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
3687 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3688 build_string ("BMP"),
3689 build_string ("BMP"),
3690 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
3691 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
3692 Qnil, 0, 0xFFFF, 0, 0);
3693 staticpro (&Vcharset_ucs_cns);
3695 make_charset (LEADING_BYTE_UCS_CNS, Qucs_cns, 256, 3,
3696 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3697 build_string ("UCS for CNS"),
3698 build_string ("UCS for CNS 11643"),
3699 build_string ("ISO/IEC 10646 for CNS 11643"),
3702 staticpro (&Vcharset_ucs_big5);
3704 make_charset (LEADING_BYTE_UCS_BIG5, Qucs_big5, 256, 3,
3705 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3706 build_string ("UCS for Big5"),
3707 build_string ("UCS for Big5"),
3708 build_string ("ISO/IEC 10646 for Big5"),
3712 # define MIN_CHAR_THAI 0
3713 # define MAX_CHAR_THAI 0
3714 # define MIN_CHAR_HEBREW 0
3715 # define MAX_CHAR_HEBREW 0
3716 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3717 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
3719 staticpro (&Vcharset_ascii);
3721 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3722 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3723 build_string ("ASCII"),
3724 build_string ("ASCII)"),
3725 build_string ("ASCII (ISO646 IRV)"),
3726 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3727 Qnil, 0, 0x7F, 0, 0);
3728 staticpro (&Vcharset_control_1);
3729 Vcharset_control_1 =
3730 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3731 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3732 build_string ("C1"),
3733 build_string ("Control characters"),
3734 build_string ("Control characters 128-191"),
3736 Qnil, 0x80, 0x9F, 0, 0);
3737 staticpro (&Vcharset_latin_iso8859_1);
3738 Vcharset_latin_iso8859_1 =
3739 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3740 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3741 build_string ("Latin-1"),
3742 build_string ("ISO8859-1 (Latin-1)"),
3743 build_string ("ISO8859-1 (Latin-1)"),
3744 build_string ("iso8859-1"),
3745 Qnil, 0xA0, 0xFF, 0, 32);
3746 staticpro (&Vcharset_latin_iso8859_2);
3747 Vcharset_latin_iso8859_2 =
3748 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3749 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3750 build_string ("Latin-2"),
3751 build_string ("ISO8859-2 (Latin-2)"),
3752 build_string ("ISO8859-2 (Latin-2)"),
3753 build_string ("iso8859-2"),
3755 staticpro (&Vcharset_latin_iso8859_3);
3756 Vcharset_latin_iso8859_3 =
3757 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3758 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3759 build_string ("Latin-3"),
3760 build_string ("ISO8859-3 (Latin-3)"),
3761 build_string ("ISO8859-3 (Latin-3)"),
3762 build_string ("iso8859-3"),
3764 staticpro (&Vcharset_latin_iso8859_4);
3765 Vcharset_latin_iso8859_4 =
3766 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3767 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3768 build_string ("Latin-4"),
3769 build_string ("ISO8859-4 (Latin-4)"),
3770 build_string ("ISO8859-4 (Latin-4)"),
3771 build_string ("iso8859-4"),
3773 staticpro (&Vcharset_thai_tis620);
3774 Vcharset_thai_tis620 =
3775 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3776 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3777 build_string ("TIS620"),
3778 build_string ("TIS620 (Thai)"),
3779 build_string ("TIS620.2529 (Thai)"),
3780 build_string ("tis620"),
3781 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
3782 staticpro (&Vcharset_greek_iso8859_7);
3783 Vcharset_greek_iso8859_7 =
3784 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3785 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3786 build_string ("ISO8859-7"),
3787 build_string ("ISO8859-7 (Greek)"),
3788 build_string ("ISO8859-7 (Greek)"),
3789 build_string ("iso8859-7"),
3791 0 /* MIN_CHAR_GREEK */,
3792 0 /* MAX_CHAR_GREEK */, 0, 32);
3793 staticpro (&Vcharset_arabic_iso8859_6);
3794 Vcharset_arabic_iso8859_6 =
3795 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3796 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3797 build_string ("ISO8859-6"),
3798 build_string ("ISO8859-6 (Arabic)"),
3799 build_string ("ISO8859-6 (Arabic)"),
3800 build_string ("iso8859-6"),
3802 staticpro (&Vcharset_hebrew_iso8859_8);
3803 Vcharset_hebrew_iso8859_8 =
3804 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3805 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3806 build_string ("ISO8859-8"),
3807 build_string ("ISO8859-8 (Hebrew)"),
3808 build_string ("ISO8859-8 (Hebrew)"),
3809 build_string ("iso8859-8"),
3810 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
3811 staticpro (&Vcharset_katakana_jisx0201);
3812 Vcharset_katakana_jisx0201 =
3813 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3814 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3815 build_string ("JISX0201 Kana"),
3816 build_string ("JISX0201.1976 (Japanese Kana)"),
3817 build_string ("JISX0201.1976 Japanese Kana"),
3818 build_string ("jisx0201\\.1976"),
3820 staticpro (&Vcharset_latin_jisx0201);
3821 Vcharset_latin_jisx0201 =
3822 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3823 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3824 build_string ("JISX0201 Roman"),
3825 build_string ("JISX0201.1976 (Japanese Roman)"),
3826 build_string ("JISX0201.1976 Japanese Roman"),
3827 build_string ("jisx0201\\.1976"),
3829 staticpro (&Vcharset_cyrillic_iso8859_5);
3830 Vcharset_cyrillic_iso8859_5 =
3831 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3832 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3833 build_string ("ISO8859-5"),
3834 build_string ("ISO8859-5 (Cyrillic)"),
3835 build_string ("ISO8859-5 (Cyrillic)"),
3836 build_string ("iso8859-5"),
3838 0 /* MIN_CHAR_CYRILLIC */,
3839 0 /* MAX_CHAR_CYRILLIC */, 0, 32);
3840 staticpro (&Vcharset_latin_iso8859_9);
3841 Vcharset_latin_iso8859_9 =
3842 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3843 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3844 build_string ("Latin-5"),
3845 build_string ("ISO8859-9 (Latin-5)"),
3846 build_string ("ISO8859-9 (Latin-5)"),
3847 build_string ("iso8859-9"),
3849 staticpro (&Vcharset_japanese_jisx0208_1978);
3850 Vcharset_japanese_jisx0208_1978 =
3851 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3852 Qjapanese_jisx0208_1978, 94, 2,
3853 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3854 build_string ("JIS X0208:1978"),
3855 build_string ("JIS X0208:1978 (Japanese)"),
3857 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3858 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3860 staticpro (&Vcharset_chinese_gb2312);
3861 Vcharset_chinese_gb2312 =
3862 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3863 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3864 build_string ("GB2312"),
3865 build_string ("GB2312)"),
3866 build_string ("GB2312 Chinese simplified"),
3867 build_string ("gb2312"),
3869 staticpro (&Vcharset_chinese_gb12345);
3870 Vcharset_chinese_gb12345 =
3871 make_charset (LEADING_BYTE_CHINESE_GB12345, Qchinese_gb12345, 94, 2,
3872 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3873 build_string ("G1"),
3874 build_string ("GB 12345)"),
3875 build_string ("GB 12345-1990"),
3876 build_string ("GB12345\\(\\.1990\\)?-0"),
3878 staticpro (&Vcharset_japanese_jisx0208);
3879 Vcharset_japanese_jisx0208 =
3880 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3881 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3882 build_string ("JISX0208"),
3883 build_string ("JIS X0208:1983 (Japanese)"),
3884 build_string ("JIS X0208:1983 Japanese Kanji"),
3885 build_string ("jisx0208\\.1983"),
3888 staticpro (&Vcharset_japanese_jisx0208_1990);
3889 Vcharset_japanese_jisx0208_1990 =
3890 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3891 Qjapanese_jisx0208_1990, 94, 2,
3892 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3893 build_string ("JISX0208-1990"),
3894 build_string ("JIS X0208:1990 (Japanese)"),
3895 build_string ("JIS X0208:1990 Japanese Kanji"),
3896 build_string ("jisx0208\\.1990"),
3898 MIN_CHAR_JIS_X0208_1990,
3899 MAX_CHAR_JIS_X0208_1990, 0, 33);
3901 staticpro (&Vcharset_korean_ksc5601);
3902 Vcharset_korean_ksc5601 =
3903 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3904 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3905 build_string ("KSC5601"),
3906 build_string ("KSC5601 (Korean"),
3907 build_string ("KSC5601 Korean Hangul and Hanja"),
3908 build_string ("ksc5601"),
3910 staticpro (&Vcharset_japanese_jisx0212);
3911 Vcharset_japanese_jisx0212 =
3912 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3913 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3914 build_string ("JISX0212"),
3915 build_string ("JISX0212 (Japanese)"),
3916 build_string ("JISX0212 Japanese Supplement"),
3917 build_string ("jisx0212"),
3920 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3921 staticpro (&Vcharset_chinese_cns11643_1);
3922 Vcharset_chinese_cns11643_1 =
3923 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3924 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3925 build_string ("CNS11643-1"),
3926 build_string ("CNS11643-1 (Chinese traditional)"),
3928 ("CNS 11643 Plane 1 Chinese traditional"),
3929 build_string (CHINESE_CNS_PLANE_RE("1")),
3931 staticpro (&Vcharset_chinese_cns11643_2);
3932 Vcharset_chinese_cns11643_2 =
3933 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3934 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3935 build_string ("CNS11643-2"),
3936 build_string ("CNS11643-2 (Chinese traditional)"),
3938 ("CNS 11643 Plane 2 Chinese traditional"),
3939 build_string (CHINESE_CNS_PLANE_RE("2")),
3942 staticpro (&Vcharset_latin_tcvn5712);
3943 Vcharset_latin_tcvn5712 =
3944 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3945 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3946 build_string ("TCVN 5712"),
3947 build_string ("TCVN 5712 (VSCII-2)"),
3948 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3949 build_string ("tcvn5712-1"),
3951 staticpro (&Vcharset_latin_viscii_lower);
3952 Vcharset_latin_viscii_lower =
3953 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3954 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3955 build_string ("VISCII lower"),
3956 build_string ("VISCII lower (Vietnamese)"),
3957 build_string ("VISCII lower (Vietnamese)"),
3958 build_string ("MULEVISCII-LOWER"),
3960 staticpro (&Vcharset_latin_viscii_upper);
3961 Vcharset_latin_viscii_upper =
3962 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3963 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3964 build_string ("VISCII upper"),
3965 build_string ("VISCII upper (Vietnamese)"),
3966 build_string ("VISCII upper (Vietnamese)"),
3967 build_string ("MULEVISCII-UPPER"),
3969 staticpro (&Vcharset_latin_viscii);
3970 Vcharset_latin_viscii =
3971 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3972 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3973 build_string ("VISCII"),
3974 build_string ("VISCII 1.1 (Vietnamese)"),
3975 build_string ("VISCII 1.1 (Vietnamese)"),
3976 build_string ("VISCII1\\.1"),
3978 staticpro (&Vcharset_chinese_big5);
3979 Vcharset_chinese_big5 =
3980 make_charset (LEADING_BYTE_CHINESE_BIG5, Qchinese_big5, 256, 2,
3981 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3982 build_string ("Big5"),
3983 build_string ("Big5"),
3984 build_string ("Big5 Chinese traditional"),
3985 build_string ("big5"),
3987 staticpro (&Vcharset_ideograph_gt);
3988 Vcharset_ideograph_gt =
3989 make_charset (LEADING_BYTE_GT, Qideograph_gt, 256, 3,
3990 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3991 build_string ("GT"),
3992 build_string ("GT"),
3993 build_string ("GT"),
3995 Qnil, MIN_CHAR_GT, MAX_CHAR_GT, 0, 0);
/* DEF_GT_PJ (n): set up the charset variable for part N of the GT
   "pseudo JIS encoding" charsets.
   N is token-pasted (##) into Vcharset_ideograph_gt_pj_N,
   LEADING_BYTE_GT_PJ_N and Qideograph_gt_pj_N, and stringified (#) into
   the name/description strings and the trailing regexp, so it has to be
   a bare token such as a number, not a string literal.
   The expansion staticpro's the variable first and then assigns it the
   value returned by make_charset.  */
3996 #define DEF_GT_PJ(n) \
3997 staticpro (&Vcharset_ideograph_gt_pj_##n); \
3998 Vcharset_ideograph_gt_pj_##n = \
3999 make_charset (LEADING_BYTE_GT_PJ_##n, Qideograph_gt_pj_##n, 94, 2, \
4000 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
4001 build_string ("GT-PJ-"#n), \
4002 build_string ("GT (pseudo JIS encoding) part "#n), \
4003 build_string ("GT 2000 (pseudo JIS encoding) part "#n), \
4005 ("\\(GT2000PJ-"#n "\\|jisx0208\\.GT2000-"#n "\\)$"), \
4019 staticpro (&Vcharset_ideograph_daikanwa);
4020 Vcharset_ideograph_daikanwa =
4021 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
4022 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4023 build_string ("Daikanwa"),
4024 build_string ("Morohashi's Daikanwa"),
4025 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
4026 build_string ("Daikanwa"),
4027 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
4028 staticpro (&Vcharset_mojikyo);
4030 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
4031 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4032 build_string ("Mojikyo"),
4033 build_string ("Mojikyo"),
4034 build_string ("Konjaku-Mojikyo"),
4036 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
4037 staticpro (&Vcharset_mojikyo_2022_1);
4038 Vcharset_mojikyo_2022_1 =
4039 make_charset (LEADING_BYTE_MOJIKYO_2022_1, Qmojikyo_2022_1, 94, 3,
4040 2, 2, ':', CHARSET_LEFT_TO_RIGHT,
4041 build_string ("Mojikyo-2022-1"),
4042 build_string ("Mojikyo ISO-2022 Part 1"),
4043 build_string ("Konjaku-Mojikyo for ISO/IEC 2022 Part 1"),
/* DEF_MOJIKYO_PJ (n): set up the charset variable for part N of the
   Mojikyo "pseudo JIS encoding" charsets, e.g. DEF_MOJIKYO_PJ (10).
   N is token-pasted (##) into Vcharset_mojikyo_pj_N,
   LEADING_BYTE_MOJIKYO_PJ_N and Qmojikyo_pj_N, and stringified (#) into
   the name/description strings and the trailing regexp, so it has to be
   a bare token such as a number, not a string literal.
   The expansion staticpro's the variable first and then assigns it the
   value returned by make_charset.  */
4047 #define DEF_MOJIKYO_PJ(n) \
4048 staticpro (&Vcharset_mojikyo_pj_##n); \
4049 Vcharset_mojikyo_pj_##n = \
4050 make_charset (LEADING_BYTE_MOJIKYO_PJ_##n, Qmojikyo_pj_##n, 94, 2, \
4051 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
4052 build_string ("Mojikyo-PJ-"#n), \
4053 build_string ("Mojikyo (pseudo JIS encoding) part "#n), \
4055 ("Konjaku-Mojikyo (pseudo JIS encoding) part "#n), \
4057 ("\\(MojikyoPJ-"#n "\\|jisx0208\\.Mojikyo-"#n "\\)$"), \
4069 DEF_MOJIKYO_PJ (10);
4070 DEF_MOJIKYO_PJ (11);
4071 DEF_MOJIKYO_PJ (12);
4072 DEF_MOJIKYO_PJ (13);
4073 DEF_MOJIKYO_PJ (14);
4074 DEF_MOJIKYO_PJ (15);
4075 DEF_MOJIKYO_PJ (16);
4076 DEF_MOJIKYO_PJ (17);
4077 DEF_MOJIKYO_PJ (18);
4078 DEF_MOJIKYO_PJ (19);
4079 DEF_MOJIKYO_PJ (20);
4080 DEF_MOJIKYO_PJ (21);
4082 staticpro (&Vcharset_ethiopic_ucs);
4083 Vcharset_ethiopic_ucs =
4084 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
4085 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4086 build_string ("Ethiopic (UCS)"),
4087 build_string ("Ethiopic (UCS)"),
4088 build_string ("Ethiopic of UCS"),
4089 build_string ("Ethiopic-Unicode"),
4090 Qnil, 0x1200, 0x137F, 0x1200, 0);
4092 staticpro (&Vcharset_chinese_big5_1);
4093 Vcharset_chinese_big5_1 =
4094 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
4095 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
4096 build_string ("Big5"),
4097 build_string ("Big5 (Level-1)"),
4099 ("Big5 Level-1 Chinese traditional"),
4100 build_string ("big5"),
4102 staticpro (&Vcharset_chinese_big5_2);
4103 Vcharset_chinese_big5_2 =
4104 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
4105 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
4106 build_string ("Big5"),
4107 build_string ("Big5 (Level-2)"),
4109 ("Big5 Level-2 Chinese traditional"),
4110 build_string ("big5"),
4113 #ifdef ENABLE_COMPOSITE_CHARS
4114 /* #### For simplicity, we put composite chars into a 96x96 charset.
4115 This is going to lead to problems because you can run out of
4116 room, esp. as we don't yet recycle numbers. */
4117 staticpro (&Vcharset_composite);
4118 Vcharset_composite =
4119 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
4120 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4121 build_string ("Composite"),
4122 build_string ("Composite characters"),
4123 build_string ("Composite characters"),
4126 /* #### not dumped properly */
4127 composite_char_row_next = 32;
4128 composite_char_col_next = 32;
4130 Vcomposite_char_string2char_hash_table =
4131 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
4132 Vcomposite_char_char2string_hash_table =
4133 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
4134 staticpro (&Vcomposite_char_string2char_hash_table);
4135 staticpro (&Vcomposite_char_char2string_hash_table);
4136 #endif /* ENABLE_COMPOSITE_CHARS */