1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 20.3. Not in FSF. */
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
/* One global Lisp_Object per pre-defined charset.  They are only declared
   here; nothing in this excerpt shows where they are initialized. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_japanese_jisx0208;
60 Lisp_Object Vcharset_japanese_jisx0208_1990;
61 Lisp_Object Vcharset_korean_ksc5601;
62 Lisp_Object Vcharset_japanese_jisx0212;
63 Lisp_Object Vcharset_chinese_cns11643_1;
64 Lisp_Object Vcharset_chinese_cns11643_2;
66 Lisp_Object Vcharset_ucs;
67 Lisp_Object Vcharset_ucs_bmp;
68 Lisp_Object Vcharset_latin_viscii;
69 Lisp_Object Vcharset_latin_tcvn5712;
70 Lisp_Object Vcharset_latin_viscii_lower;
71 Lisp_Object Vcharset_latin_viscii_upper;
72 Lisp_Object Vcharset_ideograph_daikanwa;
73 Lisp_Object Vcharset_mojikyo;
74 Lisp_Object Vcharset_mojikyo_pj_1;
75 Lisp_Object Vcharset_mojikyo_pj_2;
76 Lisp_Object Vcharset_mojikyo_pj_3;
77 Lisp_Object Vcharset_mojikyo_pj_4;
78 Lisp_Object Vcharset_mojikyo_pj_5;
79 Lisp_Object Vcharset_mojikyo_pj_6;
80 Lisp_Object Vcharset_mojikyo_pj_7;
81 Lisp_Object Vcharset_mojikyo_pj_8;
82 Lisp_Object Vcharset_mojikyo_pj_9;
83 Lisp_Object Vcharset_mojikyo_pj_10;
84 Lisp_Object Vcharset_mojikyo_pj_11;
85 Lisp_Object Vcharset_mojikyo_pj_12;
86 Lisp_Object Vcharset_mojikyo_pj_13;
87 Lisp_Object Vcharset_mojikyo_pj_14;
88 Lisp_Object Vcharset_mojikyo_pj_15;
89 Lisp_Object Vcharset_mojikyo_pj_16;
90 Lisp_Object Vcharset_mojikyo_pj_17;
91 Lisp_Object Vcharset_mojikyo_pj_18;
92 Lisp_Object Vcharset_mojikyo_pj_19;
93 Lisp_Object Vcharset_mojikyo_pj_20;
94 Lisp_Object Vcharset_mojikyo_pj_21;
95 Lisp_Object Vcharset_ethiopic_ucs;
97 Lisp_Object Vcharset_chinese_big5_1;
98 Lisp_Object Vcharset_chinese_big5_2;
/* The composite charset object is declared only when
   ENABLE_COMPOSITE_CHARS is defined. */
100 #ifdef ENABLE_COMPOSITE_CHARS
101 Lisp_Object Vcharset_composite;
103 /* Hash tables for composite chars. One maps strings representing
104 composed chars to their equivalent chars; one goes the other way. */
/* The two composite-char hash tables, followed by two file-local int
   counters.  NOTE(review): the counters presumably hold the next free
   row/column to hand out when a composite char is created -- confirm
   against the allocation code, which is not visible in this excerpt. */
106 Lisp_Object Vcomposite_char_char2string_hash_table;
107 Lisp_Object Vcomposite_char_string2char_hash_table;
109 static int composite_char_row_next;
110 static int composite_char_col_next;
112 #endif /* ENABLE_COMPOSITE_CHARS */
/* Pointer to the charset lookup structure.  Code later in this file reads
   chlook->charset_by_leading_byte. */
114 struct charset_lookup *chlook;
/* Description entries for struct charset_lookup, starting with its
   charset_by_leading_byte Lisp_Object array.  The rest of this
   initializer is not visible in this excerpt. */
116 static const struct lrecord_description charset_lookup_description_1[] = {
117 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
/* Struct description pairing sizeof (struct charset_lookup) with the
   entry table charset_lookup_description_1. */
126 static const struct struct_description charset_lookup_description = {
127 sizeof (struct charset_lookup),
128 charset_lookup_description_1
132 /* Table of number of bytes in the string representation of a character
133 indexed by the first byte of that representation.
135 rep_bytes_by_first_byte(c) is more efficient than the equivalent
136 canonical computation:
138 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
140 const Bytecount rep_bytes_by_first_byte[0xA0] =
141 { /* 0x00 - 0x7f are for straight ASCII */
142 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
143 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
146 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
147 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
148 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
149 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
150 /* 0x80 - 0x8f are for Dimension-1 official charsets */
/* NOTE(review): two 16-entry rows follow for the single range
   0x80 - 0x8f.  Counting both, the visible rows add up to more than the
   declared 0xA0 entries, so the two rows are presumably alternatives
   selected by a preprocessor conditional that is not visible in this
   excerpt -- confirm.  They differ only in the last entry (3 vs. 2). */
152 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
154 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
156 /* 0x90 - 0x9d are for Dimension-2 official charsets */
157 /* 0x9e is for Dimension-1 private charsets */
158 /* 0x9f is for Dimension-2 private charsets */
159 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Value encoding used by uint8 byte tables.  Ordinary integers occupy
   BT_UINT8_MIN .. BT_UINT8_MAX; the three largest unsigned char values
   are reserved as markers for t, nil and unbound. */
165 #define BT_UINT8_MIN 0
166 #define BT_UINT8_MAX (UCHAR_MAX - 3)
167 #define BT_UINT8_t (UCHAR_MAX - 2)
168 #define BT_UINT8_nil (UCHAR_MAX - 1)
169 #define BT_UINT8_unbound UCHAR_MAX
/* Forward declarations of the inline uint8 helpers defined further on. */
171 INLINE_HEADER int INT_UINT8_P (Lisp_Object obj);
172 INLINE_HEADER int UINT8_VALUE_P (Lisp_Object obj);
173 INLINE_HEADER unsigned char UINT8_ENCODE (Lisp_Object obj);
174 INLINE_HEADER Lisp_Object UINT8_DECODE (unsigned char n);
175 INLINE_HEADER unsigned short UINT8_TO_UINT16 (unsigned char n);
/* Range test for uint8 byte-table integers: with NUM taken from
   XINT (obj), yields nonzero when BT_UINT8_MIN <= NUM <= BT_UINT8_MAX.
   The lines between the signature and the XINT call are not visible in
   this excerpt. */
178 INT_UINT8_P (Lisp_Object obj)
182 int num = XINT (obj);
184 return (BT_UINT8_MIN <= num) && (num <= BT_UINT8_MAX);
/* Nonzero when OBJ can be stored in a uint8 byte table: it is Qunbound,
   Qnil or Qt, or it satisfies INT_UINT8_P. */
191 UINT8_VALUE_P (Lisp_Object obj)
193 return EQ (obj, Qunbound)
194 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT8_P (obj);
/* Convert a Lisp value to its unsigned char code for a uint8 byte table.
   Qunbound maps to BT_UINT8_unbound; Qnil and Qt are tested next.  The
   values returned for Qnil, Qt and the remaining case are on lines not
   visible in this excerpt. */
197 INLINE_HEADER unsigned char
198 UINT8_ENCODE (Lisp_Object obj)
200 if (EQ (obj, Qunbound))
201 return BT_UINT8_unbound;
202 else if (EQ (obj, Qnil))
204 else if (EQ (obj, Qt))
/* Convert an unsigned char code read from a uint8 byte table back to a
   Lisp_Object.  The three marker codes (unbound, nil, t) are tested in
   turn; the value returned by each branch is on lines not visible in this
   excerpt. */
210 INLINE_HEADER Lisp_Object
211 UINT8_DECODE (unsigned char n)
213 if (n == BT_UINT8_unbound)
215 else if (n == BT_UINT8_nil)
217 else if (n == BT_UINT8_t)
/* Mark method handed to DEFINE_LRECORD_IMPLEMENTATION for
   "uint8-byte-table".  Only the signature is visible in this excerpt. */
224 mark_uint8_byte_table (Lisp_Object obj)
/* Print method for uint8 byte tables.

   Writes "\n#<uint8-byte-table" to PRINTCHARFUN, then visits all 256
   entries: the marker codes are written as "void", "nil" and "t", any
   other entry is formatted with sprintf into BUF and written as a number.
   The output is closed with ">".  OBJ and PRINTCHARFUN are protected with
   GCPRO2 for the duration.

   ESCAPEFLAG is not used in the visible lines.  Several lines (the
   declarations of I and BUF, braces, and whatever stands between reading
   the entry and the "\n " write) are not visible in this excerpt.
   NOTE(review): the "\n " write is probably conditional -- confirm. */
230 print_uint8_byte_table (Lisp_Object obj,
231 Lisp_Object printcharfun, int escapeflag)
233 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
235 struct gcpro gcpro1, gcpro2;
236 GCPRO2 (obj, printcharfun);
238 write_c_string ("\n#<uint8-byte-table", printcharfun);
239 for (i = 0; i < 256; i++)
241 unsigned char n = bte->property[i];
243 write_c_string ("\n ", printcharfun);
244 write_c_string (" ", printcharfun);
245 if (n == BT_UINT8_unbound)
246 write_c_string ("void", printcharfun);
247 else if (n == BT_UINT8_nil)
248 write_c_string ("nil", printcharfun);
249 else if (n == BT_UINT8_t)
250 write_c_string ("t", printcharfun);
255 sprintf (buf, "%hd", n);
256 write_c_string (buf, printcharfun);
260 write_c_string (">", printcharfun);
/* Equality method for uint8 byte tables: walks the 256 property entries of
   OBJ1 and OBJ2 in parallel and tests each pair for inequality.  DEPTH is
   not used in the visible lines; the return statements are not visible in
   this excerpt. */
264 uint8_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
266 Lisp_Uint8_Byte_Table *te1 = XUINT8_BYTE_TABLE (obj1);
267 Lisp_Uint8_Byte_Table *te2 = XUINT8_BYTE_TABLE (obj2);
270 for (i = 0; i < 256; i++)
271 if (te1->property[i] != te2->property[i])
/* Hash method for uint8 byte tables: folds each of the 256 property
   entries into HASH with HASH2.  The declaration and initial value of
   HASH, and the return, are not visible in this excerpt; DEPTH is not used
   in the visible lines. */
277 uint8_byte_table_hash (Lisp_Object obj, int depth)
279 Lisp_Uint8_Byte_Table *te = XUINT8_BYTE_TABLE (obj);
283 for (i = 0; i < 256; i++)
284 hash = HASH2 (hash, te->property[i]);
/* Lrecord implementation for "uint8-byte-table".  It is given the mark,
   print, equal and hash functions defined above for this type; the slot
   after the print function and the description slot are both 0. */
288 DEFINE_LRECORD_IMPLEMENTATION ("uint8-byte-table", uint8_byte_table,
289 mark_uint8_byte_table,
290 print_uint8_byte_table,
291 0, uint8_byte_table_equal,
292 uint8_byte_table_hash,
293 0 /* uint8_byte_table_description */,
294 Lisp_Uint8_Byte_Table);
/* Create a uint8 byte table: allocates a Lisp_Uint8_Byte_Table lcrecord,
   sets all 256 property entries to INITVAL (an already-encoded unsigned
   char code) and wraps the record into OBJ with XSETUINT8_BYTE_TABLE.
   The declarations of OBJ and I and the return are not visible in this
   excerpt. */
297 make_uint8_byte_table (unsigned char initval)
301 Lisp_Uint8_Byte_Table *cte;
303 cte = alloc_lcrecord_type (Lisp_Uint8_Byte_Table,
304 &lrecord_uint8_byte_table);
306 for (i = 0; i < 256; i++)
307 cte->property[i] = initval;
309 XSETUINT8_BYTE_TABLE (obj, cte);
/* Test whether every entry of a uint8 byte table holds the same code:
   entries 1..255 are each compared against entry 0.  The return statements
   are not visible in this excerpt. */
314 uint8_byte_table_same_value_p (Lisp_Object obj)
316 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
317 unsigned char v0 = bte->property[0];
320 for (i = 1; i < 256; i++)
322 if (bte->property[i] != v0)
/* Value encoding used by uint16 byte tables.  Ordinary integers occupy
   BT_UINT16_MIN .. BT_UINT16_MAX; the three largest unsigned short values
   are reserved as markers for t, nil and unbound. */
329 #define BT_UINT16_MIN 0
330 #define BT_UINT16_MAX (USHRT_MAX - 3)
331 #define BT_UINT16_t (USHRT_MAX - 2)
332 #define BT_UINT16_nil (USHRT_MAX - 1)
333 #define BT_UINT16_unbound USHRT_MAX
/* Forward declarations of the inline uint16 helpers defined further on. */
335 INLINE_HEADER int INT_UINT16_P (Lisp_Object obj);
336 INLINE_HEADER int UINT16_VALUE_P (Lisp_Object obj);
337 INLINE_HEADER unsigned short UINT16_ENCODE (Lisp_Object obj);
338 INLINE_HEADER Lisp_Object UINT16_DECODE (unsigned short us);
/* Range test for uint16 byte-table integers: with NUM taken from
   XINT (obj), yields nonzero when BT_UINT16_MIN <= NUM <= BT_UINT16_MAX.
   The lines between the signature and the XINT call are not visible in
   this excerpt. */
341 INT_UINT16_P (Lisp_Object obj)
345 int num = XINT (obj);
347 return (BT_UINT16_MIN <= num) && (num <= BT_UINT16_MAX);
/* Nonzero when OBJ can be stored in a uint16 byte table: it is Qunbound,
   Qnil or Qt, or it satisfies INT_UINT16_P. */
354 UINT16_VALUE_P (Lisp_Object obj)
356 return EQ (obj, Qunbound)
357 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT16_P (obj);
/* Convert a Lisp value to its unsigned short code for a uint16 byte
   table.  Qunbound maps to BT_UINT16_unbound and Qnil to BT_UINT16_nil;
   the values returned for Qt and for the remaining case are on lines not
   visible in this excerpt. */
360 INLINE_HEADER unsigned short
361 UINT16_ENCODE (Lisp_Object obj)
363 if (EQ (obj, Qunbound))
364 return BT_UINT16_unbound;
365 else if (EQ (obj, Qnil))
366 return BT_UINT16_nil;
367 else if (EQ (obj, Qt))
/* Convert an unsigned short code read from a uint16 byte table back to a
   Lisp_Object.  The three marker codes (unbound, nil, t) are tested in
   turn; the value returned by each branch is on lines not visible in this
   excerpt. */
373 INLINE_HEADER Lisp_Object
374 UINT16_DECODE (unsigned short n)
376 if (n == BT_UINT16_unbound)
378 else if (n == BT_UINT16_nil)
380 else if (n == BT_UINT16_t)
/* Widen a uint8 byte-table code to the corresponding uint16 code.  The
   uint8 markers for unbound and nil map to the uint16 markers of the same
   meaning; the results for the t marker and for ordinary values are on
   lines not visible in this excerpt. */
386 INLINE_HEADER unsigned short
387 UINT8_TO_UINT16 (unsigned char n)
389 if (n == BT_UINT8_unbound)
390 return BT_UINT16_unbound;
391 else if (n == BT_UINT8_nil)
392 return BT_UINT16_nil;
393 else if (n == BT_UINT8_t)
/* Mark method handed to DEFINE_LRECORD_IMPLEMENTATION for
   "uint16-byte-table".  Only the signature is visible in this excerpt. */
400 mark_uint16_byte_table (Lisp_Object obj)
/* Print method for uint16 byte tables.

   Writes "\n#<uint16-byte-table" to PRINTCHARFUN, then visits all 256
   entries: the marker codes are written as "void", "nil" and "t", any
   other entry is formatted with sprintf into BUF and written as a number.
   The output is closed with ">".  OBJ and PRINTCHARFUN are protected with
   GCPRO2 for the duration.

   ESCAPEFLAG is not used in the visible lines.  Several lines (the
   declarations of I and BUF, braces, and whatever stands between reading
   the entry and the "\n " write) are not visible in this excerpt.

   NOTE(review): N is an unsigned short but is formatted with "%hd", a
   signed conversion, so entries above SHRT_MAX would be printed as
   negative numbers; "%hu" looks like the intended conversion.  Check that
   BUF is large enough before changing it. */
406 print_uint16_byte_table (Lisp_Object obj,
407 Lisp_Object printcharfun, int escapeflag)
409 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
411 struct gcpro gcpro1, gcpro2;
412 GCPRO2 (obj, printcharfun);
414 write_c_string ("\n#<uint16-byte-table", printcharfun);
415 for (i = 0; i < 256; i++)
417 unsigned short n = bte->property[i];
419 write_c_string ("\n ", printcharfun);
420 write_c_string (" ", printcharfun);
421 if (n == BT_UINT16_unbound)
422 write_c_string ("void", printcharfun);
423 else if (n == BT_UINT16_nil)
424 write_c_string ("nil", printcharfun);
425 else if (n == BT_UINT16_t)
426 write_c_string ("t", printcharfun);
431 sprintf (buf, "%hd", n);
432 write_c_string (buf, printcharfun);
436 write_c_string (">", printcharfun);
/* Equality method for uint16 byte tables: walks the 256 property entries
   of OBJ1 and OBJ2 in parallel and tests each pair for inequality.  DEPTH
   is not used in the visible lines; the return statements are not visible
   in this excerpt. */
440 uint16_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
442 Lisp_Uint16_Byte_Table *te1 = XUINT16_BYTE_TABLE (obj1);
443 Lisp_Uint16_Byte_Table *te2 = XUINT16_BYTE_TABLE (obj2);
446 for (i = 0; i < 256; i++)
447 if (te1->property[i] != te2->property[i])
/* Hash method for uint16 byte tables: folds each of the 256 property
   entries into HASH with HASH2.  The declaration and initial value of
   HASH, and the return, are not visible in this excerpt; DEPTH is not used
   in the visible lines. */
453 uint16_byte_table_hash (Lisp_Object obj, int depth)
455 Lisp_Uint16_Byte_Table *te = XUINT16_BYTE_TABLE (obj);
459 for (i = 0; i < 256; i++)
460 hash = HASH2 (hash, te->property[i]);
/* Lrecord implementation for "uint16-byte-table".  It is given the mark,
   print, equal and hash functions defined above for this type; the slot
   after the print function and the description slot are both 0. */
464 DEFINE_LRECORD_IMPLEMENTATION ("uint16-byte-table", uint16_byte_table,
465 mark_uint16_byte_table,
466 print_uint16_byte_table,
467 0, uint16_byte_table_equal,
468 uint16_byte_table_hash,
469 0 /* uint16_byte_table_description */,
470 Lisp_Uint16_Byte_Table);
/* Create a uint16 byte table: allocates a Lisp_Uint16_Byte_Table lcrecord,
   sets all 256 property entries to INITVAL (an already-encoded unsigned
   short code) and wraps the record into OBJ with XSETUINT16_BYTE_TABLE.
   The declarations of OBJ and I and the return are not visible in this
   excerpt. */
473 make_uint16_byte_table (unsigned short initval)
477 Lisp_Uint16_Byte_Table *cte;
479 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
480 &lrecord_uint16_byte_table);
482 for (i = 0; i < 256; i++)
483 cte->property[i] = initval;
485 XSETUINT16_BYTE_TABLE (obj, cte);
/* Test whether every entry of a uint16 byte table holds the same code:
   entries 1..255 are each compared against entry 0.  The return statements
   are not visible in this excerpt. */
490 uint16_byte_table_same_value_p (Lisp_Object obj)
492 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
493 unsigned short v0 = bte->property[0];
496 for (i = 1; i < 256; i++)
498 if (bte->property[i] != v0)
/* Mark method for byte tables: calls mark_object on each of the 256
   property entries.  The return value is on a line not visible in this
   excerpt. */
506 mark_byte_table (Lisp_Object obj)
508 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
511 for (i = 0; i < 256; i++)
513 mark_object (cte->property[i]);
/* Print method for byte tables.

   Writes "\n#<byte-table" to PRINTCHARFUN, then visits all 256 entries:
   an entry that is Qunbound is written as "void", anything else is printed
   with print_internal using ESCAPEFLAG.  The output is closed with ">".
   OBJ and PRINTCHARFUN are protected with GCPRO2 for the duration.

   Some lines (the declaration of I, braces, and whatever stands between
   reading the entry and the "\n " write) are not visible in this excerpt.
   NOTE(review): the "\n " write is probably conditional -- confirm. */
519 print_byte_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
521 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
523 struct gcpro gcpro1, gcpro2;
524 GCPRO2 (obj, printcharfun);
526 write_c_string ("\n#<byte-table", printcharfun);
527 for (i = 0; i < 256; i++)
529 Lisp_Object elt = bte->property[i];
531 write_c_string ("\n ", printcharfun);
532 write_c_string (" ", printcharfun);
533 if (EQ (elt, Qunbound))
534 write_c_string ("void", printcharfun);
536 print_internal (elt, printcharfun, escapeflag);
539 write_c_string (">", printcharfun);
/* Equality method for byte tables.  For each of the 256 slots: when the
   entries of both tables are themselves byte tables they are compared by a
   recursive call with DEPTH + 1; the visible fallback compares the two
   entries with internal_equal at DEPTH + 1.  The return statements and the
   handling of a byte table paired with a non-byte-table are on lines not
   visible in this excerpt. */
543 byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
545 Lisp_Byte_Table *cte1 = XBYTE_TABLE (obj1);
546 Lisp_Byte_Table *cte2 = XBYTE_TABLE (obj2);
549 for (i = 0; i < 256; i++)
550 if (BYTE_TABLE_P (cte1->property[i]))
552 if (BYTE_TABLE_P (cte2->property[i]))
554 if (!byte_table_equal (cte1->property[i],
555 cte2->property[i], depth + 1))
562 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
568 byte_table_hash (Lisp_Object obj, int depth)
570 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
572 return internal_array_hash (cte->property, 256, depth);
/* Description of Lisp_Byte_Table: its property member is an array of 256
   Lisp_Objects.  The terminating entry is not visible in this excerpt. */
575 static const struct lrecord_description byte_table_description[] = {
576 { XD_LISP_OBJECT_ARRAY, offsetof(Lisp_Byte_Table, property), 256 },
/* Lrecord implementation for "byte-table", using byte_table_description.
   The other arguments are on lines not visible in this excerpt. */
580 DEFINE_LRECORD_IMPLEMENTATION ("byte-table", byte_table,
585 byte_table_description,
/* Create a byte table: allocates a Lisp_Byte_Table lcrecord, sets all 256
   property entries to INITVAL and wraps the record into OBJ with
   XSETBYTE_TABLE.  The declarations of OBJ and I and the return are not
   visible in this excerpt. */
589 make_byte_table (Lisp_Object initval)
593 Lisp_Byte_Table *cte;
595 cte = alloc_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
597 for (i = 0; i < 256; i++)
598 cte->property[i] = initval;
600 XSETBYTE_TABLE (obj, cte);
/* Test whether every entry of a byte table holds an equal value: entries
   1..255 are each compared against entry 0 with internal_equal (depth 0).
   The return statements are not visible in this excerpt. */
605 byte_table_same_value_p (Lisp_Object obj)
607 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
608 Lisp_Object v0 = bte->property[0];
611 for (i = 1; i < 256; i++)
613 if (!internal_equal (bte->property[i], v0, 0))
/* Prototypes for the generic byte-table accessors defined just below.
   The last line of the put_byte_table prototype is not visible in this
   excerpt. */
620 Lisp_Object get_byte_table (Lisp_Object table, unsigned char idx);
621 Lisp_Object put_byte_table (Lisp_Object table, unsigned char idx,
/* Fetch entry IDX of TABLE as a Lisp_Object.  Entries of uint8 and uint16
   byte tables are converted back with UINT8_DECODE / UINT16_DECODE; a
   plain byte table yields the stored object directly.  The result when
   TABLE is none of the three table types is on a line not visible in this
   excerpt. */
625 get_byte_table (Lisp_Object table, unsigned char idx)
627 if (UINT8_BYTE_TABLE_P (table))
628 return UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[idx]);
629 else if (UINT16_BYTE_TABLE_P (table))
630 return UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[idx]);
631 else if (BYTE_TABLE_P (table))
632 return XBYTE_TABLE(table)->property[idx];
/* Store VALUE at index IDX of TABLE, switching to a wider table type when
   the current one cannot represent VALUE.

   TABLE is a uint8 byte table:
     - VALUE fits (UINT8_VALUE_P): the entry is overwritten in place; a
       further test checks whether VALUE is not itself a table and the
       table now holds one value everywhere.
     - VALUE fits a uint16 table: a new uint16 byte table is created, the
       old entries are copied through UINT8_TO_UINT16 and entry IDX is set.
     - otherwise: a new byte table is created, the old entries are copied
       through UINT8_DECODE and entry IDX is set to VALUE.
   TABLE is a uint16 byte table: the same scheme, falling back to a new
     byte table filled through UINT16_DECODE.
   TABLE is a byte table: the entry is overwritten in place, followed by
     the same "single value everywhere" test.
   TABLE is none of these and is not internal_equal to VALUE: a new table
     of the narrowest type that can hold both the old TABLE value and VALUE
     is created, filled with the old value, and entry IDX is set.

   The return statements, and so the action taken when the uniformity test
   succeeds, are on lines not visible in this excerpt.  put_char_id_table
   stores the result of this function back as the table to use. */
638 put_byte_table (Lisp_Object table, unsigned char idx, Lisp_Object value)
640 if (UINT8_BYTE_TABLE_P (table))
642 if (UINT8_VALUE_P (value))
644 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
645 if (!UINT8_BYTE_TABLE_P (value) &&
646 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
647 && uint8_byte_table_same_value_p (table))
652 else if (UINT16_VALUE_P (value))
/* NOTE(review): make_uint16_byte_table takes an unsigned short code, but
   it is given the Lisp_Object Qnil here (BT_UINT16_nil looks like the
   intended argument).  Every entry is overwritten by the copy loop below,
   so the fill value is not observable, but the argument type is wrong. */
654 Lisp_Object new = make_uint16_byte_table (Qnil);
657 for (i = 0; i < 256; i++)
659 XUINT16_BYTE_TABLE(new)->property[i]
660 = UINT8_TO_UINT16 (XUINT8_BYTE_TABLE(table)->property[i]);
662 XUINT16_BYTE_TABLE(new)->property[idx] = UINT16_ENCODE (value);
667 Lisp_Object new = make_byte_table (Qnil);
670 for (i = 0; i < 256; i++)
672 XBYTE_TABLE(new)->property[i]
673 = UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[i]);
675 XBYTE_TABLE(new)->property[idx] = value;
679 else if (UINT16_BYTE_TABLE_P (table))
681 if (UINT16_VALUE_P (value))
683 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
684 if (!UINT8_BYTE_TABLE_P (value) &&
685 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
686 && uint16_byte_table_same_value_p (table))
693 Lisp_Object new = make_byte_table (Qnil);
696 for (i = 0; i < 256; i++)
698 XBYTE_TABLE(new)->property[i]
699 = UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[i]);
701 XBYTE_TABLE(new)->property[idx] = value;
705 else if (BYTE_TABLE_P (table))
707 XBYTE_TABLE(table)->property[idx] = value;
708 if (!UINT8_BYTE_TABLE_P (value) &&
709 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
710 && byte_table_same_value_p (table))
715 else if (!internal_equal (table, value, 0))
717 if (UINT8_VALUE_P (table) && UINT8_VALUE_P (value))
719 table = make_uint8_byte_table (UINT8_ENCODE (table));
720 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
722 else if (UINT16_VALUE_P (table) && UINT16_VALUE_P (value))
724 table = make_uint16_byte_table (UINT16_ENCODE (table));
725 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
729 table = make_byte_table (table);
730 XBYTE_TABLE(table)->property[idx] = value;
/* Mark method for char-id-tables.  Only the extraction of the
   Lisp_Char_ID_Table pointer is visible in this excerpt; what is marked or
   returned is on lines not shown. */
737 mark_char_id_table (Lisp_Object obj)
739 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
/* Print method for char-id-tables.

   Writes "#<char-id-table " to PRINTCHARFUN, then for each of the 256
   top-level slots fetches the entry with get_byte_table and prints it: an
   entry that is Qunbound is written as "void", anything else goes through
   print_internal with ESCAPEFLAG.  Every entry except the first is
   preceded by "\n ".  The output is closed with ">".  OBJ and PRINTCHARFUN
   are protected with GCPRO2 for the duration. */
745 print_char_id_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
747 Lisp_Object table = XCHAR_ID_TABLE (obj)->table;
749 struct gcpro gcpro1, gcpro2;
750 GCPRO2 (obj, printcharfun);
752 write_c_string ("#<char-id-table ", printcharfun);
753 for (i = 0; i < 256; i++)
755 Lisp_Object elt = get_byte_table (table, i);
756 if (i != 0) write_c_string ("\n ", printcharfun);
757 if (EQ (elt, Qunbound))
758 write_c_string ("void", printcharfun);
760 print_internal (elt, printcharfun, escapeflag);
763 write_c_string (">", printcharfun);
/* Equality method for char-id-tables: compares the 256 top-level entries
   of the two underlying tables pairwise with internal_equal.  The return
   statements are not visible in this excerpt.
   NOTE(review): internal_equal is called with depth 0 rather than with a
   value derived from DEPTH, unlike byte_table_equal -- confirm that this
   is intended. */
767 char_id_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
769 Lisp_Object table1 = XCHAR_ID_TABLE (obj1)->table;
770 Lisp_Object table2 = XCHAR_ID_TABLE (obj2)->table;
773 for (i = 0; i < 256; i++)
775 if (!internal_equal (get_byte_table (table1, i),
776 get_byte_table (table2, i), 0))
783 char_id_table_hash (Lisp_Object obj, int depth)
785 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
787 return char_id_table_hash (cte->table, depth + 1);
/* Description of Lisp_Char_ID_Table: its table member is a single
   Lisp_Object.  The terminating entry is not visible in this excerpt. */
790 static const struct lrecord_description char_id_table_description[] = {
791 { XD_LISP_OBJECT, offsetof(Lisp_Char_ID_Table, table) },
/* Lrecord implementation for "char-id-table", using char_id_table_equal
   and char_id_table_description.  The remaining arguments are on lines
   not visible in this excerpt. */
795 DEFINE_LRECORD_IMPLEMENTATION ("char-id-table", char_id_table,
798 0, char_id_table_equal,
800 char_id_table_description,
/* Create a char-id-table: allocates a Lisp_Char_ID_Table lcrecord, sets
   its table slot to a fresh byte table whose entries are all INITVAL, and
   wraps the record into OBJ with XSETCHAR_ID_TABLE.  The declaration of
   OBJ and the return are not visible in this excerpt. */
804 make_char_id_table (Lisp_Object initval)
807 Lisp_Char_ID_Table *cte;
809 cte = alloc_lcrecord_type (Lisp_Char_ID_Table, &lrecord_char_id_table);
811 cte->table = make_byte_table (initval);
813 XSETCHAR_ID_TABLE (obj, cte);
/* Look CH up in the char-id-table TABLE.  CH is treated as an unsigned
   code whose four bytes index successive table levels, starting from
   XCHAR_ID_TABLE (table)->table with the most significant byte
   (code >> 24) and ending with the least significant one.  The opening
   lines of the nested call expression are not visible in this excerpt;
   put_char_id_table performs the same descent with get_byte_table. */
819 get_char_id_table (Emchar ch, Lisp_Object table)
821 unsigned int code = ch;
828 (XCHAR_ID_TABLE (table)->table,
829 (unsigned char)(code >> 24)),
830 (unsigned char) (code >> 16)),
831 (unsigned char) (code >> 8)),
832 (unsigned char) code);
/* Store VALUE for character CH in the char-id-table TABLE.

   The four bytes of CH (most significant first) select one entry per
   table level.  The levels are first read top-down with get_byte_table
   (table1 .. table4), then written bottom-up with put_byte_table: each
   put may return a different object than the one passed in, so its
   result is stored into the level above, and the final result replaces
   XCHAR_ID_TABLE (table)->table. */
835 void put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table);
837 put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table)
839 unsigned int code = ch;
840 Lisp_Object table1, table2, table3, table4;
842 table1 = XCHAR_ID_TABLE (table)->table;
843 table2 = get_byte_table (table1, (unsigned char)(code >> 24));
844 table3 = get_byte_table (table2, (unsigned char)(code >> 16));
845 table4 = get_byte_table (table3, (unsigned char)(code >> 8));
847 table4 = put_byte_table (table4, (unsigned char)code, value);
848 table3 = put_byte_table (table3, (unsigned char)(code >> 8), table4);
849 table2 = put_byte_table (table2, (unsigned char)(code >> 16), table3);
850 XCHAR_ID_TABLE (table)->table
851 = put_byte_table (table1, (unsigned char)(code >> 24), table2);
/* Globals used by the character-attribute code below:
   Vchar_attribute_hash_table is queried with Fgethash keyed by attribute,
   and the composition and variant tables are accessed with
   get_char_id_table / put_char_id_table.  The Q... objects are compared
   with EQ in to_char_id and Fput_char_attribute.  Further declarations in
   this group are on lines not visible in this excerpt. */
855 Lisp_Object Vchar_attribute_hash_table;
856 Lisp_Object Vcharacter_composition_table;
857 Lisp_Object Vcharacter_variant_table;
859 Lisp_Object Qideograph_daikanwa;
860 Lisp_Object Q_decomposition;
864 Lisp_Object Qisolated;
865 Lisp_Object Qinitial;
868 Lisp_Object Qvertical;
869 Lisp_Object QnoBreak;
870 Lisp_Object Qfraction;
/* Prototypes for helpers defined later in this file. */
880 Emchar to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg);
882 Lisp_Object put_char_ccs_code_point (Lisp_Object character,
883 Lisp_Object ccs, Lisp_Object value);
884 Lisp_Object remove_char_ccs (Lisp_Object character, Lisp_Object ccs);
/* Convert V to a character id (Emchar).  The visible branches compare V
   against a series of symbols (Qcompat, Qisolated, Qinitial, ... Qfont);
   the id returned for each, and the handling tested before Qcompat, are on
   lines not visible in this excerpt.  The last visible statement calls
   signal_simple_error with ERR_MSG and ERR_ARG. */
887 to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
893 else if (EQ (v, Qcompat))
895 else if (EQ (v, Qisolated))
897 else if (EQ (v, Qinitial))
899 else if (EQ (v, Qmedial))
901 else if (EQ (v, Qfinal))
903 else if (EQ (v, Qvertical))
905 else if (EQ (v, QnoBreak))
907 else if (EQ (v, Qfraction))
909 else if (EQ (v, Qsuper))
911 else if (EQ (v, Qsub))
913 else if (EQ (v, Qcircle))
915 else if (EQ (v, Qsquare))
917 else if (EQ (v, Qwide))
919 else if (EQ (v, Qnarrow))
921 else if (EQ (v, Qsmall))
923 else if (EQ (v, Qfont))
926 signal_simple_error (err_msg, err_arg);
/* Lisp primitive `get-composite-char' (minimum and maximum argument count
   are both 1).  Starting from Vcharacter_composition_table, an element V
   of LIST is converted with to_char_id and looked up with
   get_char_id_table; the result RET is then classified by whether it is a
   char-id-table and whether more list remains.  Two errors can be
   signalled, "Invalid table is found with" and "Invalid value for
   composition", both with LIST as data.  The loop structure and the
   returned values are on lines not visible in this excerpt. */
929 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
930 Return character corresponding with list.
934 Lisp_Object table = Vcharacter_composition_table;
935 Lisp_Object rest = list;
939 Lisp_Object v = Fcar (rest);
941 Emchar c = to_char_id (v, "Invalid value for composition", list);
943 ret = get_char_id_table (c, table);
948 if (!CHAR_ID_TABLE_P (ret))
953 else if (!CONSP (rest))
955 else if (CHAR_ID_TABLE_P (ret))
958 signal_simple_error ("Invalid table is found with", list);
960 signal_simple_error ("Invalid value for composition", list);
/* Lisp primitive `char-variants' (exactly one argument).  After
   CHECK_CHAR, returns a copy (Fcopy_list) of the entry stored for
   CHARACTER in Vcharacter_variant_table. */
963 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
964 Return variants of CHARACTER.
968 CHECK_CHAR (character);
969 return Fcopy_list (get_char_id_table (XCHAR (character),
970 Vcharacter_variant_table));
974 /* We store the char-attributes in hash tables with the names as the
975 key and the actual char-id-table object as the value. Occasionally
976 we need to use them in a list format. These routines provide us with that. */
/* Closure handed through elisp_maphash to
   add_char_attribute_to_list_mapper: it carries a pointer to the list
   being accumulated. */
978 struct char_attribute_list_closure
980 Lisp_Object *char_attribute_list;
/* Hash-table mapper used by Fchar_attribute_list: conses KEY onto the
   list that the closure points to.  VALUE is not used in the visible
   lines; the return value is on a line not visible in this excerpt. */
984 add_char_attribute_to_list_mapper (Lisp_Object key, Lisp_Object value,
985 void *char_attribute_list_closure)
987 /* This function can GC */
988 struct char_attribute_list_closure *calcl
989 = (struct char_attribute_list_closure*) char_attribute_list_closure;
990 Lisp_Object *char_attribute_list = calcl->char_attribute_list;
992 *char_attribute_list = Fcons (key, *char_attribute_list);
/* Lisp primitive `char-attribute-list' (no arguments).  Builds a list of
   the keys of Vchar_attribute_hash_table by running
   add_char_attribute_to_list_mapper over it with elisp_maphash.  The
   accumulating list starts as Qnil and is protected with GCPRO1 while the
   mapper runs. */
996 DEFUN ("char-attribute-list", Fchar_attribute_list, 0, 0, 0, /*
997 Return the list of all existing character attributes except coded-charsets.
1001 Lisp_Object char_attribute_list = Qnil;
1002 struct gcpro gcpro1;
1003 struct char_attribute_list_closure char_attribute_list_closure;
1005 GCPRO1 (char_attribute_list);
1006 char_attribute_list_closure.char_attribute_list = &char_attribute_list;
1007 elisp_maphash (add_char_attribute_to_list_mapper,
1008 Vchar_attribute_hash_table,
1009 &char_attribute_list_closure);
1011 return char_attribute_list;
/* Lisp primitive `find-char-attribute-table' (exactly one argument).
   Looks ATTRIBUTE up in Vchar_attribute_hash_table with Fgethash; Qnil is
   the default when there is no entry. */
1014 DEFUN ("find-char-attribute-table", Ffind_char_attribute_table, 1, 1, 0, /*
1015 Return char-id-table corresponding to ATTRIBUTE.
1019 return Fgethash (attribute, Vchar_attribute_hash_table, Qnil);
1023 /* We store the char-id-tables in hash tables with the attributes as
1024 the key and the actual char-id-table object as the value. Each
1025 char-id-table stores values of an attribute corresponding with
1026 characters. Occasionally we need to get attributes of a character
1027 in an association-list format. These routines provide us with that. */
/* Closure handed through elisp_maphash to
   add_char_attribute_alist_mapper.  It carries a pointer to the alist
   being accumulated; the mapper also reads a char_id member whose
   declaration is on a line not visible in this excerpt. */
1029 struct char_attribute_alist_closure
1032 Lisp_Object *char_attribute_alist;
/* Hash-table mapper used by Fchar_attribute_alist.  VALUE is the
   char-id-table of the attribute KEY; the closure's char_id is looked up
   in it, and when the result is not unbound the pair (KEY . result) is
   consed onto the alist that the closure points to.  The return value is
   on a line not visible in this excerpt. */
1036 add_char_attribute_alist_mapper (Lisp_Object key, Lisp_Object value,
1037 void *char_attribute_alist_closure)
1039 /* This function can GC */
1040 struct char_attribute_alist_closure *caacl =
1041 (struct char_attribute_alist_closure*) char_attribute_alist_closure;
1042 Lisp_Object ret = get_char_id_table (caacl->char_id, value);
1043 if (!UNBOUNDP (ret))
1045 Lisp_Object *char_attribute_alist = caacl->char_attribute_alist;
1046 *char_attribute_alist
1047 = Fcons (Fcons (key, ret), *char_attribute_alist);
/* Lisp primitive `char-attribute-alist' (exactly one argument).

   After CHECK_CHAR, the alist is built in two passes:
   1. add_char_attribute_alist_mapper is run over
      Vchar_attribute_hash_table, adding an (attribute . value) pair for
      each attribute table in which CHARACTER is bound;
   2. every entry of chlook->charset_by_leading_byte is visited, and when
      the charset's encoding table is a char-id-table that holds an
      integer for CHARACTER, the pair (charset . code-point) is added.

   GC protection calls, the test applied to each charset entry and the
   return are on lines not visible in this excerpt. */
1052 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
1053 Return the alist of attributes of CHARACTER.
1057 Lisp_Object alist = Qnil;
1060 CHECK_CHAR (character);
1062 struct gcpro gcpro1;
1063 struct char_attribute_alist_closure char_attribute_alist_closure;
1066 char_attribute_alist_closure.char_id = XCHAR (character);
1067 char_attribute_alist_closure.char_attribute_alist = &alist;
1068 elisp_maphash (add_char_attribute_alist_mapper,
1069 Vchar_attribute_hash_table,
1070 &char_attribute_alist_closure);
1074 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
1076 Lisp_Object ccs = chlook->charset_by_leading_byte[i];
1080 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1083 if ( CHAR_ID_TABLE_P (encoding_table)
1084 && INTP (cpos = get_char_id_table (XCHAR (character),
1087 alist = Fcons (Fcons (ccs, cpos), alist);
/* Lisp primitive `get-char-attribute' (exactly two arguments).

   After CHECK_CHAR, ATTRIBUTE is first tried as a charset with
   Ffind_charset: when it names one whose encoding table is a
   char-id-table, the entry for CHARACTER in that table is returned.
   Otherwise ATTRIBUTE is looked up in Vchar_attribute_hash_table and,
   when a table is found, CHARACTER's entry in it is fetched and tested for
   being bound.  The values returned on the remaining paths are on lines
   not visible in this excerpt. */
1094 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
1095 Return the value of CHARACTER's ATTRIBUTE.
1097 (character, attribute))
1101 CHECK_CHAR (character);
1102 if (!NILP (ccs = Ffind_charset (attribute)))
1104 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1106 if (CHAR_ID_TABLE_P (encoding_table))
1107 return get_char_id_table (XCHAR (character), encoding_table);
1113 Lisp_Object table = Fgethash (attribute,
1114 Vchar_attribute_hash_table,
1116 if (!UNBOUNDP (table))
1118 Lisp_Object ret = get_char_id_table (XCHAR (character), table);
1119 if (!UNBOUNDP (ret))
/* Lisp primitive `put-char-attribute' (exactly three arguments).

   After CHECK_CHAR, ATTRIBUTE is resolved with Ffind_charset and handled
   in one of these ways (the conditions of some branches are on lines not
   visible in this excerpt):

   - coded charset: delegated to put_char_ccs_code_point.
   - Q_decomposition: VALUE is validated ("Invalid value for
     ->decomposition").  For a list with more than one element, a vector
     SEQ of the list's length is built, each element is converted with
     to_char_id, and the sequence is registered in
     Vcharacter_composition_table, creating intermediate char-id-tables
     with make_char_id_table (Qnil) where none exists.  For a one-element
     list, CHARACTER is consed onto the variant list of that element in
     Vcharacter_variant_table and SEQ becomes a one-element vector.
   - Q_ucs: VALUE is validated ("Invalid value for ->ucs") and CHARACTER
     is added to the variant list of the given code unless already a
     member.
   - finally, the attribute's char-id-table is fetched from
     Vchar_attribute_hash_table, a new one made with
     make_char_id_table (Qunbound) and registered with Fputhash, and VALUE
     is stored for CHARACTER with put_char_id_table.

   NOTE(review): in the one-element decomposition case, membership is
   tested with Fmemq (v, ret) but the object consed on is CHARACTER; the
   ->ucs case tests Fmemq (character, ret).  Confirm which is intended. */
1126 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
1127 Store CHARACTER's ATTRIBUTE with VALUE.
1129 (character, attribute, value))
1133 CHECK_CHAR (character);
1134 ccs = Ffind_charset (attribute);
1137 return put_char_ccs_code_point (character, ccs, value);
1139 else if (EQ (attribute, Q_decomposition))
1144 signal_simple_error ("Invalid value for ->decomposition",
1147 if (CONSP (Fcdr (value)))
1149 Lisp_Object rest = value;
1150 Lisp_Object table = Vcharacter_composition_table;
1154 GET_EXTERNAL_LIST_LENGTH (rest, len);
1155 seq = make_vector (len, Qnil);
1157 while (CONSP (rest))
1159 Lisp_Object v = Fcar (rest);
1162 = to_char_id (v, "Invalid value for ->decomposition", value);
1165 XVECTOR_DATA(seq)[i++] = v;
1167 XVECTOR_DATA(seq)[i++] = make_char (c);
1171 put_char_id_table (c, character, table);
1176 ntable = get_char_id_table (c, table);
1177 if (!CHAR_ID_TABLE_P (ntable))
1179 ntable = make_char_id_table (Qnil);
1180 put_char_id_table (c, ntable, table);
1188 Lisp_Object v = Fcar (value);
1192 Emchar c = XINT (v);
1194 = get_char_id_table (c, Vcharacter_variant_table);
1196 if (NILP (Fmemq (v, ret)))
1198 put_char_id_table (c, Fcons (character, ret),
1199 Vcharacter_variant_table);
1202 seq = make_vector (1, v);
1206 else if (EQ (attribute, Q_ucs))
1212 signal_simple_error ("Invalid value for ->ucs", value);
1216 ret = get_char_id_table (c, Vcharacter_variant_table);
1217 if (NILP (Fmemq (character, ret)))
1219 put_char_id_table (c, Fcons (character, ret),
1220 Vcharacter_variant_table);
1224 Lisp_Object table = Fgethash (attribute,
1225 Vchar_attribute_hash_table,
1230 table = make_char_id_table (Qunbound);
1231 Fputhash (attribute, table, Vchar_attribute_hash_table);
1233 put_char_id_table (XCHAR (character), value, table);
/* Lisp primitive `remove-char-attribute' (exactly two arguments).  After
   CHECK_CHAR, ATTRIBUTE is resolved with Ffind_charset; the coded-charset
   case is delegated to remove_char_ccs.  Otherwise the attribute's table
   is fetched from Vchar_attribute_hash_table and, when one exists,
   CHARACTER's entry in it is set to Qunbound.  The branch conditions and
   return values are on lines not visible in this excerpt. */
1238 DEFUN ("remove-char-attribute", Fremove_char_attribute, 2, 2, 0, /*
1239 Remove CHARACTER's ATTRIBUTE.
1241 (character, attribute))
1245 CHECK_CHAR (character);
1246 ccs = Ffind_charset (attribute);
1249 return remove_char_ccs (character, ccs);
1253 Lisp_Object table = Fgethash (attribute,
1254 Vchar_attribute_hash_table,
1256 if (!UNBOUNDP (table))
1258 put_char_id_table (XCHAR (character), Qunbound, table);
1265 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
1267 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
1269 /* ad-hoc method for `ascii' */
1270 if ((CHARSET_CHARS (cs) == 94) &&
1271 (CHARSET_BYTE_OFFSET (cs) != 33))
1272 return 128 - CHARSET_BYTE_OFFSET (cs);
1274 return CHARSET_CHARS (cs);
1277 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
/* Validate the decoding-table vector V of a charset with DIM dimensions.
   A vector longer than CCS_LEN is tested for first.  Each element is then
   examined: one that is neither nil nor a character is handled by a
   recursive call with DIM - 1 on the visible path.  The values returned,
   and the conditions that select the recursive call, are on lines not
   visible in this excerpt. */
1279 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
1281 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
1285 if (XVECTOR_LENGTH (v) > ccs_len)
1288 for (i = 0; i < XVECTOR_LENGTH (v); i++)
1290 Lisp_Object c = XVECTOR_DATA(v)[i];
1292 if (!NILP (c) && !CHARP (c))
1296 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
/* Remove the entry for a code point from the decoding-table vector V.
   The first line below belongs to the prototype and the second to the
   definition; the continuation lines of both (carrying the remaining
   parameter) are not visible in this excerpt.  An index is derived from
   one byte of code_point, ((code_point >> (8 * dim)) & 255) - BYTE_OFFSET,
   and used to fetch the element NV; an element of V is finally set to
   Qnil.  The loop or condition structure around these statements is not
   visible. */
1308 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1311 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1321 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1322 nv = XVECTOR_DATA(v)[i];
1328 XVECTOR_DATA(v)[i] = Qnil;
/* Store CHARACTER for CODE_POINT in the decoding-table vector V.  The
   length of V is kept as CCS_LEN.  An index is derived from one byte of
   CODE_POINT, ((code_point >> (8 * dim)) & 255) - BYTE_OFFSET; on one
   path a missing sub-vector of CCS_LEN nil entries is created with
   make_older_vector and installed at that index, and finally an element
   is set to CHARACTER.  The loop or condition structure around these
   statements is on lines not visible in this excerpt. */
1332 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1333 int code_point, Lisp_Object character);
1335 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1336 int code_point, Lisp_Object character)
1340 int ccs_len = XVECTOR_LENGTH (v);
1345 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1346 nv = XVECTOR_DATA(v)[i];
1350 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
1356 XVECTOR_DATA(v)[i] = character;
/* Record that CHARACTER has code point VALUE in the coded charset CCS.

   Unless CCS is the `ucs' charset and VALUE equals the character's own
   code, the charset's decoding table is updated:
   - VALUE is normalized to a code point.  In the obsolete list-of-bytes
     form each byte is validated ("Invalid value for coded-charset") and
     the bytes are combined as (code_point << 8) | j.  An integer VALUE is
     used directly and, for a charset whose graphic is 1, masked with
     0x7F7F7F7F.  Anything else signals the same error.
   - a code point previously recorded for CHARACTER (read with
     Fget_char_attribute) is removed from the decoding table;
   - a decoding table is created with make_older_vector on the visible
     path (its condition is not shown), and CHARACTER is entered with
     decoding_table_put_char.

   Then the encoding table is created with make_char_id_table (Qnil) when
   it is nil, and VALUE is stored for CHARACTER in it.  Several conditions
   and the return value are on lines not visible in this excerpt. */
1360 put_char_ccs_code_point (Lisp_Object character,
1361 Lisp_Object ccs, Lisp_Object value)
1363 Lisp_Object encoding_table;
1365 if (!EQ (XCHARSET_NAME (ccs), Qucs)
1366 || (XCHAR (character) != XINT (value)))
1368 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
1369 int dim = XCHARSET_DIMENSION (ccs);
1370 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
1371 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
1375 { /* obsolete representation: value must be a list of bytes */
1376 Lisp_Object ret = Fcar (value);
1380 signal_simple_error ("Invalid value for coded-charset", value);
1381 code_point = XINT (ret);
1382 if (XCHARSET_GRAPHIC (ccs) == 1)
1384 rest = Fcdr (value);
1385 while (!NILP (rest))
1390 signal_simple_error ("Invalid value for coded-charset",
1394 signal_simple_error ("Invalid value for coded-charset",
1397 if (XCHARSET_GRAPHIC (ccs) == 1)
1399 code_point = (code_point << 8) | j;
1402 value = make_int (code_point);
1404 else if (INTP (value))
1406 code_point = XINT (value);
1407 if (XCHARSET_GRAPHIC (ccs) == 1)
1409 code_point &= 0x7F7F7F7F;
1410 value = make_int (code_point);
1414 signal_simple_error ("Invalid value for coded-charset", value);
1418 Lisp_Object cpos = Fget_char_attribute (character, ccs);
1421 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
1426 XCHARSET_DECODING_TABLE (ccs)
1427 = v = make_older_vector (ccs_len, Qnil);
1430 decoding_table_put_char (v, dim, byte_offset, code_point, character);
1432 if (NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
1434 XCHARSET_ENCODING_TABLE (ccs)
1435 = encoding_table = make_char_id_table (Qnil);
1437 put_char_id_table (XCHAR (character), value, encoding_table);
/* Remove CHARACTER from the coded charset CCS.  When the charset's
   decoding table is a vector, the character's current code point is read
   with Fget_char_attribute and removed from it with
   decoding_table_remove_char.  When the encoding table is a
   char-id-table, the character's entry there is set to Qnil.  The test
   applied to the code point and the return value are on lines not visible
   in this excerpt. */
1442 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
1444 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
1445 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1447 if (VECTORP (decoding_table))
1449 Lisp_Object cpos = Fget_char_attribute (character, ccs);
1453 decoding_table_remove_char (decoding_table,
1454 XCHARSET_DIMENSION (ccs),
1455 XCHARSET_BYTE_OFFSET (ccs),
1459 if (CHAR_ID_TABLE_P (encoding_table))
1461 put_char_id_table (XCHAR (character), Qnil, encoding_table);
/* Declarations of two Lisp primitives that Fdefine_char calls; their
   definitions are not in this excerpt. */
1466 EXFUN (Fmake_char, 3);
1467 EXFUN (Fdecode_char, 2);
/* Lisp primitive `define-char' (exactly one argument, the alist
   ATTRIBUTES).

   Step 1 -- find the character object.  The value of the Qucs entry is
   read first.  On one path the alist is scanned for a cell whose key names
   a charset with a non-zero final byte or a positive UCS maximum, and the
   character is obtained from that cell with Fmake_char or Fdecode_char.
   Failing that, the value of the Q_ucs entry is used with 0x100000 added
   to it.  Otherwise the Qucs value itself must be an integer and becomes
   the character.  Malformed input signals "Invalid argument" with
   ATTRIBUTES.

   Step 2 -- store the attributes.  The alist is walked again and each cell
   is passed to Fput_char_attribute as (character, key, value).  Cells
   keyed by Qmorohashi_daikanwa and Qideograph_daikanwa are examined
   specially; what happens to them is on lines not visible here.

   The conditions that select between these paths, the `setup_attributes'
   label and the return value are on lines not visible in this excerpt. */
1469 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
1470 Store character's ATTRIBUTES.
1474 Lisp_Object rest = attributes;
1475 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
1476 Lisp_Object character;
1478 Lisp_Object daikanwa = Qnil;
1483 while (CONSP (rest))
1485 Lisp_Object cell = Fcar (rest);
1489 signal_simple_error ("Invalid argument", attributes);
1490 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
1491 && ((XCHARSET_FINAL (ccs) != 0) ||
1492 (XCHARSET_UCS_MAX (ccs) > 0)) )
1496 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
1498 character = Fdecode_char (ccs, cell);
1499 goto setup_attributes;
1503 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
1506 signal_simple_error ("Invalid argument", attributes);
1508 character = make_char (XINT (code) + 0x100000);
1509 goto setup_attributes;
1513 else if (!INTP (code))
1514 signal_simple_error ("Invalid argument", attributes);
1516 character = make_char (XINT (code));
1520 while (CONSP (rest))
1522 Lisp_Object cell = Fcar (rest);
1524 Lisp_Object key = Fcar (cell);
1525 Lisp_Object value = Fcdr (cell);
1529 signal_simple_error ("Invalid argument", attributes);
1532 if (EQ (key, Qmorohashi_daikanwa))
1535 GET_EXTERNAL_LIST_LENGTH (value, len);
1539 if (NILP (daikanwa))
1540 daikanwa = Fcdr (Fassq (Qideograph_daikanwa, rest));
1541 if (EQ (Fcar (value), daikanwa))
1545 else if (EQ (key, Qideograph_daikanwa))
1549 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
/* Further file-level globals: a version object, a leading-code integer,
   and Lisp_Object variables for charset-related symbols and the charset
   hash table.  The bare `Q...,' lines below are entries of a longer
   declarator list whose opening and closing lines are not visible in this
   excerpt. */
1558 Lisp_Object Vutf_2000_version;
1562 int leading_code_private_11;
1565 Lisp_Object Qcharsetp;
1567 /* Qdoc_string, Qdimension, Qchars defined in general.c */
1568 Lisp_Object Qregistry, Qfinal, Qgraphic;
1569 Lisp_Object Qdirection;
1570 Lisp_Object Qreverse_direction_charset;
1571 Lisp_Object Qleading_byte;
1572 Lisp_Object Qshort_name, Qlong_name;
1586 Qcyrillic_iso8859_5,
1588 Qjapanese_jisx0208_1978,
1591 Qjapanese_jisx0208_1990,
1594 Qchinese_cns11643_1,
1595 Qchinese_cns11643_2,
1600 Qlatin_viscii_lower,
1601 Qlatin_viscii_upper,
1602 Qvietnamese_viscii_lower,
1603 Qvietnamese_viscii_upper,
1632 Lisp_Object Ql2r, Qr2l;
1634 Lisp_Object Vcharset_hash_table;
1636 /* Composite characters are characters constructed by overstriking two
1637 or more regular characters.
1639 1) The old Mule implementation involves storing composite characters
1640 in a buffer as a tag followed by all of the actual characters
1641 used to make up the composite character. I think this is a bad
1642 idea; it greatly complicates code that wants to handle strings
1643 one character at a time because it has to deal with the possibility
1644 of great big ungainly characters. It's much more reasonable to
1645 simply store an index into a table of composite characters.
1647 2) The current implementation only allows for 16,384 separate
1648 composite characters over the lifetime of the XEmacs process.
1649 This could become a potential problem if the user
1650 edited lots of different files that use composite characters.
1651 Due to FSF bogosity, increasing the number of allowable
1652 composite characters under Mule would decrease the number
1653 of possible faces that can exist. Mule already has shrunk
1654 this to 2048, and further shrinkage would become uncomfortable.
1655 No such problems exist in XEmacs.
1657 Composite characters could be represented as 0x80 C1 C2 C3,
1658 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1659 for slightly under 2^20 (one million) composite characters
1660 over the XEmacs process lifetime, and you only need to
1661 increase the size of a Mule character from 19 to 21 bits.
1662 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1663 85 million (slightly over 2^26) composite characters. */
1666 /************************************************************************/
1667 /* Basic Emchar functions */
1668 /************************************************************************/
1670 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1671 string in STR. Returns the number of bytes stored.
1672 Do not call this directly. Use the macro set_charptr_emchar() instead.

   Two encoders are visible in the body.  The first picks a length of 2
   to 6 bytes from the magnitude of C (limits 0x7ff, 0xffff, 0x1fffff,
   0x3ffffff) and writes a lead byte (0xc0, 0xe0, 0xf0, 0xf8 or 0xfc
   or'ed with the top bits) followed by continuation bytes that each
   carry six bits tagged with 0x80.  The second splits C with
   BREAKUP_CHAR and writes a leading byte, preceded by
   PRIVATE_LEADING_BYTE_PREFIX when the leading byte is private.
   NOTE(review): the preprocessor lines that select between the two are
   not visible here -- presumably UTF2000 versus classic Mule; confirm.
1676 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1682 Lisp_Object charset;
1691 else if ( c <= 0x7ff )
1693 *p++ = (c >> 6) | 0xc0;
1694 *p++ = (c & 0x3f) | 0x80;
1696 else if ( c <= 0xffff )
1698 *p++ = (c >> 12) | 0xe0;
1699 *p++ = ((c >> 6) & 0x3f) | 0x80;
1700 *p++ = (c & 0x3f) | 0x80;
1702 else if ( c <= 0x1fffff )
1704 *p++ = (c >> 18) | 0xf0;
1705 *p++ = ((c >> 12) & 0x3f) | 0x80;
1706 *p++ = ((c >> 6) & 0x3f) | 0x80;
1707 *p++ = (c & 0x3f) | 0x80;
1709 else if ( c <= 0x3ffffff )
1711 *p++ = (c >> 24) | 0xf8;
1712 *p++ = ((c >> 18) & 0x3f) | 0x80;
1713 *p++ = ((c >> 12) & 0x3f) | 0x80;
1714 *p++ = ((c >> 6) & 0x3f) | 0x80;
1715 *p++ = (c & 0x3f) | 0x80;
1719 *p++ = (c >> 30) | 0xfc;
1720 *p++ = ((c >> 24) & 0x3f) | 0x80;
1721 *p++ = ((c >> 18) & 0x3f) | 0x80;
1722 *p++ = ((c >> 12) & 0x3f) | 0x80;
1723 *p++ = ((c >> 6) & 0x3f) | 0x80;
1724 *p++ = (c & 0x3f) | 0x80;
1727 BREAKUP_CHAR (c, charset, c1, c2);
1728 lb = CHAR_LEADING_BYTE (c);
1729 if (LEADING_BYTE_PRIVATE_P (lb))
1730 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1732 if (EQ (charset, Vcharset_control_1))
1741 /* Return the first character from a Mule-encoded string in STR,
1742 assuming it's non-ASCII. Do not call this directly.
1743 Use the macro charptr_emchar() instead. */
/* Two decoders are visible.  The first classifies the lead byte B by
   threshold (>= 0xf8, 0xf0, 0xe0, 0xc0) and then folds LEN further bytes
   into the result, six bits (b & 0x3f) at a time.  The second returns
   the following byte minus 0x20 for LEADING_BYTE_CONTROL_1, otherwise
   finds the charset from the leading byte and builds the character with
   MAKE_CHAR, reading a second position byte for dimension-2 charsets.
   NOTE(review): the lines selecting between the two decoders are not
   visible here. */
1746 non_ascii_charptr_emchar (const Bufbyte *str)
1759 else if ( b >= 0xf8 )
1764 else if ( b >= 0xf0 )
1769 else if ( b >= 0xe0 )
1774 else if ( b >= 0xc0 )
1784 for( ; len > 0; len-- )
1787 ch = ( ch << 6 ) | ( b & 0x3f );
1791 Bufbyte i0 = *str, i1, i2 = 0;
1792 Lisp_Object charset;
1794 if (i0 == LEADING_BYTE_CONTROL_1)
1795 return (Emchar) (*++str - 0x20);
1797 if (LEADING_BYTE_PREFIX_P (i0))
1802 charset = CHARSET_BY_LEADING_BYTE (i0);
1803 if (XCHARSET_DIMENSION (charset) == 2)
1806 return MAKE_CHAR (charset, i1, i2);
1810 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1811 Do not call this directly. Use the macro valid_char_p() instead. */
/* Outline of the visible checks: CH is split into CHAR_FIELD1/2/3.  One
   branch tests FIELD2 against the official and private ranges and looks
   the charset up from FIELD2; the other does the same with FIELD1, with
   an additional hash-table lookup for composite characters when
   ENABLE_COMPOSITE_CHARS is defined.  Each branch ends with
   `XCHARSET_CHARS (charset) == 96', which comes after the tests for the
   0x20 and 0x7F positions.
   NOTE(review): the return statements between these tests are not
   visible here, so the accept/reject outcome of each test should be
   confirmed against the full file. */
1815 non_ascii_valid_char_p (Emchar ch)
1819 /* Must have only lowest 19 bits set */
1823 f1 = CHAR_FIELD1 (ch);
1824 f2 = CHAR_FIELD2 (ch);
1825 f3 = CHAR_FIELD3 (ch);
1829 Lisp_Object charset;
1831 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1832 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1833 f2 > MAX_CHAR_FIELD2_PRIVATE)
1838 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1839 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1843 NOTE: This takes advantage of the fact that
1844 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1845 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1847 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1848 if (EQ (charset, Qnil))
1850 return (XCHARSET_CHARS (charset) == 96);
1854 Lisp_Object charset;
1856 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1857 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1858 f1 > MAX_CHAR_FIELD1_PRIVATE)
1860 if (f2 < 0x20 || f3 < 0x20)
1863 #ifdef ENABLE_COMPOSITE_CHARS
1864 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1866 if (UNBOUNDP (Fgethash (make_int (ch),
1867 Vcomposite_char_char2string_hash_table,
1872 #endif /* ENABLE_COMPOSITE_CHARS */
1874 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1875 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1878 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1880 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1883 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1885 if (EQ (charset, Qnil))
1887 return (XCHARSET_CHARS (charset) == 96);
1893 /************************************************************************/
1894 /* Basic string functions */
1895 /************************************************************************/
1897 /* Copy the character pointed to by PTR into STR, assuming it's
1898 non-ASCII. Do not call this directly. Use the macro
1899 charptr_copy_char() instead. */
/* The switch dispatches on REP_BYTES_BY_FIRST_BYTE (*strptr) and relies
   on case fallthrough: entering at case N copies the N-1 bytes that
   follow the first one.  The value returned is the number of bytes now
   in STR (strptr + 1 - str). */
1902 non_ascii_charptr_copy_char (const Bufbyte *ptr, Bufbyte *str)
1904 Bufbyte *strptr = str;
1906 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1908 /* Notice fallthrough. */
1910 case 6: *++strptr = *ptr++;
1911 case 5: *++strptr = *ptr++;
1913 case 4: *++strptr = *ptr++;
1914 case 3: *++strptr = *ptr++;
1915 case 2: *++strptr = *ptr;
1920 return strptr + 1 - str;
1924 /************************************************************************/
1925 /* streams of Emchars */
1926 /************************************************************************/
1928 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1929 The functions below are not meant to be called directly; use
1930 the macros in insdel.h. */
/* Finish reading a multi-byte character from STREAM.  CH is the first
   byte, already read by the caller; it is stored in str[0], the
   remaining bytes are fetched one at a time with Lstream_getc (how many
   is decided by REP_BYTES_BY_FIRST_BYTE (CH), using case fallthrough),
   and the assembled bytes are decoded with charptr_emchar.
   NOTE(review): the lines between each Lstream_getc call and the
   following store are not visible here; check there how a failed read
   is handled. */
1933 Lstream_get_emchar_1 (Lstream *stream, int ch)
1935 Bufbyte str[MAX_EMCHAR_LEN];
1936 Bufbyte *strptr = str;
1938 str[0] = (Bufbyte) ch;
1939 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1941 /* Notice fallthrough. */
1944 ch = Lstream_getc (stream);
1946 *++strptr = (Bufbyte) ch;
1948 ch = Lstream_getc (stream);
1950 *++strptr = (Bufbyte) ch;
1953 ch = Lstream_getc (stream);
1955 *++strptr = (Bufbyte) ch;
1957 ch = Lstream_getc (stream);
1959 *++strptr = (Bufbyte) ch;
1961 ch = Lstream_getc (stream);
1963 *++strptr = (Bufbyte) ch;
1968 return charptr_emchar (str);
/* Encode CH into a local buffer of MAX_EMCHAR_LEN bytes with
   set_charptr_emchar and write the encoded bytes to STREAM.  Returns
   whatever Lstream_write returns. */
1972 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1974 Bufbyte str[MAX_EMCHAR_LEN];
1975 Bytecount len = set_charptr_emchar (str, ch);
1976 return Lstream_write (stream, str, len);
/* Encode CH with set_charptr_emchar and push the encoded bytes back
   onto STREAM with Lstream_unread. */
1980 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1982 Bufbyte str[MAX_EMCHAR_LEN];
1983 Bytecount len = set_charptr_emchar (str, ch);
1984 Lstream_unread (stream, str, len);
1988 /************************************************************************/
1989 /* charset object */
1990 /************************************************************************/
/* GC mark method for charset objects: marks the short name, long name,
   doc string, registry, CCL program and encoding table slots.
   NOTE(review): the mark of decoding_table is commented out below even
   though charset_description lists that slot as a Lisp object; confirm
   that leaving it unmarked here is intentional. */
1993 mark_charset (Lisp_Object obj)
1995 Lisp_Charset *cs = XCHARSET (obj);
1997 mark_object (cs->short_name);
1998 mark_object (cs->long_name);
1999 mark_object (cs->doc_string);
2000 mark_object (cs->registry);
2001 mark_object (cs->ccl_program);
2003 mark_object (cs->encoding_table);
2004 /* mark_object (cs->decoding_table); */
/* Print method for charset objects.  Writes "#<charset " followed by the
   name, short name, long name and doc string, then a formatted summary
   (" N^DIM l2r|r2l cols=N gN final='C' reg="), the registry, and finally
   the object's uid in hex before the closing ">".
   The error call near the top reports "printing unreadable object";
   NOTE(review): its guarding condition is not visible here. */
2010 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
2012 Lisp_Charset *cs = XCHARSET (obj);
2016 error ("printing unreadable object #<charset %s 0x%x>",
2017 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
2020 write_c_string ("#<charset ", printcharfun);
2021 print_internal (CHARSET_NAME (cs), printcharfun, 0);
2022 write_c_string (" ", printcharfun);
2023 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
2024 write_c_string (" ", printcharfun);
2025 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
2026 write_c_string (" ", printcharfun);
2027 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
2028 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
2030 CHARSET_DIMENSION (cs),
2031 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
2032 CHARSET_COLUMNS (cs),
2033 CHARSET_GRAPHIC (cs),
2034 CHARSET_FINAL (cs));
2035 write_c_string (buf, printcharfun);
2036 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
2037 sprintf (buf, " 0x%x>", cs->header.uid);
2038 write_c_string (buf, printcharfun);
/* Table naming the Lisp_Object slots of Lisp_Charset by offset; it is
   handed to DEFINE_LRECORD_IMPLEMENTATION for the "charset" type. */
2041 static const struct lrecord_description charset_description[] = {
2042 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
2043 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
2044 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
2045 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
2046 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
2047 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
2048 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
2050 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
2051 { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
/* Define the "charset" lrecord type, wiring in mark_charset,
   print_charset and the charset_description slot table. */
2056 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
2057 mark_charset, print_charset, 0, 0, 0,
2058 charset_description,
2060 /* Make a new charset. */
/* Allocates a Lisp_Charset and fills its slots from the arguments; the
   CCL program, reverse-direction charset and both conversion tables
   start out as Qnil.  CHARSET_TYPE is derived from the CHARS/DIMENSION
   pair, and CHARSET_REP_BYTES is 1 for ASCII and otherwise DIMENSION + 1
   or DIMENSION + 2.  The object is then entered in
   chlook->charset_by_attributes, chlook->charset_by_leading_byte (both
   asserted to be free beforehand) and Vcharset_hash_table under NAME.
   NOTE(review): no visible line stores the DECODING_TABLE argument --
   the slot is set to Qnil; confirm that callers passing a table expect
   this.  The conditions guarding the registration steps are only partly
   visible here. */
2063 make_charset (Charset_ID id, Lisp_Object name,
2064 unsigned short chars, unsigned char dimension,
2065 unsigned char columns, unsigned char graphic,
2066 Bufbyte final, unsigned char direction, Lisp_Object short_name,
2067 Lisp_Object long_name, Lisp_Object doc,
2069 Lisp_Object decoding_table,
2070 Emchar ucs_min, Emchar ucs_max,
2071 Emchar code_offset, unsigned char byte_offset)
2073 unsigned char type = 0;
2075 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
2079 XSETCHARSET (obj, cs);
2081 CHARSET_ID (cs) = id;
2082 CHARSET_NAME (cs) = name;
2083 CHARSET_SHORT_NAME (cs) = short_name;
2084 CHARSET_LONG_NAME (cs) = long_name;
2085 CHARSET_CHARS (cs) = chars;
2086 CHARSET_DIMENSION (cs) = dimension;
2087 CHARSET_DIRECTION (cs) = direction;
2088 CHARSET_COLUMNS (cs) = columns;
2089 CHARSET_GRAPHIC (cs) = graphic;
2090 CHARSET_FINAL (cs) = final;
2091 CHARSET_DOC_STRING (cs) = doc;
2092 CHARSET_REGISTRY (cs) = reg;
2093 CHARSET_CCL_PROGRAM (cs) = Qnil;
2094 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
2096 CHARSET_DECODING_TABLE(cs) = Qnil;
2097 CHARSET_ENCODING_TABLE(cs) = Qnil;
2098 CHARSET_UCS_MIN(cs) = ucs_min;
2099 CHARSET_UCS_MAX(cs) = ucs_max;
2100 CHARSET_CODE_OFFSET(cs) = code_offset;
2101 CHARSET_BYTE_OFFSET(cs) = byte_offset;
2104 switch (CHARSET_CHARS (cs))
2107 switch (CHARSET_DIMENSION (cs))
2110 type = CHARSET_TYPE_94;
2113 type = CHARSET_TYPE_94X94;
2118 switch (CHARSET_DIMENSION (cs))
2121 type = CHARSET_TYPE_96;
2124 type = CHARSET_TYPE_96X96;
2130 switch (CHARSET_DIMENSION (cs))
2133 type = CHARSET_TYPE_128;
2136 type = CHARSET_TYPE_128X128;
2141 switch (CHARSET_DIMENSION (cs))
2144 type = CHARSET_TYPE_256;
2147 type = CHARSET_TYPE_256X256;
2154 CHARSET_TYPE (cs) = type;
2158 if (id == LEADING_BYTE_ASCII)
2159 CHARSET_REP_BYTES (cs) = 1;
2161 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
2163 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
2168 /* some charsets do not have final characters. This includes
2169 ASCII, Control-1, Composite, and the two faux private
2172 if (code_offset == 0)
2174 assert (NILP (chlook->charset_by_attributes[type][final]));
2175 chlook->charset_by_attributes[type][final] = obj;
2178 assert (NILP (chlook->charset_by_attributes[type][final][direction]));
2179 chlook->charset_by_attributes[type][final][direction] = obj;
2183 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
2184 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
2186 /* Some charsets are "faux" and don't have names or really exist at
2187 all except in the leading-byte table. */
2189 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused leading byte for a new charset of the given
   DIMENSION.  Each visible allocator compares a counter in chlook with
   its MAX_LEADING_BYTE_PRIVATE* limit and otherwise post-increments the
   counter; "No more character sets free for this dimension" is
   signalled with DIMENSION as data when no byte can be allocated.
   NOTE(review): three counters are visible (a general one and one each
   for 1- and 2-byte charsets); the lines choosing among them are not
   visible here. */
2194 get_unallocated_leading_byte (int dimension)
2199 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
2202 lb = chlook->next_allocated_leading_byte++;
2206 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
2209 lb = chlook->next_allocated_1_byte_leading_byte++;
2213 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
2216 lb = chlook->next_allocated_2_byte_leading_byte++;
2222 ("No more character sets free for this dimension",
2223 make_int (dimension));
/* Compute the character code for position (C1, C2) of CHARSET by
   arithmetic alone.  When the charset has a non-zero UCS_MAX, the
   position (C1 alone for dimension 1, otherwise C1 * CHARS + C2, each
   less BYTE_OFFSET) is shifted by -CODE_OFFSET + UCS_MIN, and a result
   outside [UCS_MIN, UCS_MAX] signals an error.  Otherwise the code is
   placed in a slot chosen by the final byte (FINAL - '0'); for
   dimension 2 the slots are 94*94 or 96*96 codes wide and start at
   MIN_CHAR_94x94 or MIN_CHAR_96x96, with positions counted from 33
   (94-sets) or 32 (96-sets). */
2230 make_builtin_char (Lisp_Object charset, int c1, int c2)
2232 if (XCHARSET_UCS_MAX (charset))
2235 = (XCHARSET_DIMENSION (charset) == 1
2237 c1 - XCHARSET_BYTE_OFFSET (charset)
2239 (c1 - XCHARSET_BYTE_OFFSET (charset)) * XCHARSET_CHARS (charset)
2240 + c2 - XCHARSET_BYTE_OFFSET (charset))
2241 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
2242 if ((code < XCHARSET_UCS_MIN (charset))
2243 || (XCHARSET_UCS_MAX (charset) < code))
2244 signal_simple_error ("Arguments makes invalid character",
2248 else if (XCHARSET_DIMENSION (charset) == 1)
2250 switch (XCHARSET_CHARS (charset))
2254 + (XCHARSET_FINAL (charset) - '0') * 94 + (c1 - 33);
2257 + (XCHARSET_FINAL (charset) - '0') * 96 + (c1 - 32);
2264 switch (XCHARSET_CHARS (charset))
2267 return MIN_CHAR_94x94
2268 + (XCHARSET_FINAL (charset) - '0') * 94 * 94
2269 + (c1 - 33) * 94 + (c2 - 33);
2271 return MIN_CHAR_96x96
2272 + (XCHARSET_FINAL (charset) - '0') * 96 * 96
2273 + (c1 - 32) * 96 + (c2 - 32);
/* Compute the code point of character CH within CHARSET from the
   charset's range parameters (the reverse of the arithmetic in
   make_builtin_char).  If CH lies in [UCS_MIN, UCS_MAX], the offset
   D = CH - UCS_MIN + CODE_OFFSET is split into one digit per dimension
   in base XCHARSET_CHARS; each digit gets BYTE_OFFSET added and the
   digits are packed eight bits apiece, most significant first.
   Otherwise, for charsets whose CODE_OFFSET is 0, CH is measured against
   the MIN_CHAR_94 / 96 / 94x94 / 96x96 slot selected by the final byte,
   with positions counted from 33 (94-sets) or 32 (96-sets).
   NOTE(review): the upper-bound tests on D and the value returned when
   CH does not belong to CHARSET are not visible here. */
2281 range_charset_code_point (Lisp_Object charset, Emchar ch)
2285 if ((XCHARSET_UCS_MIN (charset) <= ch)
2286 && (ch <= XCHARSET_UCS_MAX (charset)))
2288 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
2290 if (XCHARSET_CHARS (charset) == 256)
2292 else if (XCHARSET_DIMENSION (charset) == 1)
2293 return d + XCHARSET_BYTE_OFFSET (charset);
2294 else if (XCHARSET_DIMENSION (charset) == 2)
2296 ((d / XCHARSET_CHARS (charset)
2297 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2298 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2299 else if (XCHARSET_DIMENSION (charset) == 3)
2301 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2302 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2303 | ((d / XCHARSET_CHARS (charset)
2304 % XCHARSET_CHARS (charset)
2305 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2306 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2307 else /* if (XCHARSET_DIMENSION (charset) == 4) */
2309 ((d / (XCHARSET_CHARS (charset)
2310 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2311 + XCHARSET_BYTE_OFFSET (charset)) << 24)
2312 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2313 % XCHARSET_CHARS (charset)
2314 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2315 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
2316 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2317 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2319 else if (XCHARSET_CODE_OFFSET (charset) == 0)
2321 if (XCHARSET_DIMENSION (charset) == 1)
2323 if (XCHARSET_CHARS (charset) == 94)
2325 if (((d = ch - (MIN_CHAR_94
2326 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
2330 else if (XCHARSET_CHARS (charset) == 96)
2332 if (((d = ch - (MIN_CHAR_96
2333 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
2340 else if (XCHARSET_DIMENSION (charset) == 2)
2342 if (XCHARSET_CHARS (charset) == 94)
2344 if (((d = ch - (MIN_CHAR_94x94
2345 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
2348 return (((d / 94) + 33) << 8) | (d % 94 + 33);
2350 else if (XCHARSET_CHARS (charset) == 96)
2352 if (((d = ch - (MIN_CHAR_96x96
2353 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
2356 return (((d / 96) + 32) << 8) | (d % 96 + 32);
/* Choose a builtin charset for character C by a chain of range tests,
   store it in *CHARSET and return C's code point in that charset.
   Script ranges (Greek, Cyrillic, Hebrew, Thai) map to their ISO/TIS
   charsets at offset 0x20 from the range start; the Daikanwa and Mojikyo
   ranges map to their own charsets at offset 0.  For the MIN_CHAR_94 /
   96 / 94x94 / 96x96 areas the charset is looked up with
   CHARSET_BY_ATTRIBUTES using a final byte derived from C's slot, and
   Vcharset_ucs is used when no such charset is defined.  Everything else
   falls back to Vcharset_ucs_bmp or Vcharset_ucs.
   NOTE(review): the half-width katakana branch returns the result of
   list2 (...) and does not set *CHARSET, unlike every other branch; it
   may be disabled by preprocessor lines not visible here -- confirm. */
2366 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
2368 if (c <= MAX_CHAR_BASIC_LATIN)
2370 *charset = Vcharset_ascii;
2375 *charset = Vcharset_control_1;
2380 *charset = Vcharset_latin_iso8859_1;
2384 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
2386 *charset = Vcharset_greek_iso8859_7;
2387 return c - MIN_CHAR_GREEK + 0x20;
2389 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
2391 *charset = Vcharset_cyrillic_iso8859_5;
2392 return c - MIN_CHAR_CYRILLIC + 0x20;
2395 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
2397 *charset = Vcharset_hebrew_iso8859_8;
2398 return c - MIN_CHAR_HEBREW + 0x20;
2400 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
2402 *charset = Vcharset_thai_tis620;
2403 return c - MIN_CHAR_THAI + 0x20;
2406 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
2407 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
2409 return list2 (Vcharset_katakana_jisx0201,
2410 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
2413 else if (c <= MAX_CHAR_BMP)
2415 *charset = Vcharset_ucs_bmp;
2418 else if (c < MIN_CHAR_DAIKANWA)
2420 *charset = Vcharset_ucs;
2424 else if (c <= MAX_CHAR_DAIKANWA)
2426 *charset = Vcharset_ideograph_daikanwa;
2427 return c - MIN_CHAR_DAIKANWA;
2430 else if (c <= MAX_CHAR_MOJIKYO)
2432 *charset = Vcharset_mojikyo;
2433 return c - MIN_CHAR_MOJIKYO;
2435 else if (c < MIN_CHAR_94)
2437 *charset = Vcharset_ucs;
2440 else if (c <= MAX_CHAR_94)
2442 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
2443 ((c - MIN_CHAR_94) / 94) + '0',
2444 CHARSET_LEFT_TO_RIGHT);
2445 if (!NILP (*charset))
2446 return ((c - MIN_CHAR_94) % 94) + 33;
2449 *charset = Vcharset_ucs;
2453 else if (c <= MAX_CHAR_96)
2455 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
2456 ((c - MIN_CHAR_96) / 96) + '0',
2457 CHARSET_LEFT_TO_RIGHT);
2458 if (!NILP (*charset))
2459 return ((c - MIN_CHAR_96) % 96) + 32;
2462 *charset = Vcharset_ucs;
2466 else if (c <= MAX_CHAR_94x94)
2469 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94,
2470 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
2471 CHARSET_LEFT_TO_RIGHT);
2472 if (!NILP (*charset))
2473 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
2474 | (((c - MIN_CHAR_94x94) % 94) + 33);
2477 *charset = Vcharset_ucs;
2481 else if (c <= MAX_CHAR_96x96)
2484 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96X96,
2485 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
2486 CHARSET_LEFT_TO_RIGHT);
2487 if (!NILP (*charset))
2488 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
2489 | (((c - MIN_CHAR_96x96) % 96) + 32);
2492 *charset = Vcharset_ucs;
2498 *charset = Vcharset_ucs;
2503 Lisp_Object Vdefault_coded_charset_priority_list;
2507 /************************************************************************/
2508 /* Basic charset Lisp functions */
2509 /************************************************************************/
/* Lisp predicate `charsetp': Qt when OBJECT satisfies CHARSETP,
   otherwise Qnil. */
2511 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
2512 Return non-nil if OBJECT is a charset.
2516 return CHARSETP (object) ? Qt : Qnil;
/* Lisp primitive `find-charset'.  A charset object is returned as is;
   anything else must pass CHECK_SYMBOL and is looked up in
   Vcharset_hash_table, with Qnil as the result when it is not a key. */
2519 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
2520 Retrieve the charset of the given name.
2521 If CHARSET-OR-NAME is a charset object, it is simply returned.
2522 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
2523 nil is returned. Otherwise the associated charset object is returned.
2527 if (CHARSETP (charset_or_name))
2528 return charset_or_name;
2530 CHECK_SYMBOL (charset_or_name);
2531 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Lisp primitive `get-charset': resolves NAME through Ffind_charset and
   signals "No such charset" with NAME as data instead of returning nil.
   NOTE(review): the test guarding the error and the final return are
   not visible here. */
2534 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
2535 Retrieve the charset of the given name.
2536 Same as `find-charset' except an error is signalled if there is no such
2537 charset instead of returning nil.
2541 Lisp_Object charset = Ffind_charset (name);
2544 signal_simple_error ("No such charset", name);
2548 /* We store the charsets in hash tables with the names as the key and the
2549 actual charset object as the value. Occasionally we need to use them
2550 in a list format. These routines provide us with that. */
/* Closure handed to the hash-table mapper: charset_list points at the
   Lisp list variable that the mapper extends. */
2551 struct charset_list_closure
2553 Lisp_Object *charset_list;
/* Hash-table mapping callback: conses KEY (the name under which the
   charset is registered, not XCHARSET_NAME of VALUE) onto the front of
   the list that the closure points at. */
2557 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
2558 void *charset_list_closure)
2560 /* This function can GC */
2561 struct charset_list_closure *chcl =
2562 (struct charset_list_closure*) charset_list_closure;
2563 Lisp_Object *charset_list = chcl->charset_list;
2565 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
/* Lisp primitive `charset-list'.  Maps add_charset_to_list_mapper over
   Vcharset_hash_table with the accumulating list GC-protected, and
   returns that list.  Because the mapper collects the hash keys, names
   added by `define-charset-alias' are included as well. */
2569 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
2570 Return a list of the names of all defined charsets.
2574 Lisp_Object charset_list = Qnil;
2575 struct gcpro gcpro1;
2576 struct charset_list_closure charset_list_closure;
2578 GCPRO1 (charset_list);
2579 charset_list_closure.charset_list = &charset_list;
2580 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
2581 &charset_list_closure);
2584 return charset_list;
/* Lisp primitive `charset-name': resolves CHARSET with Fget_charset
   (which rejects unknown names) and returns its name slot. */
2587 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
2588 Return the name of the given charset.
2592 return XCHARSET_NAME (Fget_charset (charset));
2595 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
2596 Define a new character set.
2597 This function is for use with Mule support.
2598 NAME is a symbol, the name by which the character set is normally referred.
2599 DOC-STRING is a string describing the character set.
2600 PROPS is a property list, describing the specific nature of the
2601 character set. Recognized properties are:
2603 'short-name Short version of the charset name (ex: Latin-1)
2604 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
2605 'registry A regular expression matching the font registry field for
2607 'dimension Number of octets used to index a character in this charset.
2608 Either 1 or 2. Defaults to 1.
2609 'columns Number of columns used to display a character in this charset.
2610 Only used in TTY mode. (Under X, the actual width of a
2611 character can be derived from the font used to display the
2612 characters.) If unspecified, defaults to the dimension
2613 (this is almost always the correct value).
2614 'chars Number of characters in each dimension (94 or 96).
2615 Defaults to 94. Note that if the dimension is 2, the
2616 character set thus described is 94x94 or 96x96.
2617 'final Final byte of ISO 2022 escape sequence. Must be
2618 supplied. Each combination of (DIMENSION, CHARS) defines a
2619 separate namespace for final bytes. Note that ISO
2620 2022 restricts the final byte to the range
2621 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
2622 dimension == 2. Note also that final bytes in the range
2623 0x30 - 0x3F are reserved for user-defined (not official)
2625 'graphic 0 (use left half of font on output) or 1 (use right half
2626 of font on output). Defaults to 0. For example, for
2627 a font whose registry is ISO8859-1, the left half
2628 (octets 0x20 - 0x7F) is the `ascii' character set, while
2629 the right half (octets 0xA0 - 0xFF) is the `latin-1'
2630 character set. With 'graphic set to 0, the octets
2631 will have their high bit cleared; with it set to 1,
2632 the octets will have their high bit set.
2633 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
2635 'ccl-program A compiled CCL program used to convert a character in
2636 this charset into an index into the font. This is in
2637 addition to the 'graphic property. The CCL program
2638 is passed the octets of the character, with the high
2639 bit cleared and set depending upon whether the value
2640 of the 'graphic property is 0 or 1.
2642 (name, doc_string, props))
2644 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
2645 int direction = CHARSET_LEFT_TO_RIGHT;
2647 Lisp_Object registry = Qnil;
2648 Lisp_Object charset;
2649 Lisp_Object rest, keyword, value;
2650 Lisp_Object ccl_program = Qnil;
2651 Lisp_Object short_name = Qnil, long_name = Qnil;
2652 int byte_offset = -1;
2654 CHECK_SYMBOL (name);
2655 if (!NILP (doc_string))
2656 CHECK_STRING (doc_string);
2658 charset = Ffind_charset (name);
2659 if (!NILP (charset))
2660 signal_simple_error ("Cannot redefine existing charset", name);
2662 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
2664 if (EQ (keyword, Qshort_name))
2666 CHECK_STRING (value);
2670 if (EQ (keyword, Qlong_name))
2672 CHECK_STRING (value);
2676 else if (EQ (keyword, Qdimension))
2679 dimension = XINT (value);
2680 if (dimension < 1 || dimension > 2)
2681 signal_simple_error ("Invalid value for 'dimension", value);
2684 else if (EQ (keyword, Qchars))
2687 chars = XINT (value);
2688 if (chars != 94 && chars != 96)
2689 signal_simple_error ("Invalid value for 'chars", value);
2692 else if (EQ (keyword, Qcolumns))
2695 columns = XINT (value);
2696 if (columns != 1 && columns != 2)
2697 signal_simple_error ("Invalid value for 'columns", value);
2700 else if (EQ (keyword, Qgraphic))
2703 graphic = XINT (value);
2705 if (graphic < 0 || graphic > 2)
2707 if (graphic < 0 || graphic > 1)
2709 signal_simple_error ("Invalid value for 'graphic", value);
2712 else if (EQ (keyword, Qregistry))
2714 CHECK_STRING (value);
2718 else if (EQ (keyword, Qdirection))
2720 if (EQ (value, Ql2r))
2721 direction = CHARSET_LEFT_TO_RIGHT;
2722 else if (EQ (value, Qr2l))
2723 direction = CHARSET_RIGHT_TO_LEFT;
2725 signal_simple_error ("Invalid value for 'direction", value);
2728 else if (EQ (keyword, Qfinal))
2730 CHECK_CHAR_COERCE_INT (value);
2731 final = XCHAR (value);
2732 if (final < '0' || final > '~')
2733 signal_simple_error ("Invalid value for 'final", value);
2736 else if (EQ (keyword, Qccl_program))
2738 CHECK_VECTOR (value);
2739 ccl_program = value;
2743 signal_simple_error ("Unrecognized property", keyword);
2747 error ("'final must be specified");
2748 if (dimension == 2 && final > 0x5F)
2750 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2754 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2756 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2758 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
2759 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
2761 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2763 id = get_unallocated_leading_byte (dimension);
2765 if (NILP (doc_string))
2766 doc_string = build_string ("");
2768 if (NILP (registry))
2769 registry = build_string ("");
2771 if (NILP (short_name))
2772 XSETSTRING (short_name, XSYMBOL (name)->name);
2774 if (NILP (long_name))
2775 long_name = doc_string;
2778 columns = dimension;
2780 if (byte_offset < 0)
2784 else if (chars == 96)
2790 charset = make_charset (id, name, chars, dimension, columns, graphic,
2791 final, direction, short_name, long_name,
2792 doc_string, registry,
2793 Qnil, 0, 0, 0, byte_offset);
2794 if (!NILP (ccl_program))
2795 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp primitive `make-reverse-direction-charset'.  Refuses a CHARSET
   that already has a reverse-direction partner and a NEW-NAME that
   already names a charset.  Otherwise copies CHARSET's chars, dimension,
   columns, graphic, final, names, doc string, registry and range
   parameters into a new charset with a freshly allocated leading byte
   and the opposite direction, then links the two charsets to each other
   through their reverse-direction-charset slots. */
2799 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2801 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2802 NEW-NAME is the name of the new charset. Return the new charset.
2804 (charset, new_name))
2806 Lisp_Object new_charset = Qnil;
2807 int id, chars, dimension, columns, graphic, final;
2809 Lisp_Object registry, doc_string, short_name, long_name;
2812 charset = Fget_charset (charset);
2813 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2814 signal_simple_error ("Charset already has reverse-direction charset",
2817 CHECK_SYMBOL (new_name);
2818 if (!NILP (Ffind_charset (new_name)))
2819 signal_simple_error ("Cannot redefine existing charset", new_name);
2821 cs = XCHARSET (charset);
2823 chars = CHARSET_CHARS (cs);
2824 dimension = CHARSET_DIMENSION (cs);
2825 columns = CHARSET_COLUMNS (cs);
2826 id = get_unallocated_leading_byte (dimension);
2828 graphic = CHARSET_GRAPHIC (cs);
2829 final = CHARSET_FINAL (cs);
2830 direction = CHARSET_RIGHT_TO_LEFT;
2831 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2832 direction = CHARSET_LEFT_TO_RIGHT;
2833 doc_string = CHARSET_DOC_STRING (cs);
2834 short_name = CHARSET_SHORT_NAME (cs);
2835 long_name = CHARSET_LONG_NAME (cs);
2836 registry = CHARSET_REGISTRY (cs);
2838 new_charset = make_charset (id, new_name, chars, dimension, columns,
2839 graphic, final, direction, short_name, long_name,
2840 doc_string, registry,
2842 CHARSET_DECODING_TABLE(cs),
2843 CHARSET_UCS_MIN(cs),
2844 CHARSET_UCS_MAX(cs),
2845 CHARSET_CODE_OFFSET(cs),
2846 CHARSET_BYTE_OFFSET(cs)
2852 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2853 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
/* Lisp primitive `define-charset-alias': after checking that ALIAS is a
   symbol and resolving CHARSET with Fget_charset, registers the charset
   object in Vcharset_hash_table under ALIAS and returns the Fputhash
   result.  An existing entry for ALIAS is not checked for here. */
2858 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2859 Define symbol ALIAS as an alias for CHARSET.
2863 CHECK_SYMBOL (alias);
2864 charset = Fget_charset (charset);
2865 return Fputhash (alias, charset, Vcharset_hash_table);
2868 /* #### Reverse direction charsets not yet implemented. */
/* Lisp accessor: resolves CHARSET with Fget_charset and returns the
   contents of its reverse-direction-charset slot.
   NOTE(review): given the remark above, this primitive may be compiled
   out by preprocessor lines not visible here -- confirm. */
2870 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2872 Return the reverse-direction charset parallel to CHARSET, if any.
2873 This is the charset with the same properties (in particular, the same
2874 dimension, number of characters per dimension, and final byte) as
2875 CHARSET but whose characters are displayed in the opposite direction.
2879 charset = Fget_charset (charset);
2880 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
/* Lisp primitive `charset-from-attributes'.  Validates DIMENSION (1 or
   2), CHARS (94 or 96), FINAL ('0' .. '~', and at most 0x5F when the
   dimension is 2) and DIRECTION (l2r, r2l or nil), derives the charset
   type from DIMENSION and CHARS, and looks the charset up with
   CHARSET_BY_ATTRIBUTES -- in the given direction, or in both directions
   (left-to-right first) when none was given.
   NOTE(review): the visible return statement yields XCHARSET_NAME (obj),
   i.e. the charset's name rather than the charset object that the doc
   string promises; confirm which is intended and align the doc string. */
2884 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2885 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2886 If DIRECTION is omitted, both directions will be checked (left-to-right
2887 will be returned if character sets exist for both directions).
2889 (dimension, chars, final, direction))
2891 int dm, ch, fi, di = -1;
2893 Lisp_Object obj = Qnil;
2895 CHECK_INT (dimension);
2896 dm = XINT (dimension);
2897 if (dm < 1 || dm > 2)
2898 signal_simple_error ("Invalid value for DIMENSION", dimension);
2902 if (ch != 94 && ch != 96)
2903 signal_simple_error ("Invalid value for CHARS", chars);
2905 CHECK_CHAR_COERCE_INT (final);
2907 if (fi < '0' || fi > '~')
2908 signal_simple_error ("Invalid value for FINAL", final);
2910 if (EQ (direction, Ql2r))
2911 di = CHARSET_LEFT_TO_RIGHT;
2912 else if (EQ (direction, Qr2l))
2913 di = CHARSET_RIGHT_TO_LEFT;
2914 else if (!NILP (direction))
2915 signal_simple_error ("Invalid value for DIRECTION", direction);
2917 if (dm == 2 && fi > 0x5F)
2919 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2922 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2924 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2928 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2930 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2933 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2936 return XCHARSET_NAME (obj);
/* Lisp accessor `charset-short-name': the short-name slot of the charset
   that Fget_charset resolves CHARSET to. */
2940 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2941 Return short name of CHARSET.
2945 return XCHARSET_SHORT_NAME (Fget_charset (charset));
/* Lisp accessor `charset-long-name': the long-name slot of the charset
   that Fget_charset resolves CHARSET to. */
2948 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2949 Return long name of CHARSET.
2953 return XCHARSET_LONG_NAME (Fget_charset (charset));
/* Lisp accessor `charset-description': returns the doc-string slot of
   the charset that Fget_charset resolves CHARSET to. */
2956 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2957 Return description of CHARSET.
2961 return XCHARSET_DOC_STRING (Fget_charset (charset));
/* Accessor: resolves CHARSET with Fget_charset and returns its
   dimension (XCHARSET_DIMENSION) boxed as a Lisp integer.  */
2964 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2965 Return dimension of CHARSET.
2969 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
/* Generic property accessor.  CHARSET is resolved with Fget_charset
   and PROP must be a symbol (CHECK_SYMBOL).  Each recognized property
   is answered by the matching CHARSET_* accessor: the integer-valued
   ones (dimension, columns, graphic, chars) are boxed with make_int,
   the final byte with make_char, and the direction is reported as the
   symbol l2r or r2l.  The reverse-direction charset is reported by
   name (XCHARSET_NAME) rather than as a charset object.  Any PROP not
   matched by one of the tests falls through to the "Unrecognized
   charset property name" error.  */
2972 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2973 Return property PROP of CHARSET.
2974 Recognized properties are those listed in `make-charset', as well as
2975 'name and 'doc-string.
2981 charset = Fget_charset (charset);
2982 cs = XCHARSET (charset);
2984 CHECK_SYMBOL (prop);
2985 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2986 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2987 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2988 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2989 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2990 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2991 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2992 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2993 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2994 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2995 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2996 if (EQ (prop, Qdirection))
2997 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2998 if (EQ (prop, Qreverse_direction_charset))
3000 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
3004 return XCHARSET_NAME (obj);
3006 signal_simple_error ("Unrecognized charset property name", prop);
3007 return Qnil; /* not reached */
/* The identification number reported to Lisp is the leading byte of
   the charset (XCHARSET_LEADING_BYTE), boxed with make_int.  CHARSET
   is resolved with Fget_charset first.  */
3010 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
3011 Return charset identification number of CHARSET.
3015 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
3018 /* #### We need to figure out which properties we really want to
/* Setter for the ccl-program property.  CHARSET is resolved with
   Fget_charset, CCL-PROGRAM must be a vector (CHECK_VECTOR), and the
   vector is stored as-is through XCHARSET_CCL_PROGRAM; no further
   validation of its contents is visible here.  */
3021 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
3022 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
3024 (charset, ccl_program))
3026 charset = Fget_charset (charset);
3027 CHECK_VECTOR (ccl_program);
3028 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Helper used by Fset_charset_registry.  For every device visited by
   DEVICE_LOOP_NO_BREAK, CHARSET is looked up in that device's
   charset_font_cache; Qunbound is passed as the "not found" default,
   and when an entry does exist the hash table stored there is emptied
   with Fclrhash (the entry itself is kept).  */
3033 invalidate_charset_font_caches (Lisp_Object charset)
3035 /* Invalidate font cache entries for charset on all devices. */
3036 Lisp_Object devcons, concons, hash_table;
3037 DEVICE_LOOP_NO_BREAK (devcons, concons)
3039 struct device *d = XDEVICE (XCAR (devcons));
3040 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
3041 if (!UNBOUNDP (hash_table))
3042 Fclrhash (hash_table);
/* Setter for the registry property.  CHARSET is resolved with
   Fget_charset and REGISTRY must be a string (CHECK_STRING).  After
   storing it, the per-device font caches for this charset are
   invalidated and a global change of the default face's font property
   is announced -- presumably so that fonts are looked up again against
   the new registry (NOTE(review): confirm against the face code).  */
3046 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
3047 Set the 'registry property of CHARSET to REGISTRY.
3049 (charset, registry))
3051 charset = Fget_charset (charset);
3052 CHECK_STRING (registry);
3053 XCHARSET_REGISTRY (charset) = registry;
3054 invalidate_charset_font_caches (charset);
3055 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
/* The Lisp-level "mapping table" is the charset's decoding table:
   CHARSET is resolved with Fget_charset and the value of
   XCHARSET_DECODING_TABLE is returned unchanged.  */
3060 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
3061 Return mapping-table of CHARSET.
3065 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Install TABLE as the mapping (decoding) table of CHARSET.
   TABLE is expected to be a vector or nil; anything else signals
   Qwrong_type_argument with "vector-or-nil-p".  A vector is validated
   with decoding_table_check_elements against the charset's dimension
   and is rejected with "Too big table", "Invalid element is found" or
   "Something wrong".  The decoding table slot is reset to Qnil on the
   visible paths (an existing vector is first passed to
   make_vector_newer), and the entries of TABLE are then registered one
   at a time with put_char_ccs_code_point: element I is given code
   point I + byte_offset, and for a nested vector the outer index
   supplies the high byte, (I + byte_offset) << 8.  */
3068 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
3069 Set mapping-table of CHARSET to TABLE.
3073 struct Lisp_Charset *cs;
3077 charset = Fget_charset (charset);
3078 cs = XCHARSET (charset);
3082 if (VECTORP (CHARSET_DECODING_TABLE(cs)))
3083 make_vector_newer (CHARSET_DECODING_TABLE(cs));
3084 CHARSET_DECODING_TABLE(cs) = Qnil;
3087 else if (VECTORP (table))
3089 int ccs_len = CHARSET_BYTE_SIZE (cs);
3090 int ret = decoding_table_check_elements (table,
3091 CHARSET_DIMENSION (cs),
3096 signal_simple_error ("Too big table", table);
3098 signal_simple_error ("Invalid element is found", table);
3100 signal_simple_error ("Something wrong", table);
3102 CHARSET_DECODING_TABLE(cs) = Qnil;
3105 signal_error (Qwrong_type_argument,
3106 list2 (build_translated_string ("vector-or-nil-p"),
3109 byte_offset = CHARSET_BYTE_OFFSET (cs);
3110 switch (CHARSET_DIMENSION (cs))
3113 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3115 Lisp_Object c = XVECTOR_DATA(table)[i];
3118 put_char_ccs_code_point (c, charset,
3119 make_int (i + byte_offset));
3123 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3125 Lisp_Object v = XVECTOR_DATA(table)[i];
3131 for (j = 0; j < XVECTOR_LENGTH (v); j++)
3133 Lisp_Object c = XVECTOR_DATA(v)[j];
3136 put_char_ccs_code_point
3138 make_int ( ( (i + byte_offset) << 8 )
3144 put_char_ccs_code_point (v, charset,
3145 make_int (i + byte_offset));
3154 /************************************************************************/
3155 /* Lisp primitives for working with characters */
3156 /************************************************************************/
/* Resolves CHARSET with Fget_charset and converts the code point
   through DECODE_CHAR, returning the result as a Lisp character.
   Charsets whose graphic property equals 1 are special-cased before
   the decoding step.  */
3159 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
3160 Make a character from CHARSET and code-point CODE.
3166 charset = Fget_charset (charset);
3169 if (XCHARSET_GRAPHIC (charset) == 1)
3171 return make_char (DECODE_CHAR (charset, c));
/* Variant of Fdecode_char that computes the character arithmetically
   whenever the charset has a builtin position in the character space:

   - Charsets with a final byte >= '0' are placed relative to
     MIN_CHAR_94 / MIN_CHAR_96 (dimension 1) or MIN_CHAR_94x94 /
     MIN_CHAR_96x96 (otherwise), in blocks selected by (final - '0').
     Each octet of the code is masked to 7 bits and rebased by 33 for
     the 94-based ranges and by 32 for the 96-based ranges.

   - Otherwise, charsets with a non-zero UCS_MAX map the code linearly
     using the charset's byte offset, code offset and UCS_MIN; a result
     outside [UCS_MIN, UCS_MAX] is handed to Fdecode_char instead.

   Every remaining case also falls back to Fdecode_char.  */
3174 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
3175 Make a builtin character from CHARSET and code-point CODE.
3182 charset = Fget_charset (charset);
3186 if ((final = XCHARSET_FINAL (charset)) >= '0')
3188 if (XCHARSET_DIMENSION (charset) == 1)
3190 switch (XCHARSET_CHARS (charset))
3194 make_char (MIN_CHAR_94 + (final - '0') * 94
3195 + ((c & 0x7F) - 33));
3198 make_char (MIN_CHAR_96 + (final - '0') * 96
3199 + ((c & 0x7F) - 32));
3201 return Fdecode_char (charset, code);
3206 switch (XCHARSET_CHARS (charset))
3210 make_char (MIN_CHAR_94x94
3211 + (final - '0') * 94 * 94
3212 + (((c >> 8) & 0x7F) - 33) * 94
3213 + ((c & 0x7F) - 33));
3216 make_char (MIN_CHAR_96x96
3217 + (final - '0') * 96 * 96
3218 + (((c >> 8) & 0x7F) - 32) * 96
3219 + ((c & 0x7F) - 32));
3221 return Fdecode_char (charset, code);
3225 else if (XCHARSET_UCS_MAX (charset))
3228 = (XCHARSET_DIMENSION (charset) == 1
3230 c - XCHARSET_BYTE_OFFSET (charset)
3232 ((c >> 8) - XCHARSET_BYTE_OFFSET (charset))
3233 * XCHARSET_CHARS (charset)
3234 + (c & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
3235 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
3236 if ((cid < XCHARSET_UCS_MIN (charset))
3237 || (XCHARSET_UCS_MAX (charset) < cid))
3238 return Fdecode_char (charset, code);
3239 return make_char (cid);
3242 return Fdecode_char (charset, code);
/* Build a character from a charset and one or two octets.
   The accepted octet range [lowlim, highlim] depends on the charset:
   0-127 for ascii, 0-31 for control-1, 0-255 for 256-character sets,
   33-126 for 94-character sets and 32-127 otherwise.  An octet outside
   that range is reported with args_out_of_range_3.  ARG2 is masked
   with 0x7f before it is checked.  For a charset of dimension one the
   second octet must be nil and the character is built with a second
   octet of 0.  */
3246 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
3247 Make a character from CHARSET and octets ARG1 and ARG2.
3248 ARG2 is required only for characters from two-dimensional charsets.
3249 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
3250 character s with caron.
3252 (charset, arg1, arg2))
3256 int lowlim, highlim;
3258 charset = Fget_charset (charset);
3259 cs = XCHARSET (charset);
3261 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
3262 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
3264 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
3266 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
3267 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
3270 /* It is useful (and safe, according to Olivier Galibert) to strip
3271 the 8th bit off ARG1 and ARG2 because it allows programmers to
3272 write (make-char 'latin-iso8859-2 CODE) where code is the actual
3273 Latin 2 code of the character. */
3281 if (a1 < lowlim || a1 > highlim)
3282 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
3284 if (CHARSET_DIMENSION (cs) == 1)
3288 ("Charset is of dimension one; second octet must be nil", arg2);
3289 return make_char (MAKE_CHAR (charset, a1, 0));
3298 a2 = XINT (arg2) & 0x7f;
3300 if (a2 < lowlim || a2 > highlim)
3301 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
3303 return make_char (MAKE_CHAR (charset, a1, a2));
/* CH is checked with CHECK_CHAR_COERCE_INT, then the charset that
   CHAR_CHARSET reports for it is returned by name (XCHARSET_NAME),
   not as a charset object.  */
3306 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
3307 Return the character set of char CH.
3311 CHECK_CHAR_COERCE_INT (ch);
3313 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
/* CH is split with BREAKUP_CHAR into its charset and two octets.
   Octet 0 is returned when N is nil or 0, octet 1 when N is 1; any
   other N signals "Octet number must be 0 or 1".  The charset that
   BREAKUP_CHAR produces is not used for the result.  */
3316 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
3317 Return the octet numbered N (should be 0 or 1) of char CH.
3318 N defaults to 0 if omitted.
3322 Lisp_Object charset;
3325 CHECK_CHAR_COERCE_INT (ch);
3327 BREAKUP_CHAR (XCHAR (ch), charset, octet0, octet1);
3329 if (NILP (n) || EQ (n, Qzero))
3330 return make_int (octet0);
3331 else if (EQ (n, make_int (1)))
3332 return make_int (octet1);
3334 signal_simple_error ("Octet number must be 0 or 1", n);
/* Decompose a character into (CHARSET-NAME CODE...).  `charset' and
   `rc' are protected with GCPRO2 because the list is consed here.
   Two ways of building the result appear in this function:

   - via ENCODE_CHAR: the code point is consumed one byte at a time
     (code_point & 255), once per dimension of the charset, each byte
     being consed onto the front of `rc'; the charset name is consed
     on last so that it ends up first;

   - via BREAKUP_CHAR: the two octets are packed with list3 when the
     charset has dimension 2 and with list2 otherwise.

   NOTE(review): presumably these are alternative conditional-
   compilation branches rather than code that runs in sequence --
   confirm against the preprocessor guards.  */
3337 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
3338 Return list of charset and one or two position-codes of CHAR.
3342 /* This function can GC */
3343 struct gcpro gcpro1, gcpro2;
3344 Lisp_Object charset = Qnil;
3345 Lisp_Object rc = Qnil;
3353 GCPRO2 (charset, rc);
3354 CHECK_CHAR_COERCE_INT (character);
3357 code_point = ENCODE_CHAR (XCHAR (character), charset);
3358 dimension = XCHARSET_DIMENSION (charset);
3359 while (dimension > 0)
3361 rc = Fcons (make_int (code_point & 255), rc);
3365 rc = Fcons (XCHARSET_NAME (charset), rc);
3367 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
3369 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
3371 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
3375 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
3384 #ifdef ENABLE_COMPOSITE_CHARS
3385 /************************************************************************/
3386 /* composite character functions */
3387 /************************************************************************/
/* Map the LEN bytes at STR to a composite character.  The bytes are
   turned into a Lisp string and looked up in
   Vcomposite_char_string2char_hash_table.  A new character is built
   in Vcharset_composite from the (composite_char_row_next,
   composite_char_col_next) cursor -- presumably only when the lookup
   misses -- and recorded in both hash tables (char -> string and
   string -> char).  The column cursor then advances; on reaching 128
   it is reset to 32 and the row cursor advances.  Once the row cursor
   has reached 128, "No more composite chars available" is signaled
   instead.  */
3390 lookup_composite_char (Bufbyte *str, int len)
3392 Lisp_Object lispstr = make_string (str, len);
3393 Lisp_Object ch = Fgethash (lispstr,
3394 Vcomposite_char_string2char_hash_table,
3400 if (composite_char_row_next >= 128)
3401 signal_simple_error ("No more composite chars available", lispstr);
3402 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
3403 composite_char_col_next);
3404 Fputhash (make_char (emch), lispstr,
3405 Vcomposite_char_char2string_hash_table);
3406 Fputhash (lispstr, make_char (emch),
3407 Vcomposite_char_string2char_hash_table);
3408 composite_char_col_next++;
3409 if (composite_char_col_next >= 128)
3411 composite_char_col_next = 32;
3412 composite_char_row_next++;
/* Inverse of lookup_composite_char: fetch the string recorded for CH
   in Vcomposite_char_char2string_hash_table.  A missing entry is
   treated as a programming error (assert), not a Lisp-level error.  */
3421 composite_char_string (Emchar ch)
3423 Lisp_Object str = Fgethash (make_char (ch),
3424 Vcomposite_char_char2string_hash_table,
3426 assert (!UNBOUNDP (str));
/* Lisp entry point for lookup_composite_char: STRING must be a string
   (CHECK_STRING) and its data and length are passed straight through.
   NOTE(review): this is declared with xxDEFUN rather than DEFUN --
   presumably to keep it out of the generated documentation; confirm
   what xxDEFUN expands to.  */
3430 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
3431 Convert a string into a single composite character.
3432 The character is the result of overstriking all the characters in
3437 CHECK_STRING (string);
3438 return make_char (lookup_composite_char (XSTRING_DATA (string),
3439 XSTRING_LENGTH (string)));
/* Lisp entry point for composite_char_string.  The character is
   rejected with "Must be composite char" unless its leading byte is
   LEADING_BYTE_COMPOSITE.  Declared with xxDEFUN, like
   Fmake_composite_char.  */
3442 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
3443 Return a string of the characters comprising a composite character.
3451 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
3452 signal_simple_error ("Must be composite char", ch);
3453 return composite_char_string (emch);
3455 #endif /* ENABLE_COMPOSITE_CHARS */
3458 /************************************************************************/
3459 /* initialization */
3460 /************************************************************************/
/* Symbol-level initialization for this file.  In order, it registers
   the lrecord implementations (INIT_LRECORD_IMPLEMENTATION), the Lisp
   primitives defined in this file (DEFSUBR), and the symbols used as
   property names, direction names, charset names and decomposition
   tags (defsymbol).
   NOTE(review): Qfinal is passed to defsymbol twice below, both times
   with the name "final"; presumably one of the two sits in a
   conditional section -- confirm that the duplicate is harmless.  */
3463 syms_of_mule_charset (void)
3466 INIT_LRECORD_IMPLEMENTATION (uint8_byte_table);
3467 INIT_LRECORD_IMPLEMENTATION (uint16_byte_table);
3468 INIT_LRECORD_IMPLEMENTATION (byte_table);
3469 INIT_LRECORD_IMPLEMENTATION (char_id_table);
3471 INIT_LRECORD_IMPLEMENTATION (charset);
3473 DEFSUBR (Fcharsetp);
3474 DEFSUBR (Ffind_charset);
3475 DEFSUBR (Fget_charset);
3476 DEFSUBR (Fcharset_list);
3477 DEFSUBR (Fcharset_name);
3478 DEFSUBR (Fmake_charset);
3479 DEFSUBR (Fmake_reverse_direction_charset);
3480 /* DEFSUBR (Freverse_direction_charset); */
3481 DEFSUBR (Fdefine_charset_alias);
3482 DEFSUBR (Fcharset_from_attributes);
3483 DEFSUBR (Fcharset_short_name);
3484 DEFSUBR (Fcharset_long_name);
3485 DEFSUBR (Fcharset_description);
3486 DEFSUBR (Fcharset_dimension);
3487 DEFSUBR (Fcharset_property);
3488 DEFSUBR (Fcharset_id);
3489 DEFSUBR (Fset_charset_ccl_program);
3490 DEFSUBR (Fset_charset_registry);
3492 DEFSUBR (Fchar_attribute_list);
3493 DEFSUBR (Ffind_char_attribute_table);
3494 DEFSUBR (Fchar_attribute_alist);
3495 DEFSUBR (Fget_char_attribute);
3496 DEFSUBR (Fput_char_attribute);
3497 DEFSUBR (Fremove_char_attribute);
3498 DEFSUBR (Fdefine_char);
3499 DEFSUBR (Fchar_variants);
3500 DEFSUBR (Fget_composite_char);
3501 DEFSUBR (Fcharset_mapping_table);
3502 DEFSUBR (Fset_charset_mapping_table);
3506 DEFSUBR (Fdecode_char);
3507 DEFSUBR (Fdecode_builtin_char);
3509 DEFSUBR (Fmake_char);
3510 DEFSUBR (Fchar_charset);
3511 DEFSUBR (Fchar_octet);
3512 DEFSUBR (Fsplit_char);
3514 #ifdef ENABLE_COMPOSITE_CHARS
3515 DEFSUBR (Fmake_composite_char);
3516 DEFSUBR (Fcomposite_char_string);
3519 defsymbol (&Qcharsetp, "charsetp");
3520 defsymbol (&Qregistry, "registry");
3521 defsymbol (&Qfinal, "final");
3522 defsymbol (&Qgraphic, "graphic");
3523 defsymbol (&Qdirection, "direction");
3524 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
3525 defsymbol (&Qshort_name, "short-name");
3526 defsymbol (&Qlong_name, "long-name");
3528 defsymbol (&Ql2r, "l2r");
3529 defsymbol (&Qr2l, "r2l");
3531 /* Charsets, compatible with FSF 20.3
3532 Naming convention is Script-Charset[-Edition] */
3533 defsymbol (&Qascii, "ascii");
3534 defsymbol (&Qcontrol_1, "control-1");
3535 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
3536 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
3537 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
3538 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
3539 defsymbol (&Qthai_tis620, "thai-tis620");
3540 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
3541 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
3542 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
3543 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
3544 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
3545 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
3546 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
3547 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
3548 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
3549 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
3550 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
3551 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
3552 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
3553 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
3554 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
3556 defsymbol (&Q_ucs, "->ucs");
3557 defsymbol (&Q_decomposition, "->decomposition");
3558 defsymbol (&Qcompat, "compat");
3559 defsymbol (&Qisolated, "isolated");
3560 defsymbol (&Qinitial, "initial");
3561 defsymbol (&Qmedial, "medial");
3562 defsymbol (&Qfinal, "final");
3563 defsymbol (&Qvertical, "vertical");
3564 defsymbol (&QnoBreak, "noBreak");
3565 defsymbol (&Qfraction, "fraction");
3566 defsymbol (&Qsuper, "super");
3567 defsymbol (&Qsub, "sub");
3568 defsymbol (&Qcircle, "circle");
3569 defsymbol (&Qsquare, "square");
3570 defsymbol (&Qwide, "wide");
3571 defsymbol (&Qnarrow, "narrow");
3572 defsymbol (&Qsmall, "small");
3573 defsymbol (&Qfont, "font");
3574 defsymbol (&Qucs, "ucs");
3575 defsymbol (&Qucs_bmp, "ucs-bmp");
3576 defsymbol (&Qlatin_viscii, "latin-viscii");
3577 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
3578 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
3579 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
3580 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
3581 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
3582 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
3583 defsymbol (&Qmojikyo, "mojikyo");
3584 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
3585 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
3586 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
3587 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
3588 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
3589 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
3590 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
3591 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
3592 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
3593 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
3594 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
3595 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
3596 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
3597 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
3598 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
3599 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
3600 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
3601 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
3602 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
3603 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
3604 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
3605 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
3607 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
3608 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
3610 defsymbol (&Qcomposite, "composite");
/* Variable-level initialization for this file.  Allocates the charset
   lookup structure `chlook' and registers it with dumpstruct, clears
   its lookup tables to Qnil, seeds the next-allocated leading-byte
   counters, and defines the Lisp variables leading-code-private-11,
   utf-2000-version and default-coded-charset-priority-list.  The
   character composition and variant tables are staticpro'd and
   created with make_char_id_table (Qnil).
   NOTE(review): charset_by_attributes is cleared below both as a
   two-level and as a three-level table, and leading_code_private_11
   is assigned the same value before and after its DEFVAR_INT;
   presumably these belong to conditional-compilation alternatives --
   confirm against the preprocessor guards.  */
3614 vars_of_mule_charset (void)
3621 chlook = xnew (struct charset_lookup);
3622 dumpstruct (&chlook, &charset_lookup_description);
3624 /* Table of charsets indexed by leading byte. */
3625 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
3626 chlook->charset_by_leading_byte[i] = Qnil;
3629 /* Table of charsets indexed by type/final-byte. */
3630 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3631 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3632 chlook->charset_by_attributes[i][j] = Qnil;
3634 /* Table of charsets indexed by type/final-byte/direction. */
3635 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3636 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3637 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
3638 chlook->charset_by_attributes[i][j][k] = Qnil;
3642 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
3644 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
3645 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
3649 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3650 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
3651 Leading-code of private TYPE9N charset of column-width 1.
3653 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3657 Vutf_2000_version = build_string("0.16 (ÅŒji)");
3658 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
3659 Version number of UTF-2000.
3662 staticpro (&Vcharacter_composition_table);
3663 Vcharacter_composition_table = make_char_id_table (Qnil);
3665 staticpro (&Vcharacter_variant_table);
3666 Vcharacter_variant_table = make_char_id_table (Qnil);
3668 Vdefault_coded_charset_priority_list = Qnil;
3669 DEFVAR_LISP ("default-coded-charset-priority-list",
3670 &Vdefault_coded_charset_priority_list /*
3671 Default order of preferred coded-character-sets.
3677 complex_vars_of_mule_charset (void)
3679 staticpro (&Vcharset_hash_table);
3680 Vcharset_hash_table =
3681 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3683 /* Predefined character sets. We store them into variables for
3687 staticpro (&Vchar_attribute_hash_table);
3688 Vchar_attribute_hash_table
3689 = make_lisp_hash_table (16, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3691 staticpro (&Vcharset_ucs);
3693 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
3694 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3695 build_string ("UCS"),
3696 build_string ("UCS"),
3697 build_string ("ISO/IEC 10646"),
3699 Qnil, 0, 0xFFFFFFF, 0, 0);
3700 staticpro (&Vcharset_ucs_bmp);
3702 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
3703 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3704 build_string ("BMP"),
3705 build_string ("BMP"),
3706 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
3707 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
3708 Qnil, 0, 0xFFFF, 0, 0);
3710 # define MIN_CHAR_THAI 0
3711 # define MAX_CHAR_THAI 0
3712 # define MIN_CHAR_HEBREW 0
3713 # define MAX_CHAR_HEBREW 0
3714 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3715 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
3717 staticpro (&Vcharset_ascii);
3719 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3720 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3721 build_string ("ASCII"),
3722 build_string ("ASCII)"),
3723 build_string ("ASCII (ISO646 IRV)"),
3724 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3725 Qnil, 0, 0x7F, 0, 0);
3726 staticpro (&Vcharset_control_1);
3727 Vcharset_control_1 =
3728 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3729 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3730 build_string ("C1"),
3731 build_string ("Control characters"),
3732 build_string ("Control characters 128-191"),
3734 Qnil, 0x80, 0x9F, 0, 0);
3735 staticpro (&Vcharset_latin_iso8859_1);
3736 Vcharset_latin_iso8859_1 =
3737 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3738 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3739 build_string ("Latin-1"),
3740 build_string ("ISO8859-1 (Latin-1)"),
3741 build_string ("ISO8859-1 (Latin-1)"),
3742 build_string ("iso8859-1"),
3743 Qnil, 0xA0, 0xFF, 0, 32);
3744 staticpro (&Vcharset_latin_iso8859_2);
3745 Vcharset_latin_iso8859_2 =
3746 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3747 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3748 build_string ("Latin-2"),
3749 build_string ("ISO8859-2 (Latin-2)"),
3750 build_string ("ISO8859-2 (Latin-2)"),
3751 build_string ("iso8859-2"),
3753 staticpro (&Vcharset_latin_iso8859_3);
3754 Vcharset_latin_iso8859_3 =
3755 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3756 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3757 build_string ("Latin-3"),
3758 build_string ("ISO8859-3 (Latin-3)"),
3759 build_string ("ISO8859-3 (Latin-3)"),
3760 build_string ("iso8859-3"),
3762 staticpro (&Vcharset_latin_iso8859_4);
3763 Vcharset_latin_iso8859_4 =
3764 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3765 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3766 build_string ("Latin-4"),
3767 build_string ("ISO8859-4 (Latin-4)"),
3768 build_string ("ISO8859-4 (Latin-4)"),
3769 build_string ("iso8859-4"),
3771 staticpro (&Vcharset_thai_tis620);
3772 Vcharset_thai_tis620 =
3773 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3774 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3775 build_string ("TIS620"),
3776 build_string ("TIS620 (Thai)"),
3777 build_string ("TIS620.2529 (Thai)"),
3778 build_string ("tis620"),
3779 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
3780 staticpro (&Vcharset_greek_iso8859_7);
3781 Vcharset_greek_iso8859_7 =
3782 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3783 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3784 build_string ("ISO8859-7"),
3785 build_string ("ISO8859-7 (Greek)"),
3786 build_string ("ISO8859-7 (Greek)"),
3787 build_string ("iso8859-7"),
3789 0 /* MIN_CHAR_GREEK */,
3790 0 /* MAX_CHAR_GREEK */, 0, 32);
3791 staticpro (&Vcharset_arabic_iso8859_6);
3792 Vcharset_arabic_iso8859_6 =
3793 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3794 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3795 build_string ("ISO8859-6"),
3796 build_string ("ISO8859-6 (Arabic)"),
3797 build_string ("ISO8859-6 (Arabic)"),
3798 build_string ("iso8859-6"),
3800 staticpro (&Vcharset_hebrew_iso8859_8);
3801 Vcharset_hebrew_iso8859_8 =
3802 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3803 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3804 build_string ("ISO8859-8"),
3805 build_string ("ISO8859-8 (Hebrew)"),
3806 build_string ("ISO8859-8 (Hebrew)"),
3807 build_string ("iso8859-8"),
3808 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
3809 staticpro (&Vcharset_katakana_jisx0201);
3810 Vcharset_katakana_jisx0201 =
3811 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3812 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3813 build_string ("JISX0201 Kana"),
3814 build_string ("JISX0201.1976 (Japanese Kana)"),
3815 build_string ("JISX0201.1976 Japanese Kana"),
3816 build_string ("jisx0201\\.1976"),
3818 staticpro (&Vcharset_latin_jisx0201);
3819 Vcharset_latin_jisx0201 =
3820 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3821 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3822 build_string ("JISX0201 Roman"),
3823 build_string ("JISX0201.1976 (Japanese Roman)"),
3824 build_string ("JISX0201.1976 Japanese Roman"),
3825 build_string ("jisx0201\\.1976"),
3827 staticpro (&Vcharset_cyrillic_iso8859_5);
3828 Vcharset_cyrillic_iso8859_5 =
3829 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3830 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3831 build_string ("ISO8859-5"),
3832 build_string ("ISO8859-5 (Cyrillic)"),
3833 build_string ("ISO8859-5 (Cyrillic)"),
3834 build_string ("iso8859-5"),
3836 0 /* MIN_CHAR_CYRILLIC */,
3837 0 /* MAX_CHAR_CYRILLIC */, 0, 32);
3838 staticpro (&Vcharset_latin_iso8859_9);
3839 Vcharset_latin_iso8859_9 =
3840 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3841 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3842 build_string ("Latin-5"),
3843 build_string ("ISO8859-9 (Latin-5)"),
3844 build_string ("ISO8859-9 (Latin-5)"),
3845 build_string ("iso8859-9"),
3847 staticpro (&Vcharset_japanese_jisx0208_1978);
3848 Vcharset_japanese_jisx0208_1978 =
3849 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3850 Qjapanese_jisx0208_1978, 94, 2,
3851 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3852 build_string ("JIS X0208:1978"),
3853 build_string ("JIS X0208:1978 (Japanese)"),
3855 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3856 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3858 staticpro (&Vcharset_chinese_gb2312);
3859 Vcharset_chinese_gb2312 =
3860 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3861 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3862 build_string ("GB2312"),
3863 build_string ("GB2312)"),
3864 build_string ("GB2312 Chinese simplified"),
3865 build_string ("gb2312"),
3867 staticpro (&Vcharset_japanese_jisx0208);
3868 Vcharset_japanese_jisx0208 =
3869 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3870 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3871 build_string ("JISX0208"),
3872 build_string ("JIS X0208:1983 (Japanese)"),
3873 build_string ("JIS X0208:1983 Japanese Kanji"),
3874 build_string ("jisx0208\\.1983"),
3877 staticpro (&Vcharset_japanese_jisx0208_1990);
3878 Vcharset_japanese_jisx0208_1990 =
3879 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3880 Qjapanese_jisx0208_1990, 94, 2,
3881 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3882 build_string ("JISX0208-1990"),
3883 build_string ("JIS X0208:1990 (Japanese)"),
3884 build_string ("JIS X0208:1990 Japanese Kanji"),
3885 build_string ("jisx0208\\.1990"),
3887 MIN_CHAR_JIS_X0208_1990,
3888 MAX_CHAR_JIS_X0208_1990, 0, 33);
3890 staticpro (&Vcharset_korean_ksc5601);
3891 Vcharset_korean_ksc5601 =
3892 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3893 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3894 build_string ("KSC5601"),
3895 build_string ("KSC5601 (Korean"),
3896 build_string ("KSC5601 Korean Hangul and Hanja"),
3897 build_string ("ksc5601"),
3899 staticpro (&Vcharset_japanese_jisx0212);
3900 Vcharset_japanese_jisx0212 =
3901 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3902 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3903 build_string ("JISX0212"),
3904 build_string ("JISX0212 (Japanese)"),
3905 build_string ("JISX0212 Japanese Supplement"),
3906 build_string ("jisx0212"),
3909 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3910 staticpro (&Vcharset_chinese_cns11643_1);
3911 Vcharset_chinese_cns11643_1 =
3912 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3913 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3914 build_string ("CNS11643-1"),
3915 build_string ("CNS11643-1 (Chinese traditional)"),
3917 ("CNS 11643 Plane 1 Chinese traditional"),
3918 build_string (CHINESE_CNS_PLANE_RE("1")),
3920 staticpro (&Vcharset_chinese_cns11643_2);
3921 Vcharset_chinese_cns11643_2 =
3922 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3923 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3924 build_string ("CNS11643-2"),
3925 build_string ("CNS11643-2 (Chinese traditional)"),
3927 ("CNS 11643 Plane 2 Chinese traditional"),
3928 build_string (CHINESE_CNS_PLANE_RE("2")),
3931 staticpro (&Vcharset_latin_tcvn5712);
3932 Vcharset_latin_tcvn5712 =
3933 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3934 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3935 build_string ("TCVN 5712"),
3936 build_string ("TCVN 5712 (VSCII-2)"),
3937 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3938 build_string ("tcvn5712-1"),
3940 staticpro (&Vcharset_latin_viscii_lower);
3941 Vcharset_latin_viscii_lower =
3942 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3943 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3944 build_string ("VISCII lower"),
3945 build_string ("VISCII lower (Vietnamese)"),
3946 build_string ("VISCII lower (Vietnamese)"),
3947 build_string ("MULEVISCII-LOWER"),
3949 staticpro (&Vcharset_latin_viscii_upper);
3950 Vcharset_latin_viscii_upper =
3951 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3952 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3953 build_string ("VISCII upper"),
3954 build_string ("VISCII upper (Vietnamese)"),
3955 build_string ("VISCII upper (Vietnamese)"),
3956 build_string ("MULEVISCII-UPPER"),
3958 staticpro (&Vcharset_latin_viscii);
3959 Vcharset_latin_viscii =
3960 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3961 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3962 build_string ("VISCII"),
3963 build_string ("VISCII 1.1 (Vietnamese)"),
3964 build_string ("VISCII 1.1 (Vietnamese)"),
3965 build_string ("VISCII1\\.1"),
3967 staticpro (&Vcharset_ideograph_daikanwa);
3968 Vcharset_ideograph_daikanwa =
3969 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
3970 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3971 build_string ("Daikanwa"),
3972 build_string ("Morohashi's Daikanwa"),
3973 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
3974 build_string ("Daikanwa"),
3975 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
3976 staticpro (&Vcharset_mojikyo);
3978 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
3979 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3980 build_string ("Mojikyo"),
3981 build_string ("Mojikyo"),
3982 build_string ("Konjaku-Mojikyo"),
3984 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
3985 staticpro (&Vcharset_mojikyo_pj_1);
3986 Vcharset_mojikyo_pj_1 =
3987 make_charset (LEADING_BYTE_MOJIKYO_PJ_1, Qmojikyo_pj_1, 94, 2,
3988 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3989 build_string ("Mojikyo-PJ-1"),
3990 build_string ("Mojikyo (pseudo JIS encoding) part 1"),
3992 ("Konjaku-Mojikyo (pseudo JIS encoding) part 1"),
3993 build_string ("jisx0208\\.Mojikyo-1$"),
3995 staticpro (&Vcharset_mojikyo_pj_2);
3996 Vcharset_mojikyo_pj_2 =
3997 make_charset (LEADING_BYTE_MOJIKYO_PJ_2, Qmojikyo_pj_2, 94, 2,
3998 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3999 build_string ("Mojikyo-PJ-2"),
4000 build_string ("Mojikyo (pseudo JIS encoding) part 2"),
4002 ("Konjaku-Mojikyo (pseudo JIS encoding) part 2"),
4003 build_string ("jisx0208\\.Mojikyo-2$"),
4005 staticpro (&Vcharset_mojikyo_pj_3);
4006 Vcharset_mojikyo_pj_3 =
4007 make_charset (LEADING_BYTE_MOJIKYO_PJ_3, Qmojikyo_pj_3, 94, 2,
4008 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4009 build_string ("Mojikyo-PJ-3"),
4010 build_string ("Mojikyo (pseudo JIS encoding) part 3"),
4012 ("Konjaku-Mojikyo (pseudo JIS encoding) part 3"),
4013 build_string ("jisx0208\\.Mojikyo-3$"),
4015 staticpro (&Vcharset_mojikyo_pj_4);
4016 Vcharset_mojikyo_pj_4 =
4017 make_charset (LEADING_BYTE_MOJIKYO_PJ_4, Qmojikyo_pj_4, 94, 2,
4018 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4019 build_string ("Mojikyo-PJ-4"),
4020 build_string ("Mojikyo (pseudo JIS encoding) part 4"),
4022 ("Konjaku-Mojikyo (pseudo JIS encoding) part 4"),
4023 build_string ("jisx0208\\.Mojikyo-4$"),
4025 staticpro (&Vcharset_mojikyo_pj_5);
4026 Vcharset_mojikyo_pj_5 =
4027 make_charset (LEADING_BYTE_MOJIKYO_PJ_5, Qmojikyo_pj_5, 94, 2,
4028 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4029 build_string ("Mojikyo-PJ-5"),
4030 build_string ("Mojikyo (pseudo JIS encoding) part 5"),
4032 ("Konjaku-Mojikyo (pseudo JIS encoding) part 5"),
4033 build_string ("jisx0208\\.Mojikyo-5$"),
4035 staticpro (&Vcharset_mojikyo_pj_6);
4036 Vcharset_mojikyo_pj_6 =
4037 make_charset (LEADING_BYTE_MOJIKYO_PJ_6, Qmojikyo_pj_6, 94, 2,
4038 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4039 build_string ("Mojikyo-PJ-6"),
4040 build_string ("Mojikyo (pseudo JIS encoding) part 6"),
4042 ("Konjaku-Mojikyo (pseudo JIS encoding) part 6"),
4043 build_string ("jisx0208\\.Mojikyo-6$"),
4045 staticpro (&Vcharset_mojikyo_pj_7);
4046 Vcharset_mojikyo_pj_7 =
4047 make_charset (LEADING_BYTE_MOJIKYO_PJ_7, Qmojikyo_pj_7, 94, 2,
4048 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4049 build_string ("Mojikyo-PJ-7"),
4050 build_string ("Mojikyo (pseudo JIS encoding) part 7"),
4052 ("Konjaku-Mojikyo (pseudo JIS encoding) part 7"),
4053 build_string ("jisx0208\\.Mojikyo-7$"),
4055 staticpro (&Vcharset_mojikyo_pj_8);
4056 Vcharset_mojikyo_pj_8 =
4057 make_charset (LEADING_BYTE_MOJIKYO_PJ_8, Qmojikyo_pj_8, 94, 2,
4058 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4059 build_string ("Mojikyo-PJ-8"),
4060 build_string ("Mojikyo (pseudo JIS encoding) part 8"),
4062 ("Konjaku-Mojikyo (pseudo JIS encoding) part 8"),
4063 build_string ("jisx0208\\.Mojikyo-8$"),
4065 staticpro (&Vcharset_mojikyo_pj_9);
4066 Vcharset_mojikyo_pj_9 =
4067 make_charset (LEADING_BYTE_MOJIKYO_PJ_9, Qmojikyo_pj_9, 94, 2,
4068 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4069 build_string ("Mojikyo-PJ-9"),
4070 build_string ("Mojikyo (pseudo JIS encoding) part 9"),
4072 ("Konjaku-Mojikyo (pseudo JIS encoding) part 9"),
4073 build_string ("jisx0208\\.Mojikyo-9$"),
4075 staticpro (&Vcharset_mojikyo_pj_10);
4076 Vcharset_mojikyo_pj_10 =
4077 make_charset (LEADING_BYTE_MOJIKYO_PJ_10, Qmojikyo_pj_10, 94, 2,
4078 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4079 build_string ("Mojikyo-PJ-10"),
4080 build_string ("Mojikyo (pseudo JIS encoding) part 10"),
4082 ("Konjaku-Mojikyo (pseudo JIS encoding) part 10"),
4083 build_string ("jisx0208\\.Mojikyo-10$"),
4085 staticpro (&Vcharset_mojikyo_pj_11);
4086 Vcharset_mojikyo_pj_11 =
4087 make_charset (LEADING_BYTE_MOJIKYO_PJ_11, Qmojikyo_pj_11, 94, 2,
4088 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4089 build_string ("Mojikyo-PJ-11"),
4090 build_string ("Mojikyo (pseudo JIS encoding) part 11"),
4092 ("Konjaku-Mojikyo (pseudo JIS encoding) part 11"),
4093 build_string ("jisx0208\\.Mojikyo-11$"),
4095 staticpro (&Vcharset_mojikyo_pj_12);
4096 Vcharset_mojikyo_pj_12 =
4097 make_charset (LEADING_BYTE_MOJIKYO_PJ_12, Qmojikyo_pj_12, 94, 2,
4098 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4099 build_string ("Mojikyo-PJ-12"),
4100 build_string ("Mojikyo (pseudo JIS encoding) part 12"),
4102 ("Konjaku-Mojikyo (pseudo JIS encoding) part 12"),
4103 build_string ("jisx0208\\.Mojikyo-12$"),
4105 staticpro (&Vcharset_mojikyo_pj_13);
4106 Vcharset_mojikyo_pj_13 =
4107 make_charset (LEADING_BYTE_MOJIKYO_PJ_13, Qmojikyo_pj_13, 94, 2,
4108 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4109 build_string ("Mojikyo-PJ-13"),
4110 build_string ("Mojikyo (pseudo JIS encoding) part 13"),
4112 ("Konjaku-Mojikyo (pseudo JIS encoding) part 13"),
4113 build_string ("jisx0208\\.Mojikyo-13$"),
4115 staticpro (&Vcharset_mojikyo_pj_14);
4116 Vcharset_mojikyo_pj_14 =
4117 make_charset (LEADING_BYTE_MOJIKYO_PJ_14, Qmojikyo_pj_14, 94, 2,
4118 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4119 build_string ("Mojikyo-PJ-14"),
4120 build_string ("Mojikyo (pseudo JIS encoding) part 14"),
4122 ("Konjaku-Mojikyo (pseudo JIS encoding) part 14"),
4123 build_string ("jisx0208\\.Mojikyo-14$"),
4125 staticpro (&Vcharset_mojikyo_pj_15);
4126 Vcharset_mojikyo_pj_15 =
4127 make_charset (LEADING_BYTE_MOJIKYO_PJ_15, Qmojikyo_pj_15, 94, 2,
4128 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4129 build_string ("Mojikyo-PJ-15"),
4130 build_string ("Mojikyo (pseudo JIS encoding) part 15"),
4132 ("Konjaku-Mojikyo (pseudo JIS encoding) part 15"),
4133 build_string ("jisx0208\\.Mojikyo-15$"),
4135 staticpro (&Vcharset_mojikyo_pj_16);
4136 Vcharset_mojikyo_pj_16 =
4137 make_charset (LEADING_BYTE_MOJIKYO_PJ_16, Qmojikyo_pj_16, 94, 2,
4138 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4139 build_string ("Mojikyo-PJ-16"),
4140 build_string ("Mojikyo (pseudo JIS encoding) part 16"),
4142 ("Konjaku-Mojikyo (pseudo JIS encoding) part 16"),
4143 build_string ("jisx0208\\.Mojikyo-16$"),
4145 staticpro (&Vcharset_mojikyo_pj_17);
4146 Vcharset_mojikyo_pj_17 =
4147 make_charset (LEADING_BYTE_MOJIKYO_PJ_17, Qmojikyo_pj_17, 94, 2,
4148 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4149 build_string ("Mojikyo-PJ-17"),
4150 build_string ("Mojikyo (pseudo JIS encoding) part 17"),
4152 ("Konjaku-Mojikyo (pseudo JIS encoding) part 17"),
4153 build_string ("jisx0208\\.Mojikyo-17$"),
4155 staticpro (&Vcharset_mojikyo_pj_18);
4156 Vcharset_mojikyo_pj_18 =
4157 make_charset (LEADING_BYTE_MOJIKYO_PJ_18, Qmojikyo_pj_18, 94, 2,
4158 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4159 build_string ("Mojikyo-PJ-18"),
4160 build_string ("Mojikyo (pseudo JIS encoding) part 18"),
4162 ("Konjaku-Mojikyo (pseudo JIS encoding) part 18"),
4163 build_string ("jisx0208\\.Mojikyo-18$"),
4165 staticpro (&Vcharset_mojikyo_pj_19);
4166 Vcharset_mojikyo_pj_19 =
4167 make_charset (LEADING_BYTE_MOJIKYO_PJ_19, Qmojikyo_pj_19, 94, 2,
4168 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4169 build_string ("Mojikyo-PJ-19"),
4170 build_string ("Mojikyo (pseudo JIS encoding) part 19"),
4172 ("Konjaku-Mojikyo (pseudo JIS encoding) part 19"),
4173 build_string ("jisx0208\\.Mojikyo-19$"),
4175 staticpro (&Vcharset_mojikyo_pj_20);
4176 Vcharset_mojikyo_pj_20 =
4177 make_charset (LEADING_BYTE_MOJIKYO_PJ_20, Qmojikyo_pj_20, 94, 2,
4178 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4179 build_string ("Mojikyo-PJ-20"),
4180 build_string ("Mojikyo (pseudo JIS encoding) part 20"),
4182 ("Konjaku-Mojikyo (pseudo JIS encoding) part 20"),
4183 build_string ("jisx0208\\.Mojikyo-20$"),
4185 staticpro (&Vcharset_mojikyo_pj_21);
4186 Vcharset_mojikyo_pj_21 =
4187 make_charset (LEADING_BYTE_MOJIKYO_PJ_21, Qmojikyo_pj_21, 94, 2,
4188 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4189 build_string ("Mojikyo-PJ-21"),
4190 build_string ("Mojikyo (pseudo JIS encoding) part 21"),
4192 ("Konjaku-Mojikyo (pseudo JIS encoding) part 21"),
4193 build_string ("jisx0208\\.Mojikyo-21$"),
4195 staticpro (&Vcharset_ethiopic_ucs);
4196 Vcharset_ethiopic_ucs =
4197 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
4198 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4199 build_string ("Ethiopic (UCS)"),
4200 build_string ("Ethiopic (UCS)"),
4201 build_string ("Ethiopic of UCS"),
4202 build_string ("Ethiopic-Unicode"),
4203 Qnil, 0x1200, 0x137F, 0x1200, 0);
4205 staticpro (&Vcharset_chinese_big5_1);
4206 Vcharset_chinese_big5_1 =
4207 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
4208 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
4209 build_string ("Big5"),
4210 build_string ("Big5 (Level-1)"),
4212 ("Big5 Level-1 Chinese traditional"),
4213 build_string ("big5"),
4215 staticpro (&Vcharset_chinese_big5_2);
4216 Vcharset_chinese_big5_2 =
4217 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
4218 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
4219 build_string ("Big5"),
4220 build_string ("Big5 (Level-2)"),
4222 ("Big5 Level-2 Chinese traditional"),
4223 build_string ("big5"),
4226 #ifdef ENABLE_COMPOSITE_CHARS
4227 /* #### For simplicity, we put composite chars into a 96x96 charset.
4228 This is going to lead to problems because you can run out of
4229 room, esp. as we don't yet recycle numbers. */
4230 staticpro (&Vcharset_composite);
4231 Vcharset_composite =
4232 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
4233 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4234 build_string ("Composite"),
4235 build_string ("Composite characters"),
4236 build_string ("Composite characters"),
4239 /* #### not dumped properly */
4240 composite_char_row_next = 32;
4241 composite_char_col_next = 32;
4243 Vcomposite_char_string2char_hash_table =
4244 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
4245 Vcomposite_char_char2string_hash_table =
4246 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
4247 staticpro (&Vcomposite_char_string2char_hash_table);
4248 staticpro (&Vcomposite_char_char2string_hash_table);
4249 #endif /* ENABLE_COMPOSITE_CHARS */