1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 20.3. Not in FSF. */
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_japanese_jisx0208;
60 Lisp_Object Vcharset_japanese_jisx0208_1990;
61 Lisp_Object Vcharset_korean_ksc5601;
62 Lisp_Object Vcharset_japanese_jisx0212;
63 Lisp_Object Vcharset_chinese_cns11643_1;
64 Lisp_Object Vcharset_chinese_cns11643_2;
66 Lisp_Object Vcharset_ucs;
67 Lisp_Object Vcharset_ucs_bmp;
68 Lisp_Object Vcharset_latin_viscii;
69 Lisp_Object Vcharset_latin_tcvn5712;
70 Lisp_Object Vcharset_latin_viscii_lower;
71 Lisp_Object Vcharset_latin_viscii_upper;
72 Lisp_Object Vcharset_ideograph_daikanwa;
73 Lisp_Object Vcharset_mojikyo;
74 Lisp_Object Vcharset_mojikyo_pj_1;
75 Lisp_Object Vcharset_mojikyo_pj_2;
76 Lisp_Object Vcharset_mojikyo_pj_3;
77 Lisp_Object Vcharset_mojikyo_pj_4;
78 Lisp_Object Vcharset_mojikyo_pj_5;
79 Lisp_Object Vcharset_mojikyo_pj_6;
80 Lisp_Object Vcharset_mojikyo_pj_7;
81 Lisp_Object Vcharset_mojikyo_pj_8;
82 Lisp_Object Vcharset_mojikyo_pj_9;
83 Lisp_Object Vcharset_mojikyo_pj_10;
84 Lisp_Object Vcharset_mojikyo_pj_11;
85 Lisp_Object Vcharset_mojikyo_pj_12;
86 Lisp_Object Vcharset_mojikyo_pj_13;
87 Lisp_Object Vcharset_mojikyo_pj_14;
88 Lisp_Object Vcharset_mojikyo_pj_15;
89 Lisp_Object Vcharset_mojikyo_pj_16;
90 Lisp_Object Vcharset_mojikyo_pj_17;
91 Lisp_Object Vcharset_mojikyo_pj_18;
92 Lisp_Object Vcharset_mojikyo_pj_19;
93 Lisp_Object Vcharset_mojikyo_pj_20;
94 Lisp_Object Vcharset_mojikyo_pj_21;
95 Lisp_Object Vcharset_ethiopic_ucs;
97 Lisp_Object Vcharset_chinese_big5_1;
98 Lisp_Object Vcharset_chinese_big5_2;
100 #ifdef ENABLE_COMPOSITE_CHARS
101 Lisp_Object Vcharset_composite;
103 /* Hash tables for composite chars. One maps string representing
104 composed chars to their equivalent chars; one goes the
106 Lisp_Object Vcomposite_char_char2string_hash_table;
107 Lisp_Object Vcomposite_char_string2char_hash_table;
109 static int composite_char_row_next;
110 static int composite_char_col_next;
112 #endif /* ENABLE_COMPOSITE_CHARS */
114 struct charset_lookup *chlook;
116 static const struct lrecord_description charset_lookup_description_1[] = {
117 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
126 static const struct struct_description charset_lookup_description = {
127 sizeof (struct charset_lookup),
128 charset_lookup_description_1
132 /* Table of number of bytes in the string representation of a character
133 indexed by the first byte of that representation.
135 rep_bytes_by_first_byte(c) is more efficient than the equivalent
136 canonical computation:
138 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
140 const Bytecount rep_bytes_by_first_byte[0xA0] =
141 { /* 0x00 - 0x7f are for straight ASCII */
142 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
143 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
146 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
147 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
148 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
149 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
150 /* 0x80 - 0x8f are for Dimension-1 official charsets */
152 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
154 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
156 /* 0x90 - 0x9d are for Dimension-2 official charsets */
157 /* 0x9e is for Dimension-1 private charsets */
158 /* 0x9f is for Dimension-2 private charsets */
159 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
165 #define BT_UINT8_MIN 0
166 #define BT_UINT8_MAX (UCHAR_MAX - 3)
167 #define BT_UINT8_t (UCHAR_MAX - 2)
168 #define BT_UINT8_nil (UCHAR_MAX - 1)
169 #define BT_UINT8_unbound UCHAR_MAX
171 INLINE_HEADER int INT_UINT8_P (Lisp_Object obj);
172 INLINE_HEADER int UINT8_VALUE_P (Lisp_Object obj);
173 INLINE_HEADER unsigned char UINT8_ENCODE (Lisp_Object obj);
174 INLINE_HEADER Lisp_Object UINT8_DECODE (unsigned char n);
175 INLINE_HEADER unsigned short UINT8_TO_UINT16 (unsigned char n);
178 INT_UINT8_P (Lisp_Object obj)
182 int num = XINT (obj);
184 return (BT_UINT8_MIN <= num) && (num <= BT_UINT8_MAX);
191 UINT8_VALUE_P (Lisp_Object obj)
193 return EQ (obj, Qunbound)
194 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT8_P (obj);
197 INLINE_HEADER unsigned char
198 UINT8_ENCODE (Lisp_Object obj)
200 if (EQ (obj, Qunbound))
201 return BT_UINT8_unbound;
202 else if (EQ (obj, Qnil))
204 else if (EQ (obj, Qt))
210 INLINE_HEADER Lisp_Object
211 UINT8_DECODE (unsigned char n)
213 if (n == BT_UINT8_unbound)
215 else if (n == BT_UINT8_nil)
217 else if (n == BT_UINT8_t)
224 mark_uint8_byte_table (Lisp_Object obj)
230 print_uint8_byte_table (Lisp_Object obj,
231 Lisp_Object printcharfun, int escapeflag)
233 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
235 struct gcpro gcpro1, gcpro2;
236 GCPRO2 (obj, printcharfun);
238 write_c_string ("\n#<uint8-byte-table", printcharfun);
239 for (i = 0; i < 256; i++)
241 unsigned char n = bte->property[i];
243 write_c_string ("\n ", printcharfun);
244 write_c_string (" ", printcharfun);
245 if (n == BT_UINT8_unbound)
246 write_c_string ("void", printcharfun);
247 else if (n == BT_UINT8_nil)
248 write_c_string ("nil", printcharfun);
249 else if (n == BT_UINT8_t)
250 write_c_string ("t", printcharfun);
255 sprintf (buf, "%hd", n);
256 write_c_string (buf, printcharfun);
260 write_c_string (">", printcharfun);
264 uint8_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
266 Lisp_Uint8_Byte_Table *te1 = XUINT8_BYTE_TABLE (obj1);
267 Lisp_Uint8_Byte_Table *te2 = XUINT8_BYTE_TABLE (obj2);
270 for (i = 0; i < 256; i++)
271 if (te1->property[i] != te2->property[i])
277 uint8_byte_table_hash (Lisp_Object obj, int depth)
279 Lisp_Uint8_Byte_Table *te = XUINT8_BYTE_TABLE (obj);
283 for (i = 0; i < 256; i++)
284 hash = HASH2 (hash, te->property[i]);
288 DEFINE_LRECORD_IMPLEMENTATION ("uint8-byte-table", uint8_byte_table,
289 mark_uint8_byte_table,
290 print_uint8_byte_table,
291 0, uint8_byte_table_equal,
292 uint8_byte_table_hash,
293 0 /* uint8_byte_table_description */,
294 Lisp_Uint8_Byte_Table);
297 make_uint8_byte_table (unsigned char initval)
301 Lisp_Uint8_Byte_Table *cte;
303 cte = alloc_lcrecord_type (Lisp_Uint8_Byte_Table,
304 &lrecord_uint8_byte_table);
306 for (i = 0; i < 256; i++)
307 cte->property[i] = initval;
309 XSETUINT8_BYTE_TABLE (obj, cte);
314 uint8_byte_table_same_value_p (Lisp_Object obj)
316 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
317 unsigned char v0 = bte->property[0];
320 for (i = 1; i < 256; i++)
322 if (bte->property[i] != v0)
329 #define BT_UINT16_MIN 0
330 #define BT_UINT16_MAX (USHRT_MAX - 3)
331 #define BT_UINT16_t (USHRT_MAX - 2)
332 #define BT_UINT16_nil (USHRT_MAX - 1)
333 #define BT_UINT16_unbound USHRT_MAX
335 INLINE_HEADER int INT_UINT16_P (Lisp_Object obj);
336 INLINE_HEADER int UINT16_VALUE_P (Lisp_Object obj);
337 INLINE_HEADER unsigned short UINT16_ENCODE (Lisp_Object obj);
338 INLINE_HEADER Lisp_Object UINT16_DECODE (unsigned short us);
341 INT_UINT16_P (Lisp_Object obj)
345 int num = XINT (obj);
347 return (BT_UINT16_MIN <= num) && (num <= BT_UINT16_MAX);
354 UINT16_VALUE_P (Lisp_Object obj)
356 return EQ (obj, Qunbound)
357 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT16_P (obj);
360 INLINE_HEADER unsigned short
361 UINT16_ENCODE (Lisp_Object obj)
363 if (EQ (obj, Qunbound))
364 return BT_UINT16_unbound;
365 else if (EQ (obj, Qnil))
366 return BT_UINT16_nil;
367 else if (EQ (obj, Qt))
373 INLINE_HEADER Lisp_Object
374 UINT16_DECODE (unsigned short n)
376 if (n == BT_UINT16_unbound)
378 else if (n == BT_UINT16_nil)
380 else if (n == BT_UINT16_t)
386 INLINE_HEADER unsigned short
387 UINT8_TO_UINT16 (unsigned char n)
389 if (n == BT_UINT8_unbound)
390 return BT_UINT16_unbound;
391 else if (n == BT_UINT8_nil)
392 return BT_UINT16_nil;
393 else if (n == BT_UINT8_t)
400 mark_uint16_byte_table (Lisp_Object obj)
406 print_uint16_byte_table (Lisp_Object obj,
407 Lisp_Object printcharfun, int escapeflag)
409 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
411 struct gcpro gcpro1, gcpro2;
412 GCPRO2 (obj, printcharfun);
414 write_c_string ("\n#<uint16-byte-table", printcharfun);
415 for (i = 0; i < 256; i++)
417 unsigned short n = bte->property[i];
419 write_c_string ("\n ", printcharfun);
420 write_c_string (" ", printcharfun);
421 if (n == BT_UINT16_unbound)
422 write_c_string ("void", printcharfun);
423 else if (n == BT_UINT16_nil)
424 write_c_string ("nil", printcharfun);
425 else if (n == BT_UINT16_t)
426 write_c_string ("t", printcharfun);
431 sprintf (buf, "%hd", n);
432 write_c_string (buf, printcharfun);
436 write_c_string (">", printcharfun);
440 uint16_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
442 Lisp_Uint16_Byte_Table *te1 = XUINT16_BYTE_TABLE (obj1);
443 Lisp_Uint16_Byte_Table *te2 = XUINT16_BYTE_TABLE (obj2);
446 for (i = 0; i < 256; i++)
447 if (te1->property[i] != te2->property[i])
453 uint16_byte_table_hash (Lisp_Object obj, int depth)
455 Lisp_Uint16_Byte_Table *te = XUINT16_BYTE_TABLE (obj);
459 for (i = 0; i < 256; i++)
460 hash = HASH2 (hash, te->property[i]);
464 DEFINE_LRECORD_IMPLEMENTATION ("uint16-byte-table", uint16_byte_table,
465 mark_uint16_byte_table,
466 print_uint16_byte_table,
467 0, uint16_byte_table_equal,
468 uint16_byte_table_hash,
469 0 /* uint16_byte_table_description */,
470 Lisp_Uint16_Byte_Table);
473 make_uint16_byte_table (unsigned short initval)
477 Lisp_Uint16_Byte_Table *cte;
479 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
480 &lrecord_uint16_byte_table);
482 for (i = 0; i < 256; i++)
483 cte->property[i] = initval;
485 XSETUINT16_BYTE_TABLE (obj, cte);
490 uint16_byte_table_same_value_p (Lisp_Object obj)
492 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
493 unsigned short v0 = bte->property[0];
496 for (i = 1; i < 256; i++)
498 if (bte->property[i] != v0)
506 mark_byte_table (Lisp_Object obj)
508 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
511 for (i = 0; i < 256; i++)
513 mark_object (cte->property[i]);
519 print_byte_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
521 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
523 struct gcpro gcpro1, gcpro2;
524 GCPRO2 (obj, printcharfun);
526 write_c_string ("\n#<byte-table", printcharfun);
527 for (i = 0; i < 256; i++)
529 Lisp_Object elt = bte->property[i];
531 write_c_string ("\n ", printcharfun);
532 write_c_string (" ", printcharfun);
533 if (EQ (elt, Qunbound))
534 write_c_string ("void", printcharfun);
536 print_internal (elt, printcharfun, escapeflag);
539 write_c_string (">", printcharfun);
543 byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
545 Lisp_Byte_Table *cte1 = XBYTE_TABLE (obj1);
546 Lisp_Byte_Table *cte2 = XBYTE_TABLE (obj2);
549 for (i = 0; i < 256; i++)
550 if (BYTE_TABLE_P (cte1->property[i]))
552 if (BYTE_TABLE_P (cte2->property[i]))
554 if (!byte_table_equal (cte1->property[i],
555 cte2->property[i], depth + 1))
562 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
568 byte_table_hash (Lisp_Object obj, int depth)
570 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
572 return internal_array_hash (cte->property, 256, depth);
575 static const struct lrecord_description byte_table_description[] = {
576 { XD_LISP_OBJECT_ARRAY, offsetof(Lisp_Byte_Table, property), 256 },
580 DEFINE_LRECORD_IMPLEMENTATION ("byte-table", byte_table,
585 byte_table_description,
589 make_byte_table (Lisp_Object initval)
593 Lisp_Byte_Table *cte;
595 cte = alloc_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
597 for (i = 0; i < 256; i++)
598 cte->property[i] = initval;
600 XSETBYTE_TABLE (obj, cte);
605 byte_table_same_value_p (Lisp_Object obj)
607 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
608 Lisp_Object v0 = bte->property[0];
611 for (i = 1; i < 256; i++)
613 if (!EQ (bte->property[i], v0))
620 Lisp_Object get_byte_table (Lisp_Object table, unsigned char idx);
621 Lisp_Object put_byte_table (Lisp_Object table, unsigned char idx,
625 get_byte_table (Lisp_Object table, unsigned char idx)
627 if (UINT8_BYTE_TABLE_P (table))
628 return UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[idx]);
629 else if (UINT16_BYTE_TABLE_P (table))
630 return UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[idx]);
631 else if (BYTE_TABLE_P (table))
632 return XBYTE_TABLE(table)->property[idx];
638 put_byte_table (Lisp_Object table, unsigned char idx, Lisp_Object value)
640 if (UINT8_BYTE_TABLE_P (table))
642 if (UINT8_VALUE_P (value))
644 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
645 if (!UINT8_BYTE_TABLE_P (value) &&
646 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
647 && uint8_byte_table_same_value_p (table))
652 else if (UINT16_VALUE_P (value))
654 Lisp_Object new = make_uint16_byte_table (Qnil);
657 for (i = 0; i < 256; i++)
659 XUINT16_BYTE_TABLE(new)->property[i]
660 = UINT8_TO_UINT16 (XUINT8_BYTE_TABLE(table)->property[i]);
662 XUINT16_BYTE_TABLE(new)->property[idx] = UINT16_ENCODE (value);
667 Lisp_Object new = make_byte_table (Qnil);
670 for (i = 0; i < 256; i++)
672 XBYTE_TABLE(new)->property[i]
673 = UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[i]);
675 XBYTE_TABLE(new)->property[idx] = value;
679 else if (UINT16_BYTE_TABLE_P (table))
681 if (UINT16_VALUE_P (value))
683 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
684 if (!UINT8_BYTE_TABLE_P (value) &&
685 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
686 && uint16_byte_table_same_value_p (table))
693 Lisp_Object new = make_byte_table (Qnil);
696 for (i = 0; i < 256; i++)
698 XBYTE_TABLE(new)->property[i]
699 = UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[i]);
701 XBYTE_TABLE(new)->property[idx] = value;
705 else if (BYTE_TABLE_P (table))
707 XBYTE_TABLE(table)->property[idx] = value;
708 if (!UINT8_BYTE_TABLE_P (value) &&
709 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
710 && byte_table_same_value_p (table))
715 else if (!EQ (table, value))
717 if (UINT8_VALUE_P (table) && UINT8_VALUE_P (value))
719 table = make_uint8_byte_table (UINT8_ENCODE (table));
720 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
722 else if (UINT16_VALUE_P (table) && UINT16_VALUE_P (value))
724 table = make_uint16_byte_table (UINT16_ENCODE (table));
725 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
729 table = make_byte_table (table);
730 XBYTE_TABLE(table)->property[idx] = value;
737 mark_char_id_table (Lisp_Object obj)
739 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
745 print_char_id_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
747 Lisp_Object table = XCHAR_ID_TABLE (obj)->table;
749 struct gcpro gcpro1, gcpro2;
750 GCPRO2 (obj, printcharfun);
752 write_c_string ("#<char-id-table ", printcharfun);
753 for (i = 0; i < 256; i++)
755 Lisp_Object elt = get_byte_table (table, i);
756 if (i != 0) write_c_string ("\n ", printcharfun);
757 if (EQ (elt, Qunbound))
758 write_c_string ("void", printcharfun);
760 print_internal (elt, printcharfun, escapeflag);
763 write_c_string (">", printcharfun);
767 char_id_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
769 Lisp_Char_ID_Table *cte1 = XCHAR_ID_TABLE (obj1);
770 Lisp_Char_ID_Table *cte2 = XCHAR_ID_TABLE (obj2);
772 return byte_table_equal (cte1->table, cte2->table, depth + 1);
776 char_id_table_hash (Lisp_Object obj, int depth)
778 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
780 return char_id_table_hash (cte->table, depth + 1);
783 static const struct lrecord_description char_id_table_description[] = {
784 { XD_LISP_OBJECT, offsetof(Lisp_Char_ID_Table, table) },
788 DEFINE_LRECORD_IMPLEMENTATION ("char-id-table", char_id_table,
791 0, char_id_table_equal,
793 char_id_table_description,
797 make_char_id_table (Lisp_Object initval)
800 Lisp_Char_ID_Table *cte;
802 cte = alloc_lcrecord_type (Lisp_Char_ID_Table, &lrecord_char_id_table);
804 cte->table = make_byte_table (initval);
806 XSETCHAR_ID_TABLE (obj, cte);
812 get_char_id_table (Emchar ch, Lisp_Object table)
814 unsigned int code = ch;
821 (XCHAR_ID_TABLE (table)->table,
822 (unsigned char)(code >> 24)),
823 (unsigned char) (code >> 16)),
824 (unsigned char) (code >> 8)),
825 (unsigned char) code);
828 void put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table);
830 put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table)
832 unsigned int code = ch;
833 Lisp_Object table1, table2, table3, table4;
835 table1 = XCHAR_ID_TABLE (table)->table;
836 table2 = get_byte_table (table1, (unsigned char)(code >> 24));
837 table3 = get_byte_table (table2, (unsigned char)(code >> 16));
838 table4 = get_byte_table (table3, (unsigned char)(code >> 8));
840 table4 = put_byte_table (table4, (unsigned char)code, value);
841 table3 = put_byte_table (table3, (unsigned char)(code >> 8), table4);
842 table2 = put_byte_table (table2, (unsigned char)(code >> 16), table3);
843 XCHAR_ID_TABLE (table)->table
844 = put_byte_table (table1, (unsigned char)(code >> 24), table2);
848 Lisp_Object Vchar_attribute_hash_table;
849 Lisp_Object Vcharacter_composition_table;
850 Lisp_Object Vcharacter_variant_table;
852 Lisp_Object Qideograph_daikanwa;
853 Lisp_Object Q_decomposition;
857 Lisp_Object Qisolated;
858 Lisp_Object Qinitial;
861 Lisp_Object Qvertical;
862 Lisp_Object QnoBreak;
863 Lisp_Object Qfraction;
873 Emchar to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg);
875 Lisp_Object put_char_ccs_code_point (Lisp_Object character,
876 Lisp_Object ccs, Lisp_Object value);
877 Lisp_Object remove_char_ccs (Lisp_Object character, Lisp_Object ccs);
880 to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
886 else if (EQ (v, Qcompat))
888 else if (EQ (v, Qisolated))
890 else if (EQ (v, Qinitial))
892 else if (EQ (v, Qmedial))
894 else if (EQ (v, Qfinal))
896 else if (EQ (v, Qvertical))
898 else if (EQ (v, QnoBreak))
900 else if (EQ (v, Qfraction))
902 else if (EQ (v, Qsuper))
904 else if (EQ (v, Qsub))
906 else if (EQ (v, Qcircle))
908 else if (EQ (v, Qsquare))
910 else if (EQ (v, Qwide))
912 else if (EQ (v, Qnarrow))
914 else if (EQ (v, Qsmall))
916 else if (EQ (v, Qfont))
919 signal_simple_error (err_msg, err_arg);
922 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
923 Return character corresponding with list.
927 Lisp_Object table = Vcharacter_composition_table;
928 Lisp_Object rest = list;
932 Lisp_Object v = Fcar (rest);
934 Emchar c = to_char_id (v, "Invalid value for composition", list);
936 ret = get_char_id_table (c, table);
941 if (!CHAR_ID_TABLE_P (ret))
946 else if (!CONSP (rest))
948 else if (CHAR_ID_TABLE_P (ret))
951 signal_simple_error ("Invalid table is found with", list);
953 signal_simple_error ("Invalid value for composition", list);
956 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
957 Return variants of CHARACTER.
961 CHECK_CHAR (character);
962 return Fcopy_list (get_char_id_table (XCHAR (character),
963 Vcharacter_variant_table));
967 /* We store the char-attributes in hash tables with the names as the
968 key and the actual char-id-table object as the value. Occasionally
969 we need to use them in a list format. These routines provide us
971 struct char_attribute_list_closure
973 Lisp_Object *char_attribute_list;
977 add_char_attribute_to_list_mapper (Lisp_Object key, Lisp_Object value,
978 void *char_attribute_list_closure)
980 /* This function can GC */
981 struct char_attribute_list_closure *calcl
982 = (struct char_attribute_list_closure*) char_attribute_list_closure;
983 Lisp_Object *char_attribute_list = calcl->char_attribute_list;
985 *char_attribute_list = Fcons (key, *char_attribute_list);
989 DEFUN ("char-attribute-list", Fchar_attribute_list, 0, 0, 0, /*
990 Return the list of all existing character attributes except coded-charsets.
994 Lisp_Object char_attribute_list = Qnil;
996 struct char_attribute_list_closure char_attribute_list_closure;
998 GCPRO1 (char_attribute_list);
999 char_attribute_list_closure.char_attribute_list = &char_attribute_list;
1000 elisp_maphash (add_char_attribute_to_list_mapper,
1001 Vchar_attribute_hash_table,
1002 &char_attribute_list_closure);
1004 return char_attribute_list;
1007 DEFUN ("find-char-attribute-table", Ffind_char_attribute_table, 1, 1, 0, /*
1008 Return char-id-table corresponding to ATTRIBUTE.
1012 return Fgethash (attribute, Vchar_attribute_hash_table, Qnil);
1016 /* We store the char-id-tables in hash tables with the attributes as
1017 the key and the actual char-id-table object as the value. Each
1018 char-id-table stores values of an attribute corresponding with
1019 characters. Occasionally we need to get attributes of a character
1020 in a association-list format. These routines provide us with
1022 struct char_attribute_alist_closure
1025 Lisp_Object *char_attribute_alist;
1029 add_char_attribute_alist_mapper (Lisp_Object key, Lisp_Object value,
1030 void *char_attribute_alist_closure)
1032 /* This function can GC */
1033 struct char_attribute_alist_closure *caacl =
1034 (struct char_attribute_alist_closure*) char_attribute_alist_closure;
1035 Lisp_Object ret = get_char_id_table (caacl->char_id, value);
1036 if (!UNBOUNDP (ret))
1038 Lisp_Object *char_attribute_alist = caacl->char_attribute_alist;
1039 *char_attribute_alist
1040 = Fcons (Fcons (key, ret), *char_attribute_alist);
1045 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
1046 Return the alist of attributes of CHARACTER.
1050 Lisp_Object alist = Qnil;
1053 CHECK_CHAR (character);
1055 struct gcpro gcpro1;
1056 struct char_attribute_alist_closure char_attribute_alist_closure;
1059 char_attribute_alist_closure.char_id = XCHAR (character);
1060 char_attribute_alist_closure.char_attribute_alist = &alist;
1061 elisp_maphash (add_char_attribute_alist_mapper,
1062 Vchar_attribute_hash_table,
1063 &char_attribute_alist_closure);
1067 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
1069 Lisp_Object ccs = chlook->charset_by_leading_byte[i];
1073 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1076 if ( CHAR_ID_TABLE_P (encoding_table)
1077 && INTP (cpos = get_char_id_table (XCHAR (character),
1080 alist = Fcons (Fcons (ccs, cpos), alist);
1087 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
1088 Return the value of CHARACTER's ATTRIBUTE.
1090 (character, attribute))
1094 CHECK_CHAR (character);
1095 if (!NILP (ccs = Ffind_charset (attribute)))
1097 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1099 if (CHAR_ID_TABLE_P (encoding_table))
1100 return get_char_id_table (XCHAR (character), encoding_table);
1106 Lisp_Object table = Fgethash (attribute,
1107 Vchar_attribute_hash_table,
1109 if (!UNBOUNDP (table))
1111 Lisp_Object ret = get_char_id_table (XCHAR (character), table);
1112 if (!UNBOUNDP (ret))
1119 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
1120 Store CHARACTER's ATTRIBUTE with VALUE.
1122 (character, attribute, value))
1126 CHECK_CHAR (character);
1127 ccs = Ffind_charset (attribute);
1130 return put_char_ccs_code_point (character, ccs, value);
1132 else if (EQ (attribute, Q_decomposition))
1137 signal_simple_error ("Invalid value for ->decomposition",
1140 if (CONSP (Fcdr (value)))
1142 Lisp_Object rest = value;
1143 Lisp_Object table = Vcharacter_composition_table;
1147 GET_EXTERNAL_LIST_LENGTH (rest, len);
1148 seq = make_vector (len, Qnil);
1150 while (CONSP (rest))
1152 Lisp_Object v = Fcar (rest);
1155 = to_char_id (v, "Invalid value for ->decomposition", value);
1158 XVECTOR_DATA(seq)[i++] = v;
1160 XVECTOR_DATA(seq)[i++] = make_char (c);
1164 put_char_id_table (c, character, table);
1169 ntable = get_char_id_table (c, table);
1170 if (!CHAR_ID_TABLE_P (ntable))
1172 ntable = make_char_id_table (Qnil);
1173 put_char_id_table (c, ntable, table);
1181 Lisp_Object v = Fcar (value);
1185 Emchar c = XINT (v);
1187 = get_char_id_table (c, Vcharacter_variant_table);
1189 if (NILP (Fmemq (v, ret)))
1191 put_char_id_table (c, Fcons (character, ret),
1192 Vcharacter_variant_table);
1195 seq = make_vector (1, v);
1199 else if (EQ (attribute, Q_ucs))
1205 signal_simple_error ("Invalid value for ->ucs", value);
1209 ret = get_char_id_table (c, Vcharacter_variant_table);
1210 if (NILP (Fmemq (character, ret)))
1212 put_char_id_table (c, Fcons (character, ret),
1213 Vcharacter_variant_table);
1217 Lisp_Object table = Fgethash (attribute,
1218 Vchar_attribute_hash_table,
1223 table = make_char_id_table (Qunbound);
1224 Fputhash (attribute, table, Vchar_attribute_hash_table);
1226 put_char_id_table (XCHAR (character), value, table);
1231 DEFUN ("remove-char-attribute", Fremove_char_attribute, 2, 2, 0, /*
1232 Remove CHARACTER's ATTRIBUTE.
1234 (character, attribute))
1238 CHECK_CHAR (character);
1239 ccs = Ffind_charset (attribute);
1242 return remove_char_ccs (character, ccs);
1246 Lisp_Object table = Fgethash (attribute,
1247 Vchar_attribute_hash_table,
1249 if (!UNBOUNDP (table))
1251 put_char_id_table (XCHAR (character), Qunbound, table);
1258 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
1260 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
1262 /* ad-hoc method for `ascii' */
1263 if ((CHARSET_CHARS (cs) == 94) &&
1264 (CHARSET_BYTE_OFFSET (cs) != 33))
1265 return 128 - CHARSET_BYTE_OFFSET (cs);
1267 return CHARSET_CHARS (cs);
1270 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
1272 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
1274 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
1278 if (XVECTOR_LENGTH (v) > ccs_len)
1281 for (i = 0; i < XVECTOR_LENGTH (v); i++)
1283 Lisp_Object c = XVECTOR_DATA(v)[i];
1285 if (!NILP (c) && !CHARP (c))
1289 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
1301 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1304 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1314 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1315 nv = XVECTOR_DATA(v)[i];
1321 XVECTOR_DATA(v)[i] = Qnil;
1325 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1326 int code_point, Lisp_Object character);
1328 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1329 int code_point, Lisp_Object character)
1333 int ccs_len = XVECTOR_LENGTH (v);
1338 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1339 nv = XVECTOR_DATA(v)[i];
1343 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
1349 XVECTOR_DATA(v)[i] = character;
1353 put_char_ccs_code_point (Lisp_Object character,
1354 Lisp_Object ccs, Lisp_Object value)
1356 Lisp_Object encoding_table;
1358 if (!EQ (XCHARSET_NAME (ccs), Qucs)
1359 || (XCHAR (character) != XINT (value)))
1361 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
1362 int dim = XCHARSET_DIMENSION (ccs);
1363 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
1364 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
1368 { /* obsolete representation: value must be a list of bytes */
1369 Lisp_Object ret = Fcar (value);
1373 signal_simple_error ("Invalid value for coded-charset", value);
1374 code_point = XINT (ret);
1375 if (XCHARSET_GRAPHIC (ccs) == 1)
1377 rest = Fcdr (value);
1378 while (!NILP (rest))
1383 signal_simple_error ("Invalid value for coded-charset",
1387 signal_simple_error ("Invalid value for coded-charset",
1390 if (XCHARSET_GRAPHIC (ccs) == 1)
1392 code_point = (code_point << 8) | j;
1395 value = make_int (code_point);
1397 else if (INTP (value))
1399 code_point = XINT (value);
1400 if (XCHARSET_GRAPHIC (ccs) == 1)
1402 code_point &= 0x7F7F7F7F;
1403 value = make_int (code_point);
1407 signal_simple_error ("Invalid value for coded-charset", value);
1411 Lisp_Object cpos = Fget_char_attribute (character, ccs);
1414 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
1419 XCHARSET_DECODING_TABLE (ccs)
1420 = v = make_older_vector (ccs_len, Qnil);
1423 decoding_table_put_char (v, dim, byte_offset, code_point, character);
1425 if (NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
1427 XCHARSET_ENCODING_TABLE (ccs)
1428 = encoding_table = make_char_id_table (Qnil);
1430 put_char_id_table (XCHAR (character), value, encoding_table);
1435 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
1437 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
1438 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1440 if (VECTORP (decoding_table))
1442 Lisp_Object cpos = Fget_char_attribute (character, ccs);
1446 decoding_table_remove_char (decoding_table,
1447 XCHARSET_DIMENSION (ccs),
1448 XCHARSET_BYTE_OFFSET (ccs),
1452 if (CHAR_ID_TABLE_P (encoding_table))
1454 put_char_id_table (XCHAR (character), Qnil, encoding_table);
1459 EXFUN (Fmake_char, 3);
1460 EXFUN (Fdecode_char, 2);
1462 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
1463 Store character's ATTRIBUTES.
1467 Lisp_Object rest = attributes;
1468 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
1469 Lisp_Object character;
1471 Lisp_Object daikanwa = Qnil;
1476 while (CONSP (rest))
1478 Lisp_Object cell = Fcar (rest);
1482 signal_simple_error ("Invalid argument", attributes);
1483 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
1484 && ((XCHARSET_FINAL (ccs) != 0) ||
1485 (XCHARSET_UCS_MAX (ccs) > 0)) )
1489 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
1491 character = Fdecode_char (ccs, cell);
1492 goto setup_attributes;
1496 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
1499 signal_simple_error ("Invalid argument", attributes);
1501 character = make_char (XINT (code) + 0x100000);
1502 goto setup_attributes;
1506 else if (!INTP (code))
1507 signal_simple_error ("Invalid argument", attributes);
1509 character = make_char (XINT (code));
1513 while (CONSP (rest))
1515 Lisp_Object cell = Fcar (rest);
1517 Lisp_Object key = Fcar (cell);
1518 Lisp_Object value = Fcdr (cell);
1522 signal_simple_error ("Invalid argument", attributes);
1525 if (EQ (key, Qmorohashi_daikanwa))
1528 GET_EXTERNAL_LIST_LENGTH (value, len);
1532 if (NILP (daikanwa))
1533 daikanwa = Fcdr (Fassq (Qideograph_daikanwa, rest));
1534 if (EQ (Fcar (value), daikanwa))
1538 else if (EQ (key, Qideograph_daikanwa))
1542 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
1551 Lisp_Object Vutf_2000_version;
1555 int leading_code_private_11;
1558 Lisp_Object Qcharsetp;
1560 /* Qdoc_string, Qdimension, Qchars defined in general.c */
1561 Lisp_Object Qregistry, Qfinal, Qgraphic;
1562 Lisp_Object Qdirection;
1563 Lisp_Object Qreverse_direction_charset;
1564 Lisp_Object Qleading_byte;
1565 Lisp_Object Qshort_name, Qlong_name;
1579 Qcyrillic_iso8859_5,
1581 Qjapanese_jisx0208_1978,
1584 Qjapanese_jisx0208_1990,
1587 Qchinese_cns11643_1,
1588 Qchinese_cns11643_2,
1593 Qlatin_viscii_lower,
1594 Qlatin_viscii_upper,
1595 Qvietnamese_viscii_lower,
1596 Qvietnamese_viscii_upper,
1625 Lisp_Object Ql2r, Qr2l;
1627 Lisp_Object Vcharset_hash_table;
1629 /* Composite characters are characters constructed by overstriking two
1630 or more regular characters.
1632 1) The old Mule implementation involves storing composite characters
1633 in a buffer as a tag followed by all of the actual characters
1634 used to make up the composite character. I think this is a bad
1635 idea; it greatly complicates code that wants to handle strings
1636 one character at a time because it has to deal with the possibility
1637 of great big ungainly characters. It's much more reasonable to
1638 simply store an index into a table of composite characters.
1640 2) The current implementation only allows for 16,384 separate
1641 composite characters over the lifetime of the XEmacs process.
1642 This could become a potential problem if the user
1643 edited lots of different files that use composite characters.
1644 Due to FSF bogosity, increasing the number of allowable
1645 composite characters under Mule would decrease the number
1646 of possible faces that can exist. Mule already has shrunk
1647 this to 2048, and further shrinkage would become uncomfortable.
1648 No such problems exist in XEmacs.
1650 Composite characters could be represented as 0x80 C1 C2 C3,
1651 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1652 for slightly under 2^20 (one million) composite characters
1653 over the XEmacs process lifetime, and you only need to
1654 increase the size of a Mule character from 19 to 21 bits.
1655 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1656 85 million (slightly over 2^26) composite characters. */
1659 /************************************************************************/
1660 /* Basic Emchar functions */
1661 /************************************************************************/
1663 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1664 string in STR. Returns the number of bytes stored.
1665 Do not call this directly. Use the macro set_charptr_emchar() instead.
1669 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1675 Lisp_Object charset;
1684 else if ( c <= 0x7ff )
1686 *p++ = (c >> 6) | 0xc0;
1687 *p++ = (c & 0x3f) | 0x80;
1689 else if ( c <= 0xffff )
1691 *p++ = (c >> 12) | 0xe0;
1692 *p++ = ((c >> 6) & 0x3f) | 0x80;
1693 *p++ = (c & 0x3f) | 0x80;
1695 else if ( c <= 0x1fffff )
1697 *p++ = (c >> 18) | 0xf0;
1698 *p++ = ((c >> 12) & 0x3f) | 0x80;
1699 *p++ = ((c >> 6) & 0x3f) | 0x80;
1700 *p++ = (c & 0x3f) | 0x80;
1702 else if ( c <= 0x3ffffff )
1704 *p++ = (c >> 24) | 0xf8;
1705 *p++ = ((c >> 18) & 0x3f) | 0x80;
1706 *p++ = ((c >> 12) & 0x3f) | 0x80;
1707 *p++ = ((c >> 6) & 0x3f) | 0x80;
1708 *p++ = (c & 0x3f) | 0x80;
1712 *p++ = (c >> 30) | 0xfc;
1713 *p++ = ((c >> 24) & 0x3f) | 0x80;
1714 *p++ = ((c >> 18) & 0x3f) | 0x80;
1715 *p++ = ((c >> 12) & 0x3f) | 0x80;
1716 *p++ = ((c >> 6) & 0x3f) | 0x80;
1717 *p++ = (c & 0x3f) | 0x80;
1720 BREAKUP_CHAR (c, charset, c1, c2);
1721 lb = CHAR_LEADING_BYTE (c);
1722 if (LEADING_BYTE_PRIVATE_P (lb))
1723 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1725 if (EQ (charset, Vcharset_control_1))
1734 /* Return the first character from a Mule-encoded string in STR,
1735 assuming it's non-ASCII. Do not call this directly.
1736 Use the macro charptr_emchar() instead. */
1739 non_ascii_charptr_emchar (const Bufbyte *str)
1752 else if ( b >= 0xf8 )
1757 else if ( b >= 0xf0 )
1762 else if ( b >= 0xe0 )
1767 else if ( b >= 0xc0 )
1777 for( ; len > 0; len-- )
1780 ch = ( ch << 6 ) | ( b & 0x3f );
1784 Bufbyte i0 = *str, i1, i2 = 0;
1785 Lisp_Object charset;
1787 if (i0 == LEADING_BYTE_CONTROL_1)
1788 return (Emchar) (*++str - 0x20);
1790 if (LEADING_BYTE_PREFIX_P (i0))
1795 charset = CHARSET_BY_LEADING_BYTE (i0);
1796 if (XCHARSET_DIMENSION (charset) == 2)
1799 return MAKE_CHAR (charset, i1, i2);
1803 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1804 Do not call this directly. Use the macro valid_char_p() instead. */
1808 non_ascii_valid_char_p (Emchar ch)
1812 /* Must have only lowest 19 bits set */
1816 f1 = CHAR_FIELD1 (ch);
1817 f2 = CHAR_FIELD2 (ch);
1818 f3 = CHAR_FIELD3 (ch);
1822 Lisp_Object charset;
1824 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1825 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1826 f2 > MAX_CHAR_FIELD2_PRIVATE)
1831 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1832 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1836 NOTE: This takes advantage of the fact that
1837 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1838 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1840 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1841 if (EQ (charset, Qnil))
1843 return (XCHARSET_CHARS (charset) == 96);
1847 Lisp_Object charset;
1849 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1850 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1851 f1 > MAX_CHAR_FIELD1_PRIVATE)
1853 if (f2 < 0x20 || f3 < 0x20)
1856 #ifdef ENABLE_COMPOSITE_CHARS
1857 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1859 if (UNBOUNDP (Fgethash (make_int (ch),
1860 Vcomposite_char_char2string_hash_table,
1865 #endif /* ENABLE_COMPOSITE_CHARS */
1867 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1868 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1871 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1873 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1876 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1878 if (EQ (charset, Qnil))
1880 return (XCHARSET_CHARS (charset) == 96);
1886 /************************************************************************/
1887 /* Basic string functions */
1888 /************************************************************************/
1890 /* Copy the character pointed to by PTR into STR, assuming it's
1891 non-ASCII. Do not call this directly. Use the macro
1892 charptr_copy_char() instead. */
1895 non_ascii_charptr_copy_char (const Bufbyte *ptr, Bufbyte *str)
1897 Bufbyte *strptr = str;
1899 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1901 /* Notice fallthrough. */
1903 case 6: *++strptr = *ptr++;
1904 case 5: *++strptr = *ptr++;
1906 case 4: *++strptr = *ptr++;
1907 case 3: *++strptr = *ptr++;
1908 case 2: *++strptr = *ptr;
1913 return strptr + 1 - str;
1917 /************************************************************************/
1918 /* streams of Emchars */
1919 /************************************************************************/
1921 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1922 The functions below are not meant to be called directly; use
1923 the macros in insdel.h. */
1926 Lstream_get_emchar_1 (Lstream *stream, int ch)
1928 Bufbyte str[MAX_EMCHAR_LEN];
1929 Bufbyte *strptr = str;
1931 str[0] = (Bufbyte) ch;
1932 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1934 /* Notice fallthrough. */
1937 ch = Lstream_getc (stream);
1939 *++strptr = (Bufbyte) ch;
1941 ch = Lstream_getc (stream);
1943 *++strptr = (Bufbyte) ch;
1946 ch = Lstream_getc (stream);
1948 *++strptr = (Bufbyte) ch;
1950 ch = Lstream_getc (stream);
1952 *++strptr = (Bufbyte) ch;
1954 ch = Lstream_getc (stream);
1956 *++strptr = (Bufbyte) ch;
1961 return charptr_emchar (str);
1965 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1967 Bufbyte str[MAX_EMCHAR_LEN];
1968 Bytecount len = set_charptr_emchar (str, ch);
1969 return Lstream_write (stream, str, len);
1973 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1975 Bufbyte str[MAX_EMCHAR_LEN];
1976 Bytecount len = set_charptr_emchar (str, ch);
1977 Lstream_unread (stream, str, len);
1981 /************************************************************************/
1982 /* charset object */
1983 /************************************************************************/
1986 mark_charset (Lisp_Object obj)
1988 Lisp_Charset *cs = XCHARSET (obj);
1990 mark_object (cs->short_name);
1991 mark_object (cs->long_name);
1992 mark_object (cs->doc_string);
1993 mark_object (cs->registry);
1994 mark_object (cs->ccl_program);
1996 mark_object (cs->encoding_table);
1997 /* mark_object (cs->decoding_table); */
2003 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
2005 Lisp_Charset *cs = XCHARSET (obj);
2009 error ("printing unreadable object #<charset %s 0x%x>",
2010 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
2013 write_c_string ("#<charset ", printcharfun);
2014 print_internal (CHARSET_NAME (cs), printcharfun, 0);
2015 write_c_string (" ", printcharfun);
2016 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
2017 write_c_string (" ", printcharfun);
2018 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
2019 write_c_string (" ", printcharfun);
2020 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
2021 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
2023 CHARSET_DIMENSION (cs),
2024 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
2025 CHARSET_COLUMNS (cs),
2026 CHARSET_GRAPHIC (cs),
2027 CHARSET_FINAL (cs));
2028 write_c_string (buf, printcharfun);
2029 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
2030 sprintf (buf, " 0x%x>", cs->header.uid);
2031 write_c_string (buf, printcharfun);
2034 static const struct lrecord_description charset_description[] = {
2035 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
2036 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
2037 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
2038 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
2039 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
2040 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
2041 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
2043 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
2044 { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
2049 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
2050 mark_charset, print_charset, 0, 0, 0,
2051 charset_description,
2053 /* Make a new charset. */
2056 make_charset (Charset_ID id, Lisp_Object name,
2057 unsigned short chars, unsigned char dimension,
2058 unsigned char columns, unsigned char graphic,
2059 Bufbyte final, unsigned char direction, Lisp_Object short_name,
2060 Lisp_Object long_name, Lisp_Object doc,
2062 Lisp_Object decoding_table,
2063 Emchar ucs_min, Emchar ucs_max,
2064 Emchar code_offset, unsigned char byte_offset)
2066 unsigned char type = 0;
2068 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
2072 XSETCHARSET (obj, cs);
2074 CHARSET_ID (cs) = id;
2075 CHARSET_NAME (cs) = name;
2076 CHARSET_SHORT_NAME (cs) = short_name;
2077 CHARSET_LONG_NAME (cs) = long_name;
2078 CHARSET_CHARS (cs) = chars;
2079 CHARSET_DIMENSION (cs) = dimension;
2080 CHARSET_DIRECTION (cs) = direction;
2081 CHARSET_COLUMNS (cs) = columns;
2082 CHARSET_GRAPHIC (cs) = graphic;
2083 CHARSET_FINAL (cs) = final;
2084 CHARSET_DOC_STRING (cs) = doc;
2085 CHARSET_REGISTRY (cs) = reg;
2086 CHARSET_CCL_PROGRAM (cs) = Qnil;
2087 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
2089 CHARSET_DECODING_TABLE(cs) = Qnil;
2090 CHARSET_ENCODING_TABLE(cs) = Qnil;
2091 CHARSET_UCS_MIN(cs) = ucs_min;
2092 CHARSET_UCS_MAX(cs) = ucs_max;
2093 CHARSET_CODE_OFFSET(cs) = code_offset;
2094 CHARSET_BYTE_OFFSET(cs) = byte_offset;
2097 switch (CHARSET_CHARS (cs))
2100 switch (CHARSET_DIMENSION (cs))
2103 type = CHARSET_TYPE_94;
2106 type = CHARSET_TYPE_94X94;
2111 switch (CHARSET_DIMENSION (cs))
2114 type = CHARSET_TYPE_96;
2117 type = CHARSET_TYPE_96X96;
2123 switch (CHARSET_DIMENSION (cs))
2126 type = CHARSET_TYPE_128;
2129 type = CHARSET_TYPE_128X128;
2134 switch (CHARSET_DIMENSION (cs))
2137 type = CHARSET_TYPE_256;
2140 type = CHARSET_TYPE_256X256;
2147 CHARSET_TYPE (cs) = type;
2151 if (id == LEADING_BYTE_ASCII)
2152 CHARSET_REP_BYTES (cs) = 1;
2154 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
2156 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
2161 /* some charsets do not have final characters. This includes
2162 ASCII, Control-1, Composite, and the two faux private
2165 if (code_offset == 0)
2167 assert (NILP (chlook->charset_by_attributes[type][final]));
2168 chlook->charset_by_attributes[type][final] = obj;
2171 assert (NILP (chlook->charset_by_attributes[type][final][direction]));
2172 chlook->charset_by_attributes[type][final][direction] = obj;
2176 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
2177 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
2179 /* Some charsets are "faux" and don't have names or really exist at
2180 all except in the leading-byte table. */
2182 Fputhash (name, obj, Vcharset_hash_table);
2187 get_unallocated_leading_byte (int dimension)
2192 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
2195 lb = chlook->next_allocated_leading_byte++;
2199 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
2202 lb = chlook->next_allocated_1_byte_leading_byte++;
2206 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
2209 lb = chlook->next_allocated_2_byte_leading_byte++;
2215 ("No more character sets free for this dimension",
2216 make_int (dimension));
2223 make_builtin_char (Lisp_Object charset, int c1, int c2)
2225 if (XCHARSET_UCS_MAX (charset))
2228 = (XCHARSET_DIMENSION (charset) == 1
2230 c1 - XCHARSET_BYTE_OFFSET (charset)
2232 (c1 - XCHARSET_BYTE_OFFSET (charset)) * XCHARSET_CHARS (charset)
2233 + c2 - XCHARSET_BYTE_OFFSET (charset))
2234 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
2235 if ((code < XCHARSET_UCS_MIN (charset))
2236 || (XCHARSET_UCS_MAX (charset) < code))
2237 signal_simple_error ("Arguments makes invalid character",
2241 else if (XCHARSET_DIMENSION (charset) == 1)
2243 switch (XCHARSET_CHARS (charset))
2247 + (XCHARSET_FINAL (charset) - '0') * 94 + (c1 - 33);
2250 + (XCHARSET_FINAL (charset) - '0') * 96 + (c1 - 32);
2257 switch (XCHARSET_CHARS (charset))
2260 return MIN_CHAR_94x94
2261 + (XCHARSET_FINAL (charset) - '0') * 94 * 94
2262 + (c1 - 33) * 94 + (c2 - 33);
2264 return MIN_CHAR_96x96
2265 + (XCHARSET_FINAL (charset) - '0') * 96 * 96
2266 + (c1 - 32) * 96 + (c2 - 32);
2274 range_charset_code_point (Lisp_Object charset, Emchar ch)
2278 if ((XCHARSET_UCS_MIN (charset) <= ch)
2279 && (ch <= XCHARSET_UCS_MAX (charset)))
2281 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
2283 if (XCHARSET_CHARS (charset) == 256)
2285 else if (XCHARSET_DIMENSION (charset) == 1)
2286 return d + XCHARSET_BYTE_OFFSET (charset);
2287 else if (XCHARSET_DIMENSION (charset) == 2)
2289 ((d / XCHARSET_CHARS (charset)
2290 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2291 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2292 else if (XCHARSET_DIMENSION (charset) == 3)
2294 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2295 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2296 | ((d / XCHARSET_CHARS (charset)
2297 % XCHARSET_CHARS (charset)
2298 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2299 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2300 else /* if (XCHARSET_DIMENSION (charset) == 4) */
2302 ((d / (XCHARSET_CHARS (charset)
2303 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2304 + XCHARSET_BYTE_OFFSET (charset)) << 24)
2305 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2306 % XCHARSET_CHARS (charset)
2307 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2308 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
2309 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2310 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2312 else if (XCHARSET_CODE_OFFSET (charset) == 0)
2314 if (XCHARSET_DIMENSION (charset) == 1)
2316 if (XCHARSET_CHARS (charset) == 94)
2318 if (((d = ch - (MIN_CHAR_94
2319 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
2323 else if (XCHARSET_CHARS (charset) == 96)
2325 if (((d = ch - (MIN_CHAR_96
2326 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
2333 else if (XCHARSET_DIMENSION (charset) == 2)
2335 if (XCHARSET_CHARS (charset) == 94)
2337 if (((d = ch - (MIN_CHAR_94x94
2338 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
2341 return (((d / 94) + 33) << 8) | (d % 94 + 33);
2343 else if (XCHARSET_CHARS (charset) == 96)
2345 if (((d = ch - (MIN_CHAR_96x96
2346 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
2349 return (((d / 96) + 32) << 8) | (d % 96 + 32);
2359 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
2361 if (c <= MAX_CHAR_BASIC_LATIN)
2363 *charset = Vcharset_ascii;
2368 *charset = Vcharset_control_1;
2373 *charset = Vcharset_latin_iso8859_1;
2377 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
2379 *charset = Vcharset_greek_iso8859_7;
2380 return c - MIN_CHAR_GREEK + 0x20;
2382 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
2384 *charset = Vcharset_cyrillic_iso8859_5;
2385 return c - MIN_CHAR_CYRILLIC + 0x20;
2388 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
2390 *charset = Vcharset_hebrew_iso8859_8;
2391 return c - MIN_CHAR_HEBREW + 0x20;
2393 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
2395 *charset = Vcharset_thai_tis620;
2396 return c - MIN_CHAR_THAI + 0x20;
2399 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
2400 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
2402 return list2 (Vcharset_katakana_jisx0201,
2403 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
2406 else if (c <= MAX_CHAR_BMP)
2408 *charset = Vcharset_ucs_bmp;
2411 else if (c < MIN_CHAR_DAIKANWA)
2413 *charset = Vcharset_ucs;
2417 else if (c <= MAX_CHAR_DAIKANWA)
2419 *charset = Vcharset_ideograph_daikanwa;
2420 return c - MIN_CHAR_DAIKANWA;
2423 else if (c <= MAX_CHAR_MOJIKYO)
2425 *charset = Vcharset_mojikyo;
2426 return c - MIN_CHAR_MOJIKYO;
2428 else if (c < MIN_CHAR_94)
2430 *charset = Vcharset_ucs;
2433 else if (c <= MAX_CHAR_94)
2435 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
2436 ((c - MIN_CHAR_94) / 94) + '0',
2437 CHARSET_LEFT_TO_RIGHT);
2438 if (!NILP (*charset))
2439 return ((c - MIN_CHAR_94) % 94) + 33;
2442 *charset = Vcharset_ucs;
2446 else if (c <= MAX_CHAR_96)
2448 *charset = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
2449 ((c - MIN_CHAR_96) / 96) + '0',
2450 CHARSET_LEFT_TO_RIGHT);
2451 if (!NILP (*charset))
2452 return ((c - MIN_CHAR_96) % 96) + 32;
2455 *charset = Vcharset_ucs;
2459 else if (c <= MAX_CHAR_94x94)
2462 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94,
2463 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
2464 CHARSET_LEFT_TO_RIGHT);
2465 if (!NILP (*charset))
2466 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
2467 | (((c - MIN_CHAR_94x94) % 94) + 33);
2470 *charset = Vcharset_ucs;
2474 else if (c <= MAX_CHAR_96x96)
2477 = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96X96,
2478 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
2479 CHARSET_LEFT_TO_RIGHT);
2480 if (!NILP (*charset))
2481 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
2482 | (((c - MIN_CHAR_96x96) % 96) + 32);
2485 *charset = Vcharset_ucs;
2491 *charset = Vcharset_ucs;
2496 Lisp_Object Vdefault_coded_charset_priority_list;
2500 /************************************************************************/
2501 /* Basic charset Lisp functions */
2502 /************************************************************************/
2504 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
2505 Return non-nil if OBJECT is a charset.
2509 return CHARSETP (object) ? Qt : Qnil;
2512 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
2513 Retrieve the charset of the given name.
2514 If CHARSET-OR-NAME is a charset object, it is simply returned.
2515 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
2516 nil is returned. Otherwise the associated charset object is returned.
2520 if (CHARSETP (charset_or_name))
2521 return charset_or_name;
2523 CHECK_SYMBOL (charset_or_name);
2524 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
2527 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
2528 Retrieve the charset of the given name.
2529 Same as `find-charset' except an error is signalled if there is no such
2530 charset instead of returning nil.
2534 Lisp_Object charset = Ffind_charset (name);
2537 signal_simple_error ("No such charset", name);
2541 /* We store the charsets in hash tables with the names as the key and the
2542 actual charset object as the value. Occasionally we need to use them
2543 in a list format. These routines provide us with that. */
2544 struct charset_list_closure
2546 Lisp_Object *charset_list;
2550 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
2551 void *charset_list_closure)
2553 /* This function can GC */
2554 struct charset_list_closure *chcl =
2555 (struct charset_list_closure*) charset_list_closure;
2556 Lisp_Object *charset_list = chcl->charset_list;
2558 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
2562 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
2563 Return a list of the names of all defined charsets.
2567 Lisp_Object charset_list = Qnil;
2568 struct gcpro gcpro1;
2569 struct charset_list_closure charset_list_closure;
2571 GCPRO1 (charset_list);
2572 charset_list_closure.charset_list = &charset_list;
2573 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
2574 &charset_list_closure);
2577 return charset_list;
2580 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
2581 Return the name of the given charset.
2585 return XCHARSET_NAME (Fget_charset (charset));
2588 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
2589 Define a new character set.
2590 This function is for use with Mule support.
2591 NAME is a symbol, the name by which the character set is normally referred.
2592 DOC-STRING is a string describing the character set.
2593 PROPS is a property list, describing the specific nature of the
2594 character set. Recognized properties are:
2596 'short-name Short version of the charset name (ex: Latin-1)
2597 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
2598 'registry A regular expression matching the font registry field for
2600 'dimension Number of octets used to index a character in this charset.
2601 Either 1 or 2. Defaults to 1.
2602 'columns Number of columns used to display a character in this charset.
2603 Only used in TTY mode. (Under X, the actual width of a
2604 character can be derived from the font used to display the
2605 characters.) If unspecified, defaults to the dimension
2606 (this is almost always the correct value).
2607 'chars Number of characters in each dimension (94 or 96).
2608 Defaults to 94. Note that if the dimension is 2, the
2609 character set thus described is 94x94 or 96x96.
2610 'final Final byte of ISO 2022 escape sequence. Must be
2611 supplied. Each combination of (DIMENSION, CHARS) defines a
2612 separate namespace for final bytes. Note that ISO
2613 2022 restricts the final byte to the range
2614 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
2615 dimension == 2. Note also that final bytes in the range
2616 0x30 - 0x3F are reserved for user-defined (not official)
2618 'graphic 0 (use left half of font on output) or 1 (use right half
2619 of font on output). Defaults to 0. For example, for
2620 a font whose registry is ISO8859-1, the left half
2621 (octets 0x20 - 0x7F) is the `ascii' character set, while
2622 the right half (octets 0xA0 - 0xFF) is the `latin-1'
2623 character set. With 'graphic set to 0, the octets
2624 will have their high bit cleared; with it set to 1,
2625 the octets will have their high bit set.
2626 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
2628 'ccl-program A compiled CCL program used to convert a character in
2629 this charset into an index into the font. This is in
2630 addition to the 'graphic property. The CCL program
2631 is passed the octets of the character, with the high
2632 bit cleared and set depending upon whether the value
2633 of the 'graphic property is 0 or 1.
2635 (name, doc_string, props))
2637 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
2638 int direction = CHARSET_LEFT_TO_RIGHT;
2640 Lisp_Object registry = Qnil;
2641 Lisp_Object charset;
2642 Lisp_Object rest, keyword, value;
2643 Lisp_Object ccl_program = Qnil;
2644 Lisp_Object short_name = Qnil, long_name = Qnil;
2645 int byte_offset = -1;
2647 CHECK_SYMBOL (name);
2648 if (!NILP (doc_string))
2649 CHECK_STRING (doc_string);
2651 charset = Ffind_charset (name);
2652 if (!NILP (charset))
2653 signal_simple_error ("Cannot redefine existing charset", name);
2655 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
2657 if (EQ (keyword, Qshort_name))
2659 CHECK_STRING (value);
2663 if (EQ (keyword, Qlong_name))
2665 CHECK_STRING (value);
2669 else if (EQ (keyword, Qdimension))
2672 dimension = XINT (value);
2673 if (dimension < 1 || dimension > 2)
2674 signal_simple_error ("Invalid value for 'dimension", value);
2677 else if (EQ (keyword, Qchars))
2680 chars = XINT (value);
2681 if (chars != 94 && chars != 96)
2682 signal_simple_error ("Invalid value for 'chars", value);
2685 else if (EQ (keyword, Qcolumns))
2688 columns = XINT (value);
2689 if (columns != 1 && columns != 2)
2690 signal_simple_error ("Invalid value for 'columns", value);
2693 else if (EQ (keyword, Qgraphic))
2696 graphic = XINT (value);
2698 if (graphic < 0 || graphic > 2)
2700 if (graphic < 0 || graphic > 1)
2702 signal_simple_error ("Invalid value for 'graphic", value);
2705 else if (EQ (keyword, Qregistry))
2707 CHECK_STRING (value);
2711 else if (EQ (keyword, Qdirection))
2713 if (EQ (value, Ql2r))
2714 direction = CHARSET_LEFT_TO_RIGHT;
2715 else if (EQ (value, Qr2l))
2716 direction = CHARSET_RIGHT_TO_LEFT;
2718 signal_simple_error ("Invalid value for 'direction", value);
2721 else if (EQ (keyword, Qfinal))
2723 CHECK_CHAR_COERCE_INT (value);
2724 final = XCHAR (value);
2725 if (final < '0' || final > '~')
2726 signal_simple_error ("Invalid value for 'final", value);
2729 else if (EQ (keyword, Qccl_program))
2731 CHECK_VECTOR (value);
2732 ccl_program = value;
2736 signal_simple_error ("Unrecognized property", keyword);
2740 error ("'final must be specified");
2741 if (dimension == 2 && final > 0x5F)
2743 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2747 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2749 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2751 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
2752 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
2754 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2756 id = get_unallocated_leading_byte (dimension);
2758 if (NILP (doc_string))
2759 doc_string = build_string ("");
2761 if (NILP (registry))
2762 registry = build_string ("");
2764 if (NILP (short_name))
2765 XSETSTRING (short_name, XSYMBOL (name)->name);
2767 if (NILP (long_name))
2768 long_name = doc_string;
2771 columns = dimension;
2773 if (byte_offset < 0)
2777 else if (chars == 96)
2783 charset = make_charset (id, name, chars, dimension, columns, graphic,
2784 final, direction, short_name, long_name,
2785 doc_string, registry,
2786 Qnil, 0, 0, 0, byte_offset);
2787 if (!NILP (ccl_program))
2788 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2792 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2794 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2795 NEW-NAME is the name of the new charset. Return the new charset.
2797 (charset, new_name))
2799 Lisp_Object new_charset = Qnil;
2800 int id, chars, dimension, columns, graphic, final;
2802 Lisp_Object registry, doc_string, short_name, long_name;
2805 charset = Fget_charset (charset);
2806 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2807 signal_simple_error ("Charset already has reverse-direction charset",
2810 CHECK_SYMBOL (new_name);
2811 if (!NILP (Ffind_charset (new_name)))
2812 signal_simple_error ("Cannot redefine existing charset", new_name);
2814 cs = XCHARSET (charset);
2816 chars = CHARSET_CHARS (cs);
2817 dimension = CHARSET_DIMENSION (cs);
2818 columns = CHARSET_COLUMNS (cs);
2819 id = get_unallocated_leading_byte (dimension);
2821 graphic = CHARSET_GRAPHIC (cs);
2822 final = CHARSET_FINAL (cs);
2823 direction = CHARSET_RIGHT_TO_LEFT;
2824 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2825 direction = CHARSET_LEFT_TO_RIGHT;
2826 doc_string = CHARSET_DOC_STRING (cs);
2827 short_name = CHARSET_SHORT_NAME (cs);
2828 long_name = CHARSET_LONG_NAME (cs);
2829 registry = CHARSET_REGISTRY (cs);
2831 new_charset = make_charset (id, new_name, chars, dimension, columns,
2832 graphic, final, direction, short_name, long_name,
2833 doc_string, registry,
2835 CHARSET_DECODING_TABLE(cs),
2836 CHARSET_UCS_MIN(cs),
2837 CHARSET_UCS_MAX(cs),
2838 CHARSET_CODE_OFFSET(cs),
2839 CHARSET_BYTE_OFFSET(cs)
2845 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2846 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
2851 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2852 Define symbol ALIAS as an alias for CHARSET.
2856 CHECK_SYMBOL (alias);
2857 charset = Fget_charset (charset);
2858 return Fputhash (alias, charset, Vcharset_hash_table);
2861 /* #### Reverse direction charsets not yet implemented. */
2863 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2865 Return the reverse-direction charset parallel to CHARSET, if any.
2866 This is the charset with the same properties (in particular, the same
2867 dimension, number of characters per dimension, and final byte) as
2868 CHARSET but whose characters are displayed in the opposite direction.
2872 charset = Fget_charset (charset);
2873 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
2877 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2878 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2879 If DIRECTION is omitted, both directions will be checked (left-to-right
2880 will be returned if character sets exist for both directions).
2882 (dimension, chars, final, direction))
2884 int dm, ch, fi, di = -1;
2886 Lisp_Object obj = Qnil;
2888 CHECK_INT (dimension);
2889 dm = XINT (dimension);
2890 if (dm < 1 || dm > 2)
2891 signal_simple_error ("Invalid value for DIMENSION", dimension);
2895 if (ch != 94 && ch != 96)
2896 signal_simple_error ("Invalid value for CHARS", chars);
2898 CHECK_CHAR_COERCE_INT (final);
2900 if (fi < '0' || fi > '~')
2901 signal_simple_error ("Invalid value for FINAL", final);
2903 if (EQ (direction, Ql2r))
2904 di = CHARSET_LEFT_TO_RIGHT;
2905 else if (EQ (direction, Qr2l))
2906 di = CHARSET_RIGHT_TO_LEFT;
2907 else if (!NILP (direction))
2908 signal_simple_error ("Invalid value for DIRECTION", direction);
2910 if (dm == 2 && fi > 0x5F)
2912 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2915 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
2917 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
2921 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
2923 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
2926 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
2929 return XCHARSET_NAME (obj);
2933 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2934 Return short name of CHARSET.
2938 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2941 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2942 Return long name of CHARSET.
2946 return XCHARSET_LONG_NAME (Fget_charset (charset));
2949 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2950 Return description of CHARSET.
2954 return XCHARSET_DOC_STRING (Fget_charset (charset));
2957 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2958 Return dimension of CHARSET.
2962 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2965 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2966 Return property PROP of CHARSET.
2967 Recognized properties are those listed in `make-charset', as well as
2968 'name and 'doc-string.
2974 charset = Fget_charset (charset);
2975 cs = XCHARSET (charset);
2977 CHECK_SYMBOL (prop);
2978 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2979 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2980 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2981 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2982 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2983 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2984 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2985 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2986 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2987 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2988 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2989 if (EQ (prop, Qdirection))
2990 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2991 if (EQ (prop, Qreverse_direction_charset))
2993 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2997 return XCHARSET_NAME (obj);
2999 signal_simple_error ("Unrecognized charset property name", prop);
3000 return Qnil; /* not reached */
3003 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
3004 Return charset identification number of CHARSET.
3008 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
3011 /* #### We need to figure out which properties we really want to
3014 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
3015 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
3017 (charset, ccl_program))
3019 charset = Fget_charset (charset);
3020 CHECK_VECTOR (ccl_program);
3021 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
3026 invalidate_charset_font_caches (Lisp_Object charset)
3028 /* Invalidate font cache entries for charset on all devices. */
3029 Lisp_Object devcons, concons, hash_table;
3030 DEVICE_LOOP_NO_BREAK (devcons, concons)
3032 struct device *d = XDEVICE (XCAR (devcons));
3033 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
3034 if (!UNBOUNDP (hash_table))
3035 Fclrhash (hash_table);
3039 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
3040 Set the 'registry property of CHARSET to REGISTRY.
3042 (charset, registry))
3044 charset = Fget_charset (charset);
3045 CHECK_STRING (registry);
3046 XCHARSET_REGISTRY (charset) = registry;
3047 invalidate_charset_font_caches (charset);
3048 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
3053 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
3054 Return mapping-table of CHARSET.
3058 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
3061 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
3062 Set mapping-table of CHARSET to TABLE.
3066 struct Lisp_Charset *cs;
3070 charset = Fget_charset (charset);
3071 cs = XCHARSET (charset);
3075 if (VECTORP (CHARSET_DECODING_TABLE(cs)))
3076 make_vector_newer (CHARSET_DECODING_TABLE(cs));
3077 CHARSET_DECODING_TABLE(cs) = Qnil;
3080 else if (VECTORP (table))
3082 int ccs_len = CHARSET_BYTE_SIZE (cs);
3083 int ret = decoding_table_check_elements (table,
3084 CHARSET_DIMENSION (cs),
3089 signal_simple_error ("Too big table", table);
3091 signal_simple_error ("Invalid element is found", table);
3093 signal_simple_error ("Something wrong", table);
3095 CHARSET_DECODING_TABLE(cs) = Qnil;
3098 signal_error (Qwrong_type_argument,
3099 list2 (build_translated_string ("vector-or-nil-p"),
3102 byte_offset = CHARSET_BYTE_OFFSET (cs);
3103 switch (CHARSET_DIMENSION (cs))
3106 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3108 Lisp_Object c = XVECTOR_DATA(table)[i];
3111 put_char_ccs_code_point (c, charset,
3112 make_int (i + byte_offset));
3116 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3118 Lisp_Object v = XVECTOR_DATA(table)[i];
3124 for (j = 0; j < XVECTOR_LENGTH (v); j++)
3126 Lisp_Object c = XVECTOR_DATA(v)[j];
3129 put_char_ccs_code_point
3131 make_int ( ( (i + byte_offset) << 8 )
3137 put_char_ccs_code_point (v, charset,
3138 make_int (i + byte_offset));
3147 /************************************************************************/
3148 /* Lisp primitives for working with characters */
3149 /************************************************************************/
3152 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
3153 Make a character from CHARSET and code-point CODE.
3159 charset = Fget_charset (charset);
3162 if (XCHARSET_GRAPHIC (charset) == 1)
3164 return make_char (DECODE_CHAR (charset, c));
3167 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
3168 Make a builtin character from CHARSET and code-point CODE.
3175 charset = Fget_charset (charset);
3179 if ((final = XCHARSET_FINAL (charset)) >= '0')
3181 if (XCHARSET_DIMENSION (charset) == 1)
3183 switch (XCHARSET_CHARS (charset))
3187 make_char (MIN_CHAR_94 + (final - '0') * 94
3188 + ((c & 0x7F) - 33));
3191 make_char (MIN_CHAR_96 + (final - '0') * 96
3192 + ((c & 0x7F) - 32));
3194 return Fdecode_char (charset, code);
3199 switch (XCHARSET_CHARS (charset))
3203 make_char (MIN_CHAR_94x94
3204 + (final - '0') * 94 * 94
3205 + (((c >> 8) & 0x7F) - 33) * 94
3206 + ((c & 0x7F) - 33));
3209 make_char (MIN_CHAR_96x96
3210 + (final - '0') * 96 * 96
3211 + (((c >> 8) & 0x7F) - 32) * 96
3212 + ((c & 0x7F) - 32));
3214 return Fdecode_char (charset, code);
3218 else if (XCHARSET_UCS_MAX (charset))
3221 = (XCHARSET_DIMENSION (charset) == 1
3223 c - XCHARSET_BYTE_OFFSET (charset)
3225 ((c >> 8) - XCHARSET_BYTE_OFFSET (charset))
3226 * XCHARSET_CHARS (charset)
3227 + (c & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
3228 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
3229 if ((cid < XCHARSET_UCS_MIN (charset))
3230 || (XCHARSET_UCS_MAX (charset) < cid))
3231 return Fdecode_char (charset, code);
3232 return make_char (cid);
3235 return Fdecode_char (charset, code);
3239 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
3240 Make a character from CHARSET and octets ARG1 and ARG2.
3241 ARG2 is required only for characters from two-dimensional charsets.
3242 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
3243 character s with caron.
3245 (charset, arg1, arg2))
3249 int lowlim, highlim;
3251 charset = Fget_charset (charset);
3252 cs = XCHARSET (charset);
3254 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
3255 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
3257 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
3259 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
3260 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
3263 /* It is useful (and safe, according to Olivier Galibert) to strip
3264 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
3265 write (make-char 'latin-iso8859-2 CODE) where code is the actual
3266 Latin 2 code of the character. */
3274 if (a1 < lowlim || a1 > highlim)
3275 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
3277 if (CHARSET_DIMENSION (cs) == 1)
3281 ("Charset is of dimension one; second octet must be nil", arg2);
3282 return make_char (MAKE_CHAR (charset, a1, 0));
3291 a2 = XINT (arg2) & 0x7f;
3293 if (a2 < lowlim || a2 > highlim)
3294 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
3296 return make_char (MAKE_CHAR (charset, a1, a2));
3299 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
3300 Return the character set of char CH.
3304 CHECK_CHAR_COERCE_INT (ch);
3306 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
3309 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
3310 Return the octet numbered N (should be 0 or 1) of char CH.
3311 N defaults to 0 if omitted.
3315 Lisp_Object charset;
3318 CHECK_CHAR_COERCE_INT (ch);
3320 BREAKUP_CHAR (XCHAR (ch), charset, octet0, octet1);
3322 if (NILP (n) || EQ (n, Qzero))
3323 return make_int (octet0);
3324 else if (EQ (n, make_int (1)))
3325 return make_int (octet1);
3327 signal_simple_error ("Octet number must be 0 or 1", n);
3330 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
3331 Return list of charset and one or two position-codes of CHAR.
3335 /* This function can GC */
3336 struct gcpro gcpro1, gcpro2;
3337 Lisp_Object charset = Qnil;
3338 Lisp_Object rc = Qnil;
3346 GCPRO2 (charset, rc);
3347 CHECK_CHAR_COERCE_INT (character);
3350 code_point = ENCODE_CHAR (XCHAR (character), charset);
3351 dimension = XCHARSET_DIMENSION (charset);
3352 while (dimension > 0)
3354 rc = Fcons (make_int (code_point & 255), rc);
3358 rc = Fcons (XCHARSET_NAME (charset), rc);
3360 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
3362 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
3364 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
3368 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
3377 #ifdef ENABLE_COMPOSITE_CHARS
3378 /************************************************************************/
3379 /* composite character functions */
3380 /************************************************************************/
3383 lookup_composite_char (Bufbyte *str, int len)
3385 Lisp_Object lispstr = make_string (str, len);
3386 Lisp_Object ch = Fgethash (lispstr,
3387 Vcomposite_char_string2char_hash_table,
3393 if (composite_char_row_next >= 128)
3394 signal_simple_error ("No more composite chars available", lispstr);
3395 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
3396 composite_char_col_next);
3397 Fputhash (make_char (emch), lispstr,
3398 Vcomposite_char_char2string_hash_table);
3399 Fputhash (lispstr, make_char (emch),
3400 Vcomposite_char_string2char_hash_table);
3401 composite_char_col_next++;
3402 if (composite_char_col_next >= 128)
3404 composite_char_col_next = 32;
3405 composite_char_row_next++;
3414 composite_char_string (Emchar ch)
3416 Lisp_Object str = Fgethash (make_char (ch),
3417 Vcomposite_char_char2string_hash_table,
3419 assert (!UNBOUNDP (str));
3423 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
3424 Convert a string into a single composite character.
3425 The character is the result of overstriking all the characters in
3430 CHECK_STRING (string);
3431 return make_char (lookup_composite_char (XSTRING_DATA (string),
3432 XSTRING_LENGTH (string)));
3435 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
3436 Return a string of the characters comprising a composite character.
3444 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
3445 signal_simple_error ("Must be composite char", ch);
3446 return composite_char_string (emch);
3448 #endif /* ENABLE_COMPOSITE_CHARS */
3451 /************************************************************************/
3452 /* initialization */
3453 /************************************************************************/
3456 syms_of_mule_charset (void)
3459 INIT_LRECORD_IMPLEMENTATION (uint8_byte_table);
3460 INIT_LRECORD_IMPLEMENTATION (uint16_byte_table);
3461 INIT_LRECORD_IMPLEMENTATION (byte_table);
3462 INIT_LRECORD_IMPLEMENTATION (char_id_table);
3464 INIT_LRECORD_IMPLEMENTATION (charset);
3466 DEFSUBR (Fcharsetp);
3467 DEFSUBR (Ffind_charset);
3468 DEFSUBR (Fget_charset);
3469 DEFSUBR (Fcharset_list);
3470 DEFSUBR (Fcharset_name);
3471 DEFSUBR (Fmake_charset);
3472 DEFSUBR (Fmake_reverse_direction_charset);
3473 /* DEFSUBR (Freverse_direction_charset); */
3474 DEFSUBR (Fdefine_charset_alias);
3475 DEFSUBR (Fcharset_from_attributes);
3476 DEFSUBR (Fcharset_short_name);
3477 DEFSUBR (Fcharset_long_name);
3478 DEFSUBR (Fcharset_description);
3479 DEFSUBR (Fcharset_dimension);
3480 DEFSUBR (Fcharset_property);
3481 DEFSUBR (Fcharset_id);
3482 DEFSUBR (Fset_charset_ccl_program);
3483 DEFSUBR (Fset_charset_registry);
3485 DEFSUBR (Fchar_attribute_list);
3486 DEFSUBR (Ffind_char_attribute_table);
3487 DEFSUBR (Fchar_attribute_alist);
3488 DEFSUBR (Fget_char_attribute);
3489 DEFSUBR (Fput_char_attribute);
3490 DEFSUBR (Fremove_char_attribute);
3491 DEFSUBR (Fdefine_char);
3492 DEFSUBR (Fchar_variants);
3493 DEFSUBR (Fget_composite_char);
3494 DEFSUBR (Fcharset_mapping_table);
3495 DEFSUBR (Fset_charset_mapping_table);
3499 DEFSUBR (Fdecode_char);
3500 DEFSUBR (Fdecode_builtin_char);
3502 DEFSUBR (Fmake_char);
3503 DEFSUBR (Fchar_charset);
3504 DEFSUBR (Fchar_octet);
3505 DEFSUBR (Fsplit_char);
3507 #ifdef ENABLE_COMPOSITE_CHARS
3508 DEFSUBR (Fmake_composite_char);
3509 DEFSUBR (Fcomposite_char_string);
3512 defsymbol (&Qcharsetp, "charsetp");
3513 defsymbol (&Qregistry, "registry");
3514 defsymbol (&Qfinal, "final");
3515 defsymbol (&Qgraphic, "graphic");
3516 defsymbol (&Qdirection, "direction");
3517 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
3518 defsymbol (&Qshort_name, "short-name");
3519 defsymbol (&Qlong_name, "long-name");
3521 defsymbol (&Ql2r, "l2r");
3522 defsymbol (&Qr2l, "r2l");
3524 /* Charsets, compatible with FSF 20.3
3525 Naming convention is Script-Charset[-Edition] */
3526 defsymbol (&Qascii, "ascii");
3527 defsymbol (&Qcontrol_1, "control-1");
3528 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
3529 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
3530 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
3531 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
3532 defsymbol (&Qthai_tis620, "thai-tis620");
3533 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
3534 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
3535 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
3536 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
3537 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
3538 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
3539 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
3540 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
3541 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
3542 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
3543 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
3544 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
3545 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
3546 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
3547 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
3549 defsymbol (&Q_ucs, "->ucs");
3550 defsymbol (&Q_decomposition, "->decomposition");
3551 defsymbol (&Qcompat, "compat");
3552 defsymbol (&Qisolated, "isolated");
3553 defsymbol (&Qinitial, "initial");
3554 defsymbol (&Qmedial, "medial");
3555 defsymbol (&Qfinal, "final");
3556 defsymbol (&Qvertical, "vertical");
3557 defsymbol (&QnoBreak, "noBreak");
3558 defsymbol (&Qfraction, "fraction");
3559 defsymbol (&Qsuper, "super");
3560 defsymbol (&Qsub, "sub");
3561 defsymbol (&Qcircle, "circle");
3562 defsymbol (&Qsquare, "square");
3563 defsymbol (&Qwide, "wide");
3564 defsymbol (&Qnarrow, "narrow");
3565 defsymbol (&Qsmall, "small");
3566 defsymbol (&Qfont, "font");
3567 defsymbol (&Qucs, "ucs");
3568 defsymbol (&Qucs_bmp, "ucs-bmp");
3569 defsymbol (&Qlatin_viscii, "latin-viscii");
3570 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
3571 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
3572 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
3573 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
3574 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
3575 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
3576 defsymbol (&Qmojikyo, "mojikyo");
3577 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
3578 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
3579 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
3580 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
3581 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
3582 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
3583 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
3584 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
3585 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
3586 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
3587 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
3588 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
3589 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
3590 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
3591 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
3592 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
3593 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
3594 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
3595 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
3596 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
3597 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
3598 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
3600 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
3601 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
3603 defsymbol (&Qcomposite, "composite");
3607 vars_of_mule_charset (void)
3614 chlook = xnew (struct charset_lookup);
3615 dumpstruct (&chlook, &charset_lookup_description);
3617 /* Table of charsets indexed by leading byte. */
3618 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
3619 chlook->charset_by_leading_byte[i] = Qnil;
3622 /* Table of charsets indexed by type/final-byte. */
3623 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3624 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3625 chlook->charset_by_attributes[i][j] = Qnil;
3627 /* Table of charsets indexed by type/final-byte/direction. */
3628 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3629 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3630 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
3631 chlook->charset_by_attributes[i][j][k] = Qnil;
3635 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
3637 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
3638 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
3642 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3643 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
3644 Leading-code of private TYPE9N charset of column-width 1.
3646 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3650 Vutf_2000_version = build_string("0.16 (ÅŒji)");
3651 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
3652 Version number of UTF-2000.
3655 staticpro (&Vcharacter_composition_table);
3656 Vcharacter_composition_table = make_char_id_table (Qnil);
3658 staticpro (&Vcharacter_variant_table);
3659 Vcharacter_variant_table = make_char_id_table (Qnil);
3661 Vdefault_coded_charset_priority_list = Qnil;
3662 DEFVAR_LISP ("default-coded-charset-priority-list",
3663 &Vdefault_coded_charset_priority_list /*
3664 Default order of preferred coded-character-sets.
3670 complex_vars_of_mule_charset (void)
3672 staticpro (&Vcharset_hash_table);
3673 Vcharset_hash_table =
3674 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3676 /* Predefined character sets. We store them into variables for
3680 staticpro (&Vchar_attribute_hash_table);
3681 Vchar_attribute_hash_table
3682 = make_lisp_hash_table (16, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3684 staticpro (&Vcharset_ucs);
3686 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
3687 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3688 build_string ("UCS"),
3689 build_string ("UCS"),
3690 build_string ("ISO/IEC 10646"),
3692 Qnil, 0, 0xFFFFFFF, 0, 0);
3693 staticpro (&Vcharset_ucs_bmp);
3695 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
3696 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3697 build_string ("BMP"),
3698 build_string ("BMP"),
3699 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
3700 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
3701 Qnil, 0, 0xFFFF, 0, 0);
3703 # define MIN_CHAR_THAI 0
3704 # define MAX_CHAR_THAI 0
3705 # define MIN_CHAR_HEBREW 0
3706 # define MAX_CHAR_HEBREW 0
3707 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3708 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
3710 staticpro (&Vcharset_ascii);
3712 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3713 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3714 build_string ("ASCII"),
3715 build_string ("ASCII)"),
3716 build_string ("ASCII (ISO646 IRV)"),
3717 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3718 Qnil, 0, 0x7F, 0, 0);
3719 staticpro (&Vcharset_control_1);
3720 Vcharset_control_1 =
3721 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3722 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3723 build_string ("C1"),
3724 build_string ("Control characters"),
3725 build_string ("Control characters 128-191"),
3727 Qnil, 0x80, 0x9F, 0, 0);
3728 staticpro (&Vcharset_latin_iso8859_1);
3729 Vcharset_latin_iso8859_1 =
3730 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3731 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3732 build_string ("Latin-1"),
3733 build_string ("ISO8859-1 (Latin-1)"),
3734 build_string ("ISO8859-1 (Latin-1)"),
3735 build_string ("iso8859-1"),
3736 Qnil, 0xA0, 0xFF, 0, 32);
3737 staticpro (&Vcharset_latin_iso8859_2);
3738 Vcharset_latin_iso8859_2 =
3739 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3740 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3741 build_string ("Latin-2"),
3742 build_string ("ISO8859-2 (Latin-2)"),
3743 build_string ("ISO8859-2 (Latin-2)"),
3744 build_string ("iso8859-2"),
3746 staticpro (&Vcharset_latin_iso8859_3);
3747 Vcharset_latin_iso8859_3 =
3748 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3749 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3750 build_string ("Latin-3"),
3751 build_string ("ISO8859-3 (Latin-3)"),
3752 build_string ("ISO8859-3 (Latin-3)"),
3753 build_string ("iso8859-3"),
3755 staticpro (&Vcharset_latin_iso8859_4);
3756 Vcharset_latin_iso8859_4 =
3757 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3758 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3759 build_string ("Latin-4"),
3760 build_string ("ISO8859-4 (Latin-4)"),
3761 build_string ("ISO8859-4 (Latin-4)"),
3762 build_string ("iso8859-4"),
3764 staticpro (&Vcharset_thai_tis620);
3765 Vcharset_thai_tis620 =
3766 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3767 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3768 build_string ("TIS620"),
3769 build_string ("TIS620 (Thai)"),
3770 build_string ("TIS620.2529 (Thai)"),
3771 build_string ("tis620"),
3772 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
3773 staticpro (&Vcharset_greek_iso8859_7);
3774 Vcharset_greek_iso8859_7 =
3775 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3776 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3777 build_string ("ISO8859-7"),
3778 build_string ("ISO8859-7 (Greek)"),
3779 build_string ("ISO8859-7 (Greek)"),
3780 build_string ("iso8859-7"),
3782 0 /* MIN_CHAR_GREEK */,
3783 0 /* MAX_CHAR_GREEK */, 0, 32);
3784 staticpro (&Vcharset_arabic_iso8859_6);
3785 Vcharset_arabic_iso8859_6 =
3786 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3787 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3788 build_string ("ISO8859-6"),
3789 build_string ("ISO8859-6 (Arabic)"),
3790 build_string ("ISO8859-6 (Arabic)"),
3791 build_string ("iso8859-6"),
3793 staticpro (&Vcharset_hebrew_iso8859_8);
3794 Vcharset_hebrew_iso8859_8 =
3795 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3796 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3797 build_string ("ISO8859-8"),
3798 build_string ("ISO8859-8 (Hebrew)"),
3799 build_string ("ISO8859-8 (Hebrew)"),
3800 build_string ("iso8859-8"),
3801 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
3802 staticpro (&Vcharset_katakana_jisx0201);
3803 Vcharset_katakana_jisx0201 =
3804 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3805 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3806 build_string ("JISX0201 Kana"),
3807 build_string ("JISX0201.1976 (Japanese Kana)"),
3808 build_string ("JISX0201.1976 Japanese Kana"),
3809 build_string ("jisx0201\\.1976"),
3811 staticpro (&Vcharset_latin_jisx0201);
3812 Vcharset_latin_jisx0201 =
3813 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3814 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3815 build_string ("JISX0201 Roman"),
3816 build_string ("JISX0201.1976 (Japanese Roman)"),
3817 build_string ("JISX0201.1976 Japanese Roman"),
3818 build_string ("jisx0201\\.1976"),
3820 staticpro (&Vcharset_cyrillic_iso8859_5);
3821 Vcharset_cyrillic_iso8859_5 =
3822 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3823 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3824 build_string ("ISO8859-5"),
3825 build_string ("ISO8859-5 (Cyrillic)"),
3826 build_string ("ISO8859-5 (Cyrillic)"),
3827 build_string ("iso8859-5"),
3829 0 /* MIN_CHAR_CYRILLIC */,
3830 0 /* MAX_CHAR_CYRILLIC */, 0, 32);
3831 staticpro (&Vcharset_latin_iso8859_9);
3832 Vcharset_latin_iso8859_9 =
3833 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3834 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3835 build_string ("Latin-5"),
3836 build_string ("ISO8859-9 (Latin-5)"),
3837 build_string ("ISO8859-9 (Latin-5)"),
3838 build_string ("iso8859-9"),
3840 staticpro (&Vcharset_japanese_jisx0208_1978);
3841 Vcharset_japanese_jisx0208_1978 =
3842 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3843 Qjapanese_jisx0208_1978, 94, 2,
3844 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3845 build_string ("JIS X0208:1978"),
3846 build_string ("JIS X0208:1978 (Japanese)"),
3848 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3849 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3851 staticpro (&Vcharset_chinese_gb2312);
3852 Vcharset_chinese_gb2312 =
3853 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3854 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3855 build_string ("GB2312"),
3856 build_string ("GB2312)"),
3857 build_string ("GB2312 Chinese simplified"),
3858 build_string ("gb2312"),
3860 staticpro (&Vcharset_japanese_jisx0208);
3861 Vcharset_japanese_jisx0208 =
3862 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3863 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3864 build_string ("JISX0208"),
3865 build_string ("JIS X0208:1983 (Japanese)"),
3866 build_string ("JIS X0208:1983 Japanese Kanji"),
3867 build_string ("jisx0208\\.1983"),
3870 staticpro (&Vcharset_japanese_jisx0208_1990);
3871 Vcharset_japanese_jisx0208_1990 =
3872 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3873 Qjapanese_jisx0208_1990, 94, 2,
3874 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3875 build_string ("JISX0208-1990"),
3876 build_string ("JIS X0208:1990 (Japanese)"),
3877 build_string ("JIS X0208:1990 Japanese Kanji"),
3878 build_string ("jisx0208\\.1990"),
3880 MIN_CHAR_JIS_X0208_1990,
3881 MAX_CHAR_JIS_X0208_1990, 0, 33);
3883 staticpro (&Vcharset_korean_ksc5601);
3884 Vcharset_korean_ksc5601 =
3885 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3886 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3887 build_string ("KSC5601"),
3888 build_string ("KSC5601 (Korean"),
3889 build_string ("KSC5601 Korean Hangul and Hanja"),
3890 build_string ("ksc5601"),
3892 staticpro (&Vcharset_japanese_jisx0212);
3893 Vcharset_japanese_jisx0212 =
3894 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3895 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3896 build_string ("JISX0212"),
3897 build_string ("JISX0212 (Japanese)"),
3898 build_string ("JISX0212 Japanese Supplement"),
3899 build_string ("jisx0212"),
3902 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3903 staticpro (&Vcharset_chinese_cns11643_1);
3904 Vcharset_chinese_cns11643_1 =
3905 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3906 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3907 build_string ("CNS11643-1"),
3908 build_string ("CNS11643-1 (Chinese traditional)"),
3910 ("CNS 11643 Plane 1 Chinese traditional"),
3911 build_string (CHINESE_CNS_PLANE_RE("1")),
3913 staticpro (&Vcharset_chinese_cns11643_2);
3914 Vcharset_chinese_cns11643_2 =
3915 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3916 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3917 build_string ("CNS11643-2"),
3918 build_string ("CNS11643-2 (Chinese traditional)"),
3920 ("CNS 11643 Plane 2 Chinese traditional"),
3921 build_string (CHINESE_CNS_PLANE_RE("2")),
3924 staticpro (&Vcharset_latin_tcvn5712);
3925 Vcharset_latin_tcvn5712 =
3926 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3927 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3928 build_string ("TCVN 5712"),
3929 build_string ("TCVN 5712 (VSCII-2)"),
3930 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3931 build_string ("tcvn5712-1"),
3933 staticpro (&Vcharset_latin_viscii_lower);
3934 Vcharset_latin_viscii_lower =
3935 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3936 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3937 build_string ("VISCII lower"),
3938 build_string ("VISCII lower (Vietnamese)"),
3939 build_string ("VISCII lower (Vietnamese)"),
3940 build_string ("MULEVISCII-LOWER"),
3942 staticpro (&Vcharset_latin_viscii_upper);
3943 Vcharset_latin_viscii_upper =
3944 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3945 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3946 build_string ("VISCII upper"),
3947 build_string ("VISCII upper (Vietnamese)"),
3948 build_string ("VISCII upper (Vietnamese)"),
3949 build_string ("MULEVISCII-UPPER"),
3951 staticpro (&Vcharset_latin_viscii);
3952 Vcharset_latin_viscii =
3953 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3954 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3955 build_string ("VISCII"),
3956 build_string ("VISCII 1.1 (Vietnamese)"),
3957 build_string ("VISCII 1.1 (Vietnamese)"),
3958 build_string ("VISCII1\\.1"),
3960 staticpro (&Vcharset_ideograph_daikanwa);
3961 Vcharset_ideograph_daikanwa =
3962 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
3963 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3964 build_string ("Daikanwa"),
3965 build_string ("Morohashi's Daikanwa"),
3966 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
3967 build_string ("Daikanwa"),
3968 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
3969 staticpro (&Vcharset_mojikyo);
3971 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
3972 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3973 build_string ("Mojikyo"),
3974 build_string ("Mojikyo"),
3975 build_string ("Konjaku-Mojikyo"),
3977 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
3978 staticpro (&Vcharset_mojikyo_pj_1);
3979 Vcharset_mojikyo_pj_1 =
3980 make_charset (LEADING_BYTE_MOJIKYO_PJ_1, Qmojikyo_pj_1, 94, 2,
3981 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3982 build_string ("Mojikyo-PJ-1"),
3983 build_string ("Mojikyo (pseudo JIS encoding) part 1"),
3985 ("Konjaku-Mojikyo (pseudo JIS encoding) part 1"),
3986 build_string ("jisx0208\\.Mojikyo-1$"),
3988 staticpro (&Vcharset_mojikyo_pj_2);
3989 Vcharset_mojikyo_pj_2 =
3990 make_charset (LEADING_BYTE_MOJIKYO_PJ_2, Qmojikyo_pj_2, 94, 2,
3991 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3992 build_string ("Mojikyo-PJ-2"),
3993 build_string ("Mojikyo (pseudo JIS encoding) part 2"),
3995 ("Konjaku-Mojikyo (pseudo JIS encoding) part 2"),
3996 build_string ("jisx0208\\.Mojikyo-2$"),
3998 staticpro (&Vcharset_mojikyo_pj_3);
3999 Vcharset_mojikyo_pj_3 =
4000 make_charset (LEADING_BYTE_MOJIKYO_PJ_3, Qmojikyo_pj_3, 94, 2,
4001 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4002 build_string ("Mojikyo-PJ-3"),
4003 build_string ("Mojikyo (pseudo JIS encoding) part 3"),
4005 ("Konjaku-Mojikyo (pseudo JIS encoding) part 3"),
4006 build_string ("jisx0208\\.Mojikyo-3$"),
4008 staticpro (&Vcharset_mojikyo_pj_4);
4009 Vcharset_mojikyo_pj_4 =
4010 make_charset (LEADING_BYTE_MOJIKYO_PJ_4, Qmojikyo_pj_4, 94, 2,
4011 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4012 build_string ("Mojikyo-PJ-4"),
4013 build_string ("Mojikyo (pseudo JIS encoding) part 4"),
4015 ("Konjaku-Mojikyo (pseudo JIS encoding) part 4"),
4016 build_string ("jisx0208\\.Mojikyo-4$"),
4018 staticpro (&Vcharset_mojikyo_pj_5);
4019 Vcharset_mojikyo_pj_5 =
4020 make_charset (LEADING_BYTE_MOJIKYO_PJ_5, Qmojikyo_pj_5, 94, 2,
4021 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4022 build_string ("Mojikyo-PJ-5"),
4023 build_string ("Mojikyo (pseudo JIS encoding) part 5"),
4025 ("Konjaku-Mojikyo (pseudo JIS encoding) part 5"),
4026 build_string ("jisx0208\\.Mojikyo-5$"),
4028 staticpro (&Vcharset_mojikyo_pj_6);
4029 Vcharset_mojikyo_pj_6 =
4030 make_charset (LEADING_BYTE_MOJIKYO_PJ_6, Qmojikyo_pj_6, 94, 2,
4031 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4032 build_string ("Mojikyo-PJ-6"),
4033 build_string ("Mojikyo (pseudo JIS encoding) part 6"),
4035 ("Konjaku-Mojikyo (pseudo JIS encoding) part 6"),
4036 build_string ("jisx0208\\.Mojikyo-6$"),
4038 staticpro (&Vcharset_mojikyo_pj_7);
4039 Vcharset_mojikyo_pj_7 =
4040 make_charset (LEADING_BYTE_MOJIKYO_PJ_7, Qmojikyo_pj_7, 94, 2,
4041 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4042 build_string ("Mojikyo-PJ-7"),
4043 build_string ("Mojikyo (pseudo JIS encoding) part 7"),
4045 ("Konjaku-Mojikyo (pseudo JIS encoding) part 7"),
4046 build_string ("jisx0208\\.Mojikyo-7$"),
4048 staticpro (&Vcharset_mojikyo_pj_8);
4049 Vcharset_mojikyo_pj_8 =
4050 make_charset (LEADING_BYTE_MOJIKYO_PJ_8, Qmojikyo_pj_8, 94, 2,
4051 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4052 build_string ("Mojikyo-PJ-8"),
4053 build_string ("Mojikyo (pseudo JIS encoding) part 8"),
4055 ("Konjaku-Mojikyo (pseudo JIS encoding) part 8"),
4056 build_string ("jisx0208\\.Mojikyo-8$"),
4058 staticpro (&Vcharset_mojikyo_pj_9);
4059 Vcharset_mojikyo_pj_9 =
4060 make_charset (LEADING_BYTE_MOJIKYO_PJ_9, Qmojikyo_pj_9, 94, 2,
4061 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4062 build_string ("Mojikyo-PJ-9"),
4063 build_string ("Mojikyo (pseudo JIS encoding) part 9"),
4065 ("Konjaku-Mojikyo (pseudo JIS encoding) part 9"),
4066 build_string ("jisx0208\\.Mojikyo-9$"),
4068 staticpro (&Vcharset_mojikyo_pj_10);
4069 Vcharset_mojikyo_pj_10 =
4070 make_charset (LEADING_BYTE_MOJIKYO_PJ_10, Qmojikyo_pj_10, 94, 2,
4071 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4072 build_string ("Mojikyo-PJ-10"),
4073 build_string ("Mojikyo (pseudo JIS encoding) part 10"),
4075 ("Konjaku-Mojikyo (pseudo JIS encoding) part 10"),
4076 build_string ("jisx0208\\.Mojikyo-10$"),
4078 staticpro (&Vcharset_mojikyo_pj_11);
4079 Vcharset_mojikyo_pj_11 =
4080 make_charset (LEADING_BYTE_MOJIKYO_PJ_11, Qmojikyo_pj_11, 94, 2,
4081 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4082 build_string ("Mojikyo-PJ-11"),
4083 build_string ("Mojikyo (pseudo JIS encoding) part 11"),
4085 ("Konjaku-Mojikyo (pseudo JIS encoding) part 11"),
4086 build_string ("jisx0208\\.Mojikyo-11$"),
4088 staticpro (&Vcharset_mojikyo_pj_12);
4089 Vcharset_mojikyo_pj_12 =
4090 make_charset (LEADING_BYTE_MOJIKYO_PJ_12, Qmojikyo_pj_12, 94, 2,
4091 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4092 build_string ("Mojikyo-PJ-12"),
4093 build_string ("Mojikyo (pseudo JIS encoding) part 12"),
4095 ("Konjaku-Mojikyo (pseudo JIS encoding) part 12"),
4096 build_string ("jisx0208\\.Mojikyo-12$"),
4098 staticpro (&Vcharset_mojikyo_pj_13);
4099 Vcharset_mojikyo_pj_13 =
4100 make_charset (LEADING_BYTE_MOJIKYO_PJ_13, Qmojikyo_pj_13, 94, 2,
4101 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4102 build_string ("Mojikyo-PJ-13"),
4103 build_string ("Mojikyo (pseudo JIS encoding) part 13"),
4105 ("Konjaku-Mojikyo (pseudo JIS encoding) part 13"),
4106 build_string ("jisx0208\\.Mojikyo-13$"),
4108 staticpro (&Vcharset_mojikyo_pj_14);
4109 Vcharset_mojikyo_pj_14 =
4110 make_charset (LEADING_BYTE_MOJIKYO_PJ_14, Qmojikyo_pj_14, 94, 2,
4111 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4112 build_string ("Mojikyo-PJ-14"),
4113 build_string ("Mojikyo (pseudo JIS encoding) part 14"),
4115 ("Konjaku-Mojikyo (pseudo JIS encoding) part 14"),
4116 build_string ("jisx0208\\.Mojikyo-14$"),
4118 staticpro (&Vcharset_mojikyo_pj_15);
4119 Vcharset_mojikyo_pj_15 =
4120 make_charset (LEADING_BYTE_MOJIKYO_PJ_15, Qmojikyo_pj_15, 94, 2,
4121 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4122 build_string ("Mojikyo-PJ-15"),
4123 build_string ("Mojikyo (pseudo JIS encoding) part 15"),
4125 ("Konjaku-Mojikyo (pseudo JIS encoding) part 15"),
4126 build_string ("jisx0208\\.Mojikyo-15$"),
4128 staticpro (&Vcharset_mojikyo_pj_16);
4129 Vcharset_mojikyo_pj_16 =
4130 make_charset (LEADING_BYTE_MOJIKYO_PJ_16, Qmojikyo_pj_16, 94, 2,
4131 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4132 build_string ("Mojikyo-PJ-16"),
4133 build_string ("Mojikyo (pseudo JIS encoding) part 16"),
4135 ("Konjaku-Mojikyo (pseudo JIS encoding) part 16"),
4136 build_string ("jisx0208\\.Mojikyo-16$"),
4138 staticpro (&Vcharset_mojikyo_pj_17);
4139 Vcharset_mojikyo_pj_17 =
4140 make_charset (LEADING_BYTE_MOJIKYO_PJ_17, Qmojikyo_pj_17, 94, 2,
4141 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4142 build_string ("Mojikyo-PJ-17"),
4143 build_string ("Mojikyo (pseudo JIS encoding) part 17"),
4145 ("Konjaku-Mojikyo (pseudo JIS encoding) part 17"),
4146 build_string ("jisx0208\\.Mojikyo-17$"),
4148 staticpro (&Vcharset_mojikyo_pj_18);
4149 Vcharset_mojikyo_pj_18 =
4150 make_charset (LEADING_BYTE_MOJIKYO_PJ_18, Qmojikyo_pj_18, 94, 2,
4151 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4152 build_string ("Mojikyo-PJ-18"),
4153 build_string ("Mojikyo (pseudo JIS encoding) part 18"),
4155 ("Konjaku-Mojikyo (pseudo JIS encoding) part 18"),
4156 build_string ("jisx0208\\.Mojikyo-18$"),
4158 staticpro (&Vcharset_mojikyo_pj_19);
4159 Vcharset_mojikyo_pj_19 =
4160 make_charset (LEADING_BYTE_MOJIKYO_PJ_19, Qmojikyo_pj_19, 94, 2,
4161 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4162 build_string ("Mojikyo-PJ-19"),
4163 build_string ("Mojikyo (pseudo JIS encoding) part 19"),
4165 ("Konjaku-Mojikyo (pseudo JIS encoding) part 19"),
4166 build_string ("jisx0208\\.Mojikyo-19$"),
4168 staticpro (&Vcharset_mojikyo_pj_20);
4169 Vcharset_mojikyo_pj_20 =
4170 make_charset (LEADING_BYTE_MOJIKYO_PJ_20, Qmojikyo_pj_20, 94, 2,
4171 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4172 build_string ("Mojikyo-PJ-20"),
4173 build_string ("Mojikyo (pseudo JIS encoding) part 20"),
4175 ("Konjaku-Mojikyo (pseudo JIS encoding) part 20"),
4176 build_string ("jisx0208\\.Mojikyo-20$"),
4178 staticpro (&Vcharset_mojikyo_pj_21);
4179 Vcharset_mojikyo_pj_21 =
4180 make_charset (LEADING_BYTE_MOJIKYO_PJ_21, Qmojikyo_pj_21, 94, 2,
4181 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4182 build_string ("Mojikyo-PJ-21"),
4183 build_string ("Mojikyo (pseudo JIS encoding) part 21"),
4185 ("Konjaku-Mojikyo (pseudo JIS encoding) part 21"),
4186 build_string ("jisx0208\\.Mojikyo-21$"),
4188 staticpro (&Vcharset_ethiopic_ucs);
4189 Vcharset_ethiopic_ucs =
4190 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
4191 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4192 build_string ("Ethiopic (UCS)"),
4193 build_string ("Ethiopic (UCS)"),
4194 build_string ("Ethiopic of UCS"),
4195 build_string ("Ethiopic-Unicode"),
4196 Qnil, 0x1200, 0x137F, 0x1200, 0);
4198 staticpro (&Vcharset_chinese_big5_1);
4199 Vcharset_chinese_big5_1 =
4200 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
4201 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
4202 build_string ("Big5"),
4203 build_string ("Big5 (Level-1)"),
4205 ("Big5 Level-1 Chinese traditional"),
4206 build_string ("big5"),
4208 staticpro (&Vcharset_chinese_big5_2);
4209 Vcharset_chinese_big5_2 =
4210 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
4211 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
4212 build_string ("Big5"),
4213 build_string ("Big5 (Level-2)"),
4215 ("Big5 Level-2 Chinese traditional"),
4216 build_string ("big5"),
4219 #ifdef ENABLE_COMPOSITE_CHARS
4220 /* #### For simplicity, we put composite chars into a 96x96 charset.
4221 This is going to lead to problems because you can run out of
4222 room, esp. as we don't yet recycle numbers. */
4223 staticpro (&Vcharset_composite);
4224 Vcharset_composite =
4225 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
4226 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4227 build_string ("Composite"),
4228 build_string ("Composite characters"),
4229 build_string ("Composite characters"),
4232 /* #### not dumped properly */
4233 composite_char_row_next = 32;
4234 composite_char_col_next = 32;
4236 Vcomposite_char_string2char_hash_table =
4237 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
4238 Vcomposite_char_char2string_hash_table =
4239 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
4240 staticpro (&Vcomposite_char_string2char_hash_table);
4241 staticpro (&Vcomposite_char_char2string_hash_table);
4242 #endif /* ENABLE_COMPOSITE_CHARS */