1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 20.3. Not in FSF. */
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_japanese_jisx0208;
60 Lisp_Object Vcharset_japanese_jisx0208_1990;
61 Lisp_Object Vcharset_korean_ksc5601;
62 Lisp_Object Vcharset_japanese_jisx0212;
63 Lisp_Object Vcharset_chinese_cns11643_1;
64 Lisp_Object Vcharset_chinese_cns11643_2;
66 Lisp_Object Vcharset_ucs;
67 Lisp_Object Vcharset_ucs_bmp;
68 Lisp_Object Vcharset_latin_viscii;
69 Lisp_Object Vcharset_latin_tcvn5712;
70 Lisp_Object Vcharset_latin_viscii_lower;
71 Lisp_Object Vcharset_latin_viscii_upper;
72 Lisp_Object Vcharset_ideograph_daikanwa;
73 Lisp_Object Vcharset_mojikyo;
74 Lisp_Object Vcharset_mojikyo_2022_1;
75 Lisp_Object Vcharset_mojikyo_pj_1;
76 Lisp_Object Vcharset_mojikyo_pj_2;
77 Lisp_Object Vcharset_mojikyo_pj_3;
78 Lisp_Object Vcharset_mojikyo_pj_4;
79 Lisp_Object Vcharset_mojikyo_pj_5;
80 Lisp_Object Vcharset_mojikyo_pj_6;
81 Lisp_Object Vcharset_mojikyo_pj_7;
82 Lisp_Object Vcharset_mojikyo_pj_8;
83 Lisp_Object Vcharset_mojikyo_pj_9;
84 Lisp_Object Vcharset_mojikyo_pj_10;
85 Lisp_Object Vcharset_mojikyo_pj_11;
86 Lisp_Object Vcharset_mojikyo_pj_12;
87 Lisp_Object Vcharset_mojikyo_pj_13;
88 Lisp_Object Vcharset_mojikyo_pj_14;
89 Lisp_Object Vcharset_mojikyo_pj_15;
90 Lisp_Object Vcharset_mojikyo_pj_16;
91 Lisp_Object Vcharset_mojikyo_pj_17;
92 Lisp_Object Vcharset_mojikyo_pj_18;
93 Lisp_Object Vcharset_mojikyo_pj_19;
94 Lisp_Object Vcharset_mojikyo_pj_20;
95 Lisp_Object Vcharset_mojikyo_pj_21;
96 Lisp_Object Vcharset_ethiopic_ucs;
98 Lisp_Object Vcharset_chinese_big5_1;
99 Lisp_Object Vcharset_chinese_big5_2;
101 #ifdef ENABLE_COMPOSITE_CHARS
102 Lisp_Object Vcharset_composite;
104 /* Hash tables for composite chars. One maps string representing
105 composed chars to their equivalent chars; one goes the
107 Lisp_Object Vcomposite_char_char2string_hash_table;
108 Lisp_Object Vcomposite_char_string2char_hash_table;
110 static int composite_char_row_next;
111 static int composite_char_col_next;
113 #endif /* ENABLE_COMPOSITE_CHARS */
115 struct charset_lookup *chlook;
117 static const struct lrecord_description charset_lookup_description_1[] = {
118 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
127 static const struct struct_description charset_lookup_description = {
128 sizeof (struct charset_lookup),
129 charset_lookup_description_1
133 /* Table of number of bytes in the string representation of a character
134 indexed by the first byte of that representation.
136 rep_bytes_by_first_byte(c) is more efficient than the equivalent
137 canonical computation:
139 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
141 const Bytecount rep_bytes_by_first_byte[0xA0] =
142 { /* 0x00 - 0x7f are for straight ASCII */
143 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
146 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
147 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
148 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
149 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
150 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
151 /* 0x80 - 0x8f are for Dimension-1 official charsets */
153 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
155 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
157 /* 0x90 - 0x9d are for Dimension-2 official charsets */
158 /* 0x9e is for Dimension-1 private charsets */
159 /* 0x9f is for Dimension-2 private charsets */
160 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
166 #define BT_UINT8_MIN 0
167 #define BT_UINT8_MAX (UCHAR_MAX - 3)
168 #define BT_UINT8_t (UCHAR_MAX - 2)
169 #define BT_UINT8_nil (UCHAR_MAX - 1)
170 #define BT_UINT8_unbound UCHAR_MAX
172 INLINE_HEADER int INT_UINT8_P (Lisp_Object obj);
173 INLINE_HEADER int UINT8_VALUE_P (Lisp_Object obj);
174 INLINE_HEADER unsigned char UINT8_ENCODE (Lisp_Object obj);
175 INLINE_HEADER Lisp_Object UINT8_DECODE (unsigned char n);
176 INLINE_HEADER unsigned short UINT8_TO_UINT16 (unsigned char n);
179 INT_UINT8_P (Lisp_Object obj)
183 int num = XINT (obj);
185 return (BT_UINT8_MIN <= num) && (num <= BT_UINT8_MAX);
192 UINT8_VALUE_P (Lisp_Object obj)
194 return EQ (obj, Qunbound)
195 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT8_P (obj);
198 INLINE_HEADER unsigned char
199 UINT8_ENCODE (Lisp_Object obj)
201 if (EQ (obj, Qunbound))
202 return BT_UINT8_unbound;
203 else if (EQ (obj, Qnil))
205 else if (EQ (obj, Qt))
211 INLINE_HEADER Lisp_Object
212 UINT8_DECODE (unsigned char n)
214 if (n == BT_UINT8_unbound)
216 else if (n == BT_UINT8_nil)
218 else if (n == BT_UINT8_t)
225 mark_uint8_byte_table (Lisp_Object obj)
231 print_uint8_byte_table (Lisp_Object obj,
232 Lisp_Object printcharfun, int escapeflag)
234 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
236 struct gcpro gcpro1, gcpro2;
237 GCPRO2 (obj, printcharfun);
239 write_c_string ("\n#<uint8-byte-table", printcharfun);
240 for (i = 0; i < 256; i++)
242 unsigned char n = bte->property[i];
244 write_c_string ("\n ", printcharfun);
245 write_c_string (" ", printcharfun);
246 if (n == BT_UINT8_unbound)
247 write_c_string ("void", printcharfun);
248 else if (n == BT_UINT8_nil)
249 write_c_string ("nil", printcharfun);
250 else if (n == BT_UINT8_t)
251 write_c_string ("t", printcharfun);
256 sprintf (buf, "%hd", n);
257 write_c_string (buf, printcharfun);
261 write_c_string (">", printcharfun);
265 uint8_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
267 Lisp_Uint8_Byte_Table *te1 = XUINT8_BYTE_TABLE (obj1);
268 Lisp_Uint8_Byte_Table *te2 = XUINT8_BYTE_TABLE (obj2);
271 for (i = 0; i < 256; i++)
272 if (te1->property[i] != te2->property[i])
278 uint8_byte_table_hash (Lisp_Object obj, int depth)
280 Lisp_Uint8_Byte_Table *te = XUINT8_BYTE_TABLE (obj);
284 for (i = 0; i < 256; i++)
285 hash = HASH2 (hash, te->property[i]);
289 DEFINE_LRECORD_IMPLEMENTATION ("uint8-byte-table", uint8_byte_table,
290 mark_uint8_byte_table,
291 print_uint8_byte_table,
292 0, uint8_byte_table_equal,
293 uint8_byte_table_hash,
294 0 /* uint8_byte_table_description */,
295 Lisp_Uint8_Byte_Table);
298 make_uint8_byte_table (unsigned char initval)
302 Lisp_Uint8_Byte_Table *cte;
304 cte = alloc_lcrecord_type (Lisp_Uint8_Byte_Table,
305 &lrecord_uint8_byte_table);
307 for (i = 0; i < 256; i++)
308 cte->property[i] = initval;
310 XSETUINT8_BYTE_TABLE (obj, cte);
315 uint8_byte_table_same_value_p (Lisp_Object obj)
317 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
318 unsigned char v0 = bte->property[0];
321 for (i = 1; i < 256; i++)
323 if (bte->property[i] != v0)
330 #define BT_UINT16_MIN 0
331 #define BT_UINT16_MAX (USHRT_MAX - 3)
332 #define BT_UINT16_t (USHRT_MAX - 2)
333 #define BT_UINT16_nil (USHRT_MAX - 1)
334 #define BT_UINT16_unbound USHRT_MAX
336 INLINE_HEADER int INT_UINT16_P (Lisp_Object obj);
337 INLINE_HEADER int UINT16_VALUE_P (Lisp_Object obj);
338 INLINE_HEADER unsigned short UINT16_ENCODE (Lisp_Object obj);
339 INLINE_HEADER Lisp_Object UINT16_DECODE (unsigned short us);
342 INT_UINT16_P (Lisp_Object obj)
346 int num = XINT (obj);
348 return (BT_UINT16_MIN <= num) && (num <= BT_UINT16_MAX);
355 UINT16_VALUE_P (Lisp_Object obj)
357 return EQ (obj, Qunbound)
358 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT16_P (obj);
361 INLINE_HEADER unsigned short
362 UINT16_ENCODE (Lisp_Object obj)
364 if (EQ (obj, Qunbound))
365 return BT_UINT16_unbound;
366 else if (EQ (obj, Qnil))
367 return BT_UINT16_nil;
368 else if (EQ (obj, Qt))
374 INLINE_HEADER Lisp_Object
375 UINT16_DECODE (unsigned short n)
377 if (n == BT_UINT16_unbound)
379 else if (n == BT_UINT16_nil)
381 else if (n == BT_UINT16_t)
387 INLINE_HEADER unsigned short
388 UINT8_TO_UINT16 (unsigned char n)
390 if (n == BT_UINT8_unbound)
391 return BT_UINT16_unbound;
392 else if (n == BT_UINT8_nil)
393 return BT_UINT16_nil;
394 else if (n == BT_UINT8_t)
401 mark_uint16_byte_table (Lisp_Object obj)
407 print_uint16_byte_table (Lisp_Object obj,
408 Lisp_Object printcharfun, int escapeflag)
410 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
412 struct gcpro gcpro1, gcpro2;
413 GCPRO2 (obj, printcharfun);
415 write_c_string ("\n#<uint16-byte-table", printcharfun);
416 for (i = 0; i < 256; i++)
418 unsigned short n = bte->property[i];
420 write_c_string ("\n ", printcharfun);
421 write_c_string (" ", printcharfun);
422 if (n == BT_UINT16_unbound)
423 write_c_string ("void", printcharfun);
424 else if (n == BT_UINT16_nil)
425 write_c_string ("nil", printcharfun);
426 else if (n == BT_UINT16_t)
427 write_c_string ("t", printcharfun);
432 sprintf (buf, "%hd", n);
433 write_c_string (buf, printcharfun);
437 write_c_string (">", printcharfun);
441 uint16_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
443 Lisp_Uint16_Byte_Table *te1 = XUINT16_BYTE_TABLE (obj1);
444 Lisp_Uint16_Byte_Table *te2 = XUINT16_BYTE_TABLE (obj2);
447 for (i = 0; i < 256; i++)
448 if (te1->property[i] != te2->property[i])
454 uint16_byte_table_hash (Lisp_Object obj, int depth)
456 Lisp_Uint16_Byte_Table *te = XUINT16_BYTE_TABLE (obj);
460 for (i = 0; i < 256; i++)
461 hash = HASH2 (hash, te->property[i]);
465 DEFINE_LRECORD_IMPLEMENTATION ("uint16-byte-table", uint16_byte_table,
466 mark_uint16_byte_table,
467 print_uint16_byte_table,
468 0, uint16_byte_table_equal,
469 uint16_byte_table_hash,
470 0 /* uint16_byte_table_description */,
471 Lisp_Uint16_Byte_Table);
474 make_uint16_byte_table (unsigned short initval)
478 Lisp_Uint16_Byte_Table *cte;
480 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
481 &lrecord_uint16_byte_table);
483 for (i = 0; i < 256; i++)
484 cte->property[i] = initval;
486 XSETUINT16_BYTE_TABLE (obj, cte);
491 uint16_byte_table_same_value_p (Lisp_Object obj)
493 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
494 unsigned short v0 = bte->property[0];
497 for (i = 1; i < 256; i++)
499 if (bte->property[i] != v0)
507 mark_byte_table (Lisp_Object obj)
509 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
512 for (i = 0; i < 256; i++)
514 mark_object (cte->property[i]);
520 print_byte_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
522 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
524 struct gcpro gcpro1, gcpro2;
525 GCPRO2 (obj, printcharfun);
527 write_c_string ("\n#<byte-table", printcharfun);
528 for (i = 0; i < 256; i++)
530 Lisp_Object elt = bte->property[i];
532 write_c_string ("\n ", printcharfun);
533 write_c_string (" ", printcharfun);
534 if (EQ (elt, Qunbound))
535 write_c_string ("void", printcharfun);
537 print_internal (elt, printcharfun, escapeflag);
540 write_c_string (">", printcharfun);
544 byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
546 Lisp_Byte_Table *cte1 = XBYTE_TABLE (obj1);
547 Lisp_Byte_Table *cte2 = XBYTE_TABLE (obj2);
550 for (i = 0; i < 256; i++)
551 if (BYTE_TABLE_P (cte1->property[i]))
553 if (BYTE_TABLE_P (cte2->property[i]))
555 if (!byte_table_equal (cte1->property[i],
556 cte2->property[i], depth + 1))
563 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
569 byte_table_hash (Lisp_Object obj, int depth)
571 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
573 return internal_array_hash (cte->property, 256, depth);
576 static const struct lrecord_description byte_table_description[] = {
577 { XD_LISP_OBJECT_ARRAY, offsetof(Lisp_Byte_Table, property), 256 },
581 DEFINE_LRECORD_IMPLEMENTATION ("byte-table", byte_table,
586 byte_table_description,
590 make_byte_table (Lisp_Object initval)
594 Lisp_Byte_Table *cte;
596 cte = alloc_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
598 for (i = 0; i < 256; i++)
599 cte->property[i] = initval;
601 XSETBYTE_TABLE (obj, cte);
606 byte_table_same_value_p (Lisp_Object obj)
608 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
609 Lisp_Object v0 = bte->property[0];
612 for (i = 1; i < 256; i++)
614 if (!internal_equal (bte->property[i], v0, 0))
621 Lisp_Object get_byte_table (Lisp_Object table, unsigned char idx);
622 Lisp_Object put_byte_table (Lisp_Object table, unsigned char idx,
626 get_byte_table (Lisp_Object table, unsigned char idx)
628 if (UINT8_BYTE_TABLE_P (table))
629 return UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[idx]);
630 else if (UINT16_BYTE_TABLE_P (table))
631 return UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[idx]);
632 else if (BYTE_TABLE_P (table))
633 return XBYTE_TABLE(table)->property[idx];
639 put_byte_table (Lisp_Object table, unsigned char idx, Lisp_Object value)
641 if (UINT8_BYTE_TABLE_P (table))
643 if (UINT8_VALUE_P (value))
645 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
646 if (!UINT8_BYTE_TABLE_P (value) &&
647 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
648 && uint8_byte_table_same_value_p (table))
653 else if (UINT16_VALUE_P (value))
655 Lisp_Object new = make_uint16_byte_table (Qnil);
658 for (i = 0; i < 256; i++)
660 XUINT16_BYTE_TABLE(new)->property[i]
661 = UINT8_TO_UINT16 (XUINT8_BYTE_TABLE(table)->property[i]);
663 XUINT16_BYTE_TABLE(new)->property[idx] = UINT16_ENCODE (value);
668 Lisp_Object new = make_byte_table (Qnil);
671 for (i = 0; i < 256; i++)
673 XBYTE_TABLE(new)->property[i]
674 = UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[i]);
676 XBYTE_TABLE(new)->property[idx] = value;
680 else if (UINT16_BYTE_TABLE_P (table))
682 if (UINT16_VALUE_P (value))
684 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
685 if (!UINT8_BYTE_TABLE_P (value) &&
686 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
687 && uint16_byte_table_same_value_p (table))
694 Lisp_Object new = make_byte_table (Qnil);
697 for (i = 0; i < 256; i++)
699 XBYTE_TABLE(new)->property[i]
700 = UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[i]);
702 XBYTE_TABLE(new)->property[idx] = value;
706 else if (BYTE_TABLE_P (table))
708 XBYTE_TABLE(table)->property[idx] = value;
709 if (!UINT8_BYTE_TABLE_P (value) &&
710 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
711 && byte_table_same_value_p (table))
716 else if (!internal_equal (table, value, 0))
718 if (UINT8_VALUE_P (table) && UINT8_VALUE_P (value))
720 table = make_uint8_byte_table (UINT8_ENCODE (table));
721 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
723 else if (UINT16_VALUE_P (table) && UINT16_VALUE_P (value))
725 table = make_uint16_byte_table (UINT16_ENCODE (table));
726 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
730 table = make_byte_table (table);
731 XBYTE_TABLE(table)->property[idx] = value;
738 mark_char_id_table (Lisp_Object obj)
740 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
746 print_char_id_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
748 Lisp_Object table = XCHAR_ID_TABLE (obj)->table;
750 struct gcpro gcpro1, gcpro2;
751 GCPRO2 (obj, printcharfun);
753 write_c_string ("#<char-id-table ", printcharfun);
754 for (i = 0; i < 256; i++)
756 Lisp_Object elt = get_byte_table (table, i);
757 if (i != 0) write_c_string ("\n ", printcharfun);
758 if (EQ (elt, Qunbound))
759 write_c_string ("void", printcharfun);
761 print_internal (elt, printcharfun, escapeflag);
764 write_c_string (">", printcharfun);
768 char_id_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
770 Lisp_Object table1 = XCHAR_ID_TABLE (obj1)->table;
771 Lisp_Object table2 = XCHAR_ID_TABLE (obj2)->table;
774 for (i = 0; i < 256; i++)
776 if (!internal_equal (get_byte_table (table1, i),
777 get_byte_table (table2, i), 0))
784 char_id_table_hash (Lisp_Object obj, int depth)
786 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
788 return char_id_table_hash (cte->table, depth + 1);
791 static const struct lrecord_description char_id_table_description[] = {
792 { XD_LISP_OBJECT, offsetof(Lisp_Char_ID_Table, table) },
796 DEFINE_LRECORD_IMPLEMENTATION ("char-id-table", char_id_table,
799 0, char_id_table_equal,
801 char_id_table_description,
805 make_char_id_table (Lisp_Object initval)
808 Lisp_Char_ID_Table *cte;
810 cte = alloc_lcrecord_type (Lisp_Char_ID_Table, &lrecord_char_id_table);
812 cte->table = make_byte_table (initval);
814 XSETCHAR_ID_TABLE (obj, cte);
820 get_char_id_table (Emchar ch, Lisp_Object table)
822 unsigned int code = ch;
829 (XCHAR_ID_TABLE (table)->table,
830 (unsigned char)(code >> 24)),
831 (unsigned char) (code >> 16)),
832 (unsigned char) (code >> 8)),
833 (unsigned char) code);
836 void put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table);
838 put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table)
840 unsigned int code = ch;
841 Lisp_Object table1, table2, table3, table4;
843 table1 = XCHAR_ID_TABLE (table)->table;
844 table2 = get_byte_table (table1, (unsigned char)(code >> 24));
845 table3 = get_byte_table (table2, (unsigned char)(code >> 16));
846 table4 = get_byte_table (table3, (unsigned char)(code >> 8));
848 table4 = put_byte_table (table4, (unsigned char)code, value);
849 table3 = put_byte_table (table3, (unsigned char)(code >> 8), table4);
850 table2 = put_byte_table (table2, (unsigned char)(code >> 16), table3);
851 XCHAR_ID_TABLE (table)->table
852 = put_byte_table (table1, (unsigned char)(code >> 24), table2);
856 Lisp_Object Vchar_attribute_hash_table;
857 Lisp_Object Vcharacter_composition_table;
858 Lisp_Object Vcharacter_variant_table;
860 Lisp_Object Qideograph_daikanwa;
861 Lisp_Object Q_decomposition;
865 Lisp_Object Qisolated;
866 Lisp_Object Qinitial;
869 Lisp_Object Qvertical;
870 Lisp_Object QnoBreak;
871 Lisp_Object Qfraction;
881 Emchar to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg);
883 Lisp_Object put_char_ccs_code_point (Lisp_Object character,
884 Lisp_Object ccs, Lisp_Object value);
885 Lisp_Object remove_char_ccs (Lisp_Object character, Lisp_Object ccs);
888 to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
894 else if (EQ (v, Qcompat))
896 else if (EQ (v, Qisolated))
898 else if (EQ (v, Qinitial))
900 else if (EQ (v, Qmedial))
902 else if (EQ (v, Qfinal))
904 else if (EQ (v, Qvertical))
906 else if (EQ (v, QnoBreak))
908 else if (EQ (v, Qfraction))
910 else if (EQ (v, Qsuper))
912 else if (EQ (v, Qsub))
914 else if (EQ (v, Qcircle))
916 else if (EQ (v, Qsquare))
918 else if (EQ (v, Qwide))
920 else if (EQ (v, Qnarrow))
922 else if (EQ (v, Qsmall))
924 else if (EQ (v, Qfont))
927 signal_simple_error (err_msg, err_arg);
930 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
931 Return character corresponding with list.
935 Lisp_Object table = Vcharacter_composition_table;
936 Lisp_Object rest = list;
940 Lisp_Object v = Fcar (rest);
942 Emchar c = to_char_id (v, "Invalid value for composition", list);
944 ret = get_char_id_table (c, table);
949 if (!CHAR_ID_TABLE_P (ret))
954 else if (!CONSP (rest))
956 else if (CHAR_ID_TABLE_P (ret))
959 signal_simple_error ("Invalid table is found with", list);
961 signal_simple_error ("Invalid value for composition", list);
964 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
965 Return variants of CHARACTER.
969 CHECK_CHAR (character);
970 return Fcopy_list (get_char_id_table (XCHAR (character),
971 Vcharacter_variant_table));
975 /* We store the char-attributes in hash tables with the names as the
976 key and the actual char-id-table object as the value. Occasionally
977 we need to use them in a list format. These routines provide us
979 struct char_attribute_list_closure
981 Lisp_Object *char_attribute_list;
985 add_char_attribute_to_list_mapper (Lisp_Object key, Lisp_Object value,
986 void *char_attribute_list_closure)
988 /* This function can GC */
989 struct char_attribute_list_closure *calcl
990 = (struct char_attribute_list_closure*) char_attribute_list_closure;
991 Lisp_Object *char_attribute_list = calcl->char_attribute_list;
993 *char_attribute_list = Fcons (key, *char_attribute_list);
997 DEFUN ("char-attribute-list", Fchar_attribute_list, 0, 0, 0, /*
998 Return the list of all existing character attributes except coded-charsets.
1002 Lisp_Object char_attribute_list = Qnil;
1003 struct gcpro gcpro1;
1004 struct char_attribute_list_closure char_attribute_list_closure;
1006 GCPRO1 (char_attribute_list);
1007 char_attribute_list_closure.char_attribute_list = &char_attribute_list;
1008 elisp_maphash (add_char_attribute_to_list_mapper,
1009 Vchar_attribute_hash_table,
1010 &char_attribute_list_closure);
1012 return char_attribute_list;
1015 DEFUN ("find-char-attribute-table", Ffind_char_attribute_table, 1, 1, 0, /*
1016 Return char-id-table corresponding to ATTRIBUTE.
1020 return Fgethash (attribute, Vchar_attribute_hash_table, Qnil);
1024 /* We store the char-id-tables in hash tables with the attributes as
1025 the key and the actual char-id-table object as the value. Each
1026 char-id-table stores values of an attribute corresponding with
1027 characters. Occasionally we need to get attributes of a character
1028 in a association-list format. These routines provide us with
1030 struct char_attribute_alist_closure
1033 Lisp_Object *char_attribute_alist;
1037 add_char_attribute_alist_mapper (Lisp_Object key, Lisp_Object value,
1038 void *char_attribute_alist_closure)
1040 /* This function can GC */
1041 struct char_attribute_alist_closure *caacl =
1042 (struct char_attribute_alist_closure*) char_attribute_alist_closure;
1043 Lisp_Object ret = get_char_id_table (caacl->char_id, value);
1044 if (!UNBOUNDP (ret))
1046 Lisp_Object *char_attribute_alist = caacl->char_attribute_alist;
1047 *char_attribute_alist
1048 = Fcons (Fcons (key, ret), *char_attribute_alist);
1053 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
1054 Return the alist of attributes of CHARACTER.
1058 Lisp_Object alist = Qnil;
1061 CHECK_CHAR (character);
1063 struct gcpro gcpro1;
1064 struct char_attribute_alist_closure char_attribute_alist_closure;
1067 char_attribute_alist_closure.char_id = XCHAR (character);
1068 char_attribute_alist_closure.char_attribute_alist = &alist;
1069 elisp_maphash (add_char_attribute_alist_mapper,
1070 Vchar_attribute_hash_table,
1071 &char_attribute_alist_closure);
1075 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
1077 Lisp_Object ccs = chlook->charset_by_leading_byte[i];
1081 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1084 if ( CHAR_ID_TABLE_P (encoding_table)
1085 && INTP (cpos = get_char_id_table (XCHAR (character),
1088 alist = Fcons (Fcons (ccs, cpos), alist);
1095 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
1096 Return the value of CHARACTER's ATTRIBUTE.
1098 (character, attribute))
1102 CHECK_CHAR (character);
1103 if (!NILP (ccs = Ffind_charset (attribute)))
1105 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1107 if (CHAR_ID_TABLE_P (encoding_table))
1108 return get_char_id_table (XCHAR (character), encoding_table);
1114 Lisp_Object table = Fgethash (attribute,
1115 Vchar_attribute_hash_table,
1117 if (!UNBOUNDP (table))
1119 Lisp_Object ret = get_char_id_table (XCHAR (character), table);
1120 if (!UNBOUNDP (ret))
1127 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
1128 Store CHARACTER's ATTRIBUTE with VALUE.
1130 (character, attribute, value))
1134 CHECK_CHAR (character);
1135 ccs = Ffind_charset (attribute);
1138 return put_char_ccs_code_point (character, ccs, value);
1140 else if (EQ (attribute, Q_decomposition))
1145 signal_simple_error ("Invalid value for ->decomposition",
1148 if (CONSP (Fcdr (value)))
1150 Lisp_Object rest = value;
1151 Lisp_Object table = Vcharacter_composition_table;
1155 GET_EXTERNAL_LIST_LENGTH (rest, len);
1156 seq = make_vector (len, Qnil);
1158 while (CONSP (rest))
1160 Lisp_Object v = Fcar (rest);
1163 = to_char_id (v, "Invalid value for ->decomposition", value);
1166 XVECTOR_DATA(seq)[i++] = v;
1168 XVECTOR_DATA(seq)[i++] = make_char (c);
1172 put_char_id_table (c, character, table);
1177 ntable = get_char_id_table (c, table);
1178 if (!CHAR_ID_TABLE_P (ntable))
1180 ntable = make_char_id_table (Qnil);
1181 put_char_id_table (c, ntable, table);
1189 Lisp_Object v = Fcar (value);
1193 Emchar c = XINT (v);
1195 = get_char_id_table (c, Vcharacter_variant_table);
1197 if (NILP (Fmemq (v, ret)))
1199 put_char_id_table (c, Fcons (character, ret),
1200 Vcharacter_variant_table);
1203 seq = make_vector (1, v);
1207 else if (EQ (attribute, Q_ucs))
1213 signal_simple_error ("Invalid value for ->ucs", value);
1217 ret = get_char_id_table (c, Vcharacter_variant_table);
1218 if (NILP (Fmemq (character, ret)))
1220 put_char_id_table (c, Fcons (character, ret),
1221 Vcharacter_variant_table);
1225 Lisp_Object table = Fgethash (attribute,
1226 Vchar_attribute_hash_table,
1231 table = make_char_id_table (Qunbound);
1232 Fputhash (attribute, table, Vchar_attribute_hash_table);
1234 put_char_id_table (XCHAR (character), value, table);
1239 DEFUN ("remove-char-attribute", Fremove_char_attribute, 2, 2, 0, /*
1240 Remove CHARACTER's ATTRIBUTE.
1242 (character, attribute))
1246 CHECK_CHAR (character);
1247 ccs = Ffind_charset (attribute);
1250 return remove_char_ccs (character, ccs);
1254 Lisp_Object table = Fgethash (attribute,
1255 Vchar_attribute_hash_table,
1257 if (!UNBOUNDP (table))
1259 put_char_id_table (XCHAR (character), Qunbound, table);
1266 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
1268 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
1270 /* ad-hoc method for `ascii' */
1271 if ((CHARSET_CHARS (cs) == 94) &&
1272 (CHARSET_BYTE_OFFSET (cs) != 33))
1273 return 128 - CHARSET_BYTE_OFFSET (cs);
1275 return CHARSET_CHARS (cs);
1278 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
1280 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
1282 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
1286 if (XVECTOR_LENGTH (v) > ccs_len)
1289 for (i = 0; i < XVECTOR_LENGTH (v); i++)
1291 Lisp_Object c = XVECTOR_DATA(v)[i];
1293 if (!NILP (c) && !CHARP (c))
1297 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
1309 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1312 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1322 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1323 nv = XVECTOR_DATA(v)[i];
1329 XVECTOR_DATA(v)[i] = Qnil;
1333 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1334 int code_point, Lisp_Object character);
1336 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1337 int code_point, Lisp_Object character)
1341 int ccs_len = XVECTOR_LENGTH (v);
1346 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1347 nv = XVECTOR_DATA(v)[i];
1351 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
1357 XVECTOR_DATA(v)[i] = character;
1361 put_char_ccs_code_point (Lisp_Object character,
1362 Lisp_Object ccs, Lisp_Object value)
1364 Lisp_Object encoding_table;
1366 if (!EQ (XCHARSET_NAME (ccs), Qucs)
1367 || (XCHAR (character) != XINT (value)))
1369 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
1370 int dim = XCHARSET_DIMENSION (ccs);
1371 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
1372 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
1376 { /* obsolete representation: value must be a list of bytes */
1377 Lisp_Object ret = Fcar (value);
1381 signal_simple_error ("Invalid value for coded-charset", value);
1382 code_point = XINT (ret);
1383 if (XCHARSET_GRAPHIC (ccs) == 1)
1385 rest = Fcdr (value);
1386 while (!NILP (rest))
1391 signal_simple_error ("Invalid value for coded-charset",
1395 signal_simple_error ("Invalid value for coded-charset",
1398 if (XCHARSET_GRAPHIC (ccs) == 1)
1400 code_point = (code_point << 8) | j;
1403 value = make_int (code_point);
1405 else if (INTP (value))
1407 code_point = XINT (value);
1408 if (XCHARSET_GRAPHIC (ccs) == 1)
1410 code_point &= 0x7F7F7F7F;
1411 value = make_int (code_point);
1415 signal_simple_error ("Invalid value for coded-charset", value);
1419 Lisp_Object cpos = Fget_char_attribute (character, ccs);
1422 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
1427 XCHARSET_DECODING_TABLE (ccs)
1428 = v = make_older_vector (ccs_len, Qnil);
1431 decoding_table_put_char (v, dim, byte_offset, code_point, character);
1433 if (NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
1435 XCHARSET_ENCODING_TABLE (ccs)
1436 = encoding_table = make_char_id_table (Qnil);
1438 put_char_id_table (XCHAR (character), value, encoding_table);
1443 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
1445 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
1446 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1448 if (VECTORP (decoding_table))
1450 Lisp_Object cpos = Fget_char_attribute (character, ccs);
1454 decoding_table_remove_char (decoding_table,
1455 XCHARSET_DIMENSION (ccs),
1456 XCHARSET_BYTE_OFFSET (ccs),
1460 if (CHAR_ID_TABLE_P (encoding_table))
1462 put_char_id_table (XCHAR (character), Qnil, encoding_table);
1467 EXFUN (Fmake_char, 3);
1468 EXFUN (Fdecode_char, 2);
1470 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
1471 Store character's ATTRIBUTES.
1475 Lisp_Object rest = attributes;
1476 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
1477 Lisp_Object character;
1479 Lisp_Object daikanwa = Qnil;
1484 while (CONSP (rest))
1486 Lisp_Object cell = Fcar (rest);
1490 signal_simple_error ("Invalid argument", attributes);
1491 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
1492 && ((XCHARSET_FINAL (ccs) != 0) ||
1493 (XCHARSET_UCS_MAX (ccs) > 0)) )
1497 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
1499 character = Fdecode_char (ccs, cell);
1500 goto setup_attributes;
1504 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
1507 signal_simple_error ("Invalid argument", attributes);
1509 character = make_char (XINT (code) + 0x100000);
1510 goto setup_attributes;
1514 else if (!INTP (code))
1515 signal_simple_error ("Invalid argument", attributes);
1517 character = make_char (XINT (code));
1521 while (CONSP (rest))
1523 Lisp_Object cell = Fcar (rest);
1525 Lisp_Object key = Fcar (cell);
1526 Lisp_Object value = Fcdr (cell);
1530 signal_simple_error ("Invalid argument", attributes);
1533 if (EQ (key, Qmorohashi_daikanwa))
1536 GET_EXTERNAL_LIST_LENGTH (value, len);
1540 if (NILP (daikanwa))
1541 daikanwa = Fcdr (Fassq (Qideograph_daikanwa, rest));
1542 if (EQ (Fcar (value), daikanwa))
1546 else if (EQ (key, Qideograph_daikanwa))
1550 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
1559 Lisp_Object Vutf_2000_version;
1563 int leading_code_private_11;
1566 Lisp_Object Qcharsetp;
1568 /* Qdoc_string, Qdimension, Qchars defined in general.c */
1569 Lisp_Object Qregistry, Qfinal, Qgraphic;
1570 Lisp_Object Qdirection;
1571 Lisp_Object Qreverse_direction_charset;
1572 Lisp_Object Qleading_byte;
1573 Lisp_Object Qshort_name, Qlong_name;
1587 Qcyrillic_iso8859_5,
1589 Qjapanese_jisx0208_1978,
1592 Qjapanese_jisx0208_1990,
1595 Qchinese_cns11643_1,
1596 Qchinese_cns11643_2,
1601 Qlatin_viscii_lower,
1602 Qlatin_viscii_upper,
1603 Qvietnamese_viscii_lower,
1604 Qvietnamese_viscii_upper,
1634 Lisp_Object Ql2r, Qr2l;
1636 Lisp_Object Vcharset_hash_table;
1638 /* Composite characters are characters constructed by overstriking two
1639 or more regular characters.
1641 1) The old Mule implementation involves storing composite characters
1642 in a buffer as a tag followed by all of the actual characters
1643 used to make up the composite character. I think this is a bad
1644 idea; it greatly complicates code that wants to handle strings
1645 one character at a time because it has to deal with the possibility
1646 of great big ungainly characters. It's much more reasonable to
1647 simply store an index into a table of composite characters.
1649 2) The current implementation only allows for 16,384 separate
1650 composite characters over the lifetime of the XEmacs process.
1651 This could become a potential problem if the user
1652 edited lots of different files that use composite characters.
1653 Due to FSF bogosity, increasing the number of allowable
1654 composite characters under Mule would decrease the number
1655 of possible faces that can exist. Mule already has shrunk
1656 this to 2048, and further shrinkage would become uncomfortable.
1657 No such problems exist in XEmacs.
1659 Composite characters could be represented as 0x80 C1 C2 C3,
1660 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1661 for slightly under 2^20 (one million) composite characters
1662 over the XEmacs process lifetime, and you only need to
1663 increase the size of a Mule character from 19 to 21 bits.
1664 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1665 85 million (slightly over 2^26) composite characters. */
1668 /************************************************************************/
1669 /* Basic Emchar functions */
1670 /************************************************************************/
1672 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1673 string in STR. Returns the number of bytes stored.
1674 Do not call this directly. Use the macro set_charptr_emchar() instead.
1678 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1684 Lisp_Object charset;
1693 else if ( c <= 0x7ff )
1695 *p++ = (c >> 6) | 0xc0;
1696 *p++ = (c & 0x3f) | 0x80;
1698 else if ( c <= 0xffff )
1700 *p++ = (c >> 12) | 0xe0;
1701 *p++ = ((c >> 6) & 0x3f) | 0x80;
1702 *p++ = (c & 0x3f) | 0x80;
1704 else if ( c <= 0x1fffff )
1706 *p++ = (c >> 18) | 0xf0;
1707 *p++ = ((c >> 12) & 0x3f) | 0x80;
1708 *p++ = ((c >> 6) & 0x3f) | 0x80;
1709 *p++ = (c & 0x3f) | 0x80;
1711 else if ( c <= 0x3ffffff )
1713 *p++ = (c >> 24) | 0xf8;
1714 *p++ = ((c >> 18) & 0x3f) | 0x80;
1715 *p++ = ((c >> 12) & 0x3f) | 0x80;
1716 *p++ = ((c >> 6) & 0x3f) | 0x80;
1717 *p++ = (c & 0x3f) | 0x80;
1721 *p++ = (c >> 30) | 0xfc;
1722 *p++ = ((c >> 24) & 0x3f) | 0x80;
1723 *p++ = ((c >> 18) & 0x3f) | 0x80;
1724 *p++ = ((c >> 12) & 0x3f) | 0x80;
1725 *p++ = ((c >> 6) & 0x3f) | 0x80;
1726 *p++ = (c & 0x3f) | 0x80;
1729 BREAKUP_CHAR (c, charset, c1, c2);
1730 lb = CHAR_LEADING_BYTE (c);
1731 if (LEADING_BYTE_PRIVATE_P (lb))
1732 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1734 if (EQ (charset, Vcharset_control_1))
1743 /* Return the first character from a Mule-encoded string in STR,
1744 assuming it's non-ASCII. Do not call this directly.
1745 Use the macro charptr_emchar() instead. */
1748 non_ascii_charptr_emchar (const Bufbyte *str)
1761 else if ( b >= 0xf8 )
1766 else if ( b >= 0xf0 )
1771 else if ( b >= 0xe0 )
1776 else if ( b >= 0xc0 )
1786 for( ; len > 0; len-- )
1789 ch = ( ch << 6 ) | ( b & 0x3f );
1793 Bufbyte i0 = *str, i1, i2 = 0;
1794 Lisp_Object charset;
1796 if (i0 == LEADING_BYTE_CONTROL_1)
1797 return (Emchar) (*++str - 0x20);
1799 if (LEADING_BYTE_PREFIX_P (i0))
1804 charset = CHARSET_BY_LEADING_BYTE (i0);
1805 if (XCHARSET_DIMENSION (charset) == 2)
1808 return MAKE_CHAR (charset, i1, i2);
1812 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1813 Do not call this directly. Use the macro valid_char_p() instead. */
1817 non_ascii_valid_char_p (Emchar ch)
1821 /* Must have only lowest 19 bits set */
1825 f1 = CHAR_FIELD1 (ch);
1826 f2 = CHAR_FIELD2 (ch);
1827 f3 = CHAR_FIELD3 (ch);
1831 Lisp_Object charset;
1833 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1834 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1835 f2 > MAX_CHAR_FIELD2_PRIVATE)
1840 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1841 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1845 NOTE: This takes advantage of the fact that
1846 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1847 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1849 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1850 if (EQ (charset, Qnil))
1852 return (XCHARSET_CHARS (charset) == 96);
1856 Lisp_Object charset;
1858 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1859 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1860 f1 > MAX_CHAR_FIELD1_PRIVATE)
1862 if (f2 < 0x20 || f3 < 0x20)
1865 #ifdef ENABLE_COMPOSITE_CHARS
1866 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1868 if (UNBOUNDP (Fgethash (make_int (ch),
1869 Vcomposite_char_char2string_hash_table,
1874 #endif /* ENABLE_COMPOSITE_CHARS */
1876 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1877 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1880 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1882 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1885 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1887 if (EQ (charset, Qnil))
1889 return (XCHARSET_CHARS (charset) == 96);
1895 /************************************************************************/
1896 /* Basic string functions */
1897 /************************************************************************/
1899 /* Copy the character pointed to by PTR into STR, assuming it's
1900 non-ASCII. Do not call this directly. Use the macro
1901 charptr_copy_char() instead. */
1904 non_ascii_charptr_copy_char (const Bufbyte *ptr, Bufbyte *str)
1906 Bufbyte *strptr = str;
1908 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1910 /* Notice fallthrough. */
1912 case 6: *++strptr = *ptr++;
1913 case 5: *++strptr = *ptr++;
1915 case 4: *++strptr = *ptr++;
1916 case 3: *++strptr = *ptr++;
1917 case 2: *++strptr = *ptr;
1922 return strptr + 1 - str;
1926 /************************************************************************/
1927 /* streams of Emchars */
1928 /************************************************************************/
1930 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1931 The functions below are not meant to be called directly; use
1932 the macros in insdel.h. */
1935 Lstream_get_emchar_1 (Lstream *stream, int ch)
1937 Bufbyte str[MAX_EMCHAR_LEN];
1938 Bufbyte *strptr = str;
1940 str[0] = (Bufbyte) ch;
1941 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1943 /* Notice fallthrough. */
1946 ch = Lstream_getc (stream);
1948 *++strptr = (Bufbyte) ch;
1950 ch = Lstream_getc (stream);
1952 *++strptr = (Bufbyte) ch;
1955 ch = Lstream_getc (stream);
1957 *++strptr = (Bufbyte) ch;
1959 ch = Lstream_getc (stream);
1961 *++strptr = (Bufbyte) ch;
1963 ch = Lstream_getc (stream);
1965 *++strptr = (Bufbyte) ch;
1970 return charptr_emchar (str);
1974 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1976 Bufbyte str[MAX_EMCHAR_LEN];
1977 Bytecount len = set_charptr_emchar (str, ch);
1978 return Lstream_write (stream, str, len);
1982 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1984 Bufbyte str[MAX_EMCHAR_LEN];
1985 Bytecount len = set_charptr_emchar (str, ch);
1986 Lstream_unread (stream, str, len);
1990 /************************************************************************/
1991 /* charset object */
1992 /************************************************************************/
1995 mark_charset (Lisp_Object obj)
1997 Lisp_Charset *cs = XCHARSET (obj);
1999 mark_object (cs->short_name);
2000 mark_object (cs->long_name);
2001 mark_object (cs->doc_string);
2002 mark_object (cs->registry);
2003 mark_object (cs->ccl_program);
2005 mark_object (cs->encoding_table);
2006 /* mark_object (cs->decoding_table); */
2012 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
2014 Lisp_Charset *cs = XCHARSET (obj);
2018 error ("printing unreadable object #<charset %s 0x%x>",
2019 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
2022 write_c_string ("#<charset ", printcharfun);
2023 print_internal (CHARSET_NAME (cs), printcharfun, 0);
2024 write_c_string (" ", printcharfun);
2025 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
2026 write_c_string (" ", printcharfun);
2027 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
2028 write_c_string (" ", printcharfun);
2029 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
2030 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
2032 CHARSET_DIMENSION (cs),
2033 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
2034 CHARSET_COLUMNS (cs),
2035 CHARSET_GRAPHIC (cs),
2036 CHARSET_FINAL (cs));
2037 write_c_string (buf, printcharfun);
2038 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
2039 sprintf (buf, " 0x%x>", cs->header.uid);
2040 write_c_string (buf, printcharfun);
2043 static const struct lrecord_description charset_description[] = {
2044 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
2045 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
2046 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
2047 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
2048 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
2049 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
2050 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
2052 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
2053 { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
2058 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
2059 mark_charset, print_charset, 0, 0, 0,
2060 charset_description,
2062 /* Make a new charset. */
2065 make_charset (Charset_ID id, Lisp_Object name,
2066 unsigned short chars, unsigned char dimension,
2067 unsigned char columns, unsigned char graphic,
2068 Bufbyte final, unsigned char direction, Lisp_Object short_name,
2069 Lisp_Object long_name, Lisp_Object doc,
2071 Lisp_Object decoding_table,
2072 Emchar ucs_min, Emchar ucs_max,
2073 Emchar code_offset, unsigned char byte_offset)
2076 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
2080 XSETCHARSET (obj, cs);
2082 CHARSET_ID (cs) = id;
2083 CHARSET_NAME (cs) = name;
2084 CHARSET_SHORT_NAME (cs) = short_name;
2085 CHARSET_LONG_NAME (cs) = long_name;
2086 CHARSET_CHARS (cs) = chars;
2087 CHARSET_DIMENSION (cs) = dimension;
2088 CHARSET_DIRECTION (cs) = direction;
2089 CHARSET_COLUMNS (cs) = columns;
2090 CHARSET_GRAPHIC (cs) = graphic;
2091 CHARSET_FINAL (cs) = final;
2092 CHARSET_DOC_STRING (cs) = doc;
2093 CHARSET_REGISTRY (cs) = reg;
2094 CHARSET_CCL_PROGRAM (cs) = Qnil;
2095 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
2097 CHARSET_DECODING_TABLE(cs) = Qnil;
2098 CHARSET_ENCODING_TABLE(cs) = Qnil;
2099 CHARSET_UCS_MIN(cs) = ucs_min;
2100 CHARSET_UCS_MAX(cs) = ucs_max;
2101 CHARSET_CODE_OFFSET(cs) = code_offset;
2102 CHARSET_BYTE_OFFSET(cs) = byte_offset;
2106 if (id == LEADING_BYTE_ASCII)
2107 CHARSET_REP_BYTES (cs) = 1;
2109 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
2111 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
2116 /* some charsets do not have final characters. This includes
2117 ASCII, Control-1, Composite, and the two faux private
2119 unsigned char iso2022_type
2120 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
2122 if (code_offset == 0)
2124 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
2125 chlook->charset_by_attributes[iso2022_type][final] = obj;
2129 (chlook->charset_by_attributes[iso2022_type][final][direction]));
2130 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
2134 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
2135 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
2137 /* Some charsets are "faux" and don't have names or really exist at
2138 all except in the leading-byte table. */
2140 Fputhash (name, obj, Vcharset_hash_table);
2145 get_unallocated_leading_byte (int dimension)
2150 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
2153 lb = chlook->next_allocated_leading_byte++;
2157 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
2160 lb = chlook->next_allocated_1_byte_leading_byte++;
2164 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
2167 lb = chlook->next_allocated_2_byte_leading_byte++;
2173 ("No more character sets free for this dimension",
2174 make_int (dimension));
2181 make_builtin_char (Lisp_Object charset, int c1, int c2)
2183 if (XCHARSET_UCS_MAX (charset))
2186 = (XCHARSET_DIMENSION (charset) == 1
2188 c1 - XCHARSET_BYTE_OFFSET (charset)
2190 (c1 - XCHARSET_BYTE_OFFSET (charset)) * XCHARSET_CHARS (charset)
2191 + c2 - XCHARSET_BYTE_OFFSET (charset))
2192 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
2193 if ((code < XCHARSET_UCS_MIN (charset))
2194 || (XCHARSET_UCS_MAX (charset) < code))
2195 signal_simple_error ("Arguments makes invalid character",
2199 else if (XCHARSET_DIMENSION (charset) == 1)
2201 switch (XCHARSET_CHARS (charset))
2205 + (XCHARSET_FINAL (charset) - '0') * 94 + (c1 - 33);
2208 + (XCHARSET_FINAL (charset) - '0') * 96 + (c1 - 32);
2215 switch (XCHARSET_CHARS (charset))
2218 return MIN_CHAR_94x94
2219 + (XCHARSET_FINAL (charset) - '0') * 94 * 94
2220 + (c1 - 33) * 94 + (c2 - 33);
2222 return MIN_CHAR_96x96
2223 + (XCHARSET_FINAL (charset) - '0') * 96 * 96
2224 + (c1 - 32) * 96 + (c2 - 32);
2232 range_charset_code_point (Lisp_Object charset, Emchar ch)
2236 if ((XCHARSET_UCS_MIN (charset) <= ch)
2237 && (ch <= XCHARSET_UCS_MAX (charset)))
2239 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
2241 if (XCHARSET_CHARS (charset) == 256)
2243 else if (XCHARSET_DIMENSION (charset) == 1)
2244 return d + XCHARSET_BYTE_OFFSET (charset);
2245 else if (XCHARSET_DIMENSION (charset) == 2)
2247 ((d / XCHARSET_CHARS (charset)
2248 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2249 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2250 else if (XCHARSET_DIMENSION (charset) == 3)
2252 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2253 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2254 | ((d / XCHARSET_CHARS (charset)
2255 % XCHARSET_CHARS (charset)
2256 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2257 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2258 else /* if (XCHARSET_DIMENSION (charset) == 4) */
2260 ((d / (XCHARSET_CHARS (charset)
2261 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2262 + XCHARSET_BYTE_OFFSET (charset)) << 24)
2263 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2264 % XCHARSET_CHARS (charset)
2265 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2266 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
2267 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2268 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2270 else if (XCHARSET_CODE_OFFSET (charset) == 0)
2272 if (XCHARSET_DIMENSION (charset) == 1)
2274 if (XCHARSET_CHARS (charset) == 94)
2276 if (((d = ch - (MIN_CHAR_94
2277 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
2281 else if (XCHARSET_CHARS (charset) == 96)
2283 if (((d = ch - (MIN_CHAR_96
2284 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
2291 else if (XCHARSET_DIMENSION (charset) == 2)
2293 if (XCHARSET_CHARS (charset) == 94)
2295 if (((d = ch - (MIN_CHAR_94x94
2296 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
2299 return (((d / 94) + 33) << 8) | (d % 94 + 33);
2301 else if (XCHARSET_CHARS (charset) == 96)
2303 if (((d = ch - (MIN_CHAR_96x96
2304 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
2307 return (((d / 96) + 32) << 8) | (d % 96 + 32);
2313 if (EQ (charset, Vcharset_mojikyo_2022_1)
2314 && (MIN_CHAR_MOJIKYO < ch) && (ch < MIN_CHAR_MOJIKYO + 94 * 60 * 94))
2316 int m = ch - MIN_CHAR_MOJIKYO - 1;
2317 int byte1 = m / (94 * 60) + 33;
2318 int byte2 = (m % (94 * 60)) / 94;
2319 int byte3 = m % 94 + 33;
2325 return (byte1 << 16) | (byte2 << 8) | byte3;
2331 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
2333 if (c <= MAX_CHAR_BASIC_LATIN)
2335 *charset = Vcharset_ascii;
2340 *charset = Vcharset_control_1;
2345 *charset = Vcharset_latin_iso8859_1;
2349 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
2351 *charset = Vcharset_greek_iso8859_7;
2352 return c - MIN_CHAR_GREEK + 0x20;
2354 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
2356 *charset = Vcharset_cyrillic_iso8859_5;
2357 return c - MIN_CHAR_CYRILLIC + 0x20;
2360 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
2362 *charset = Vcharset_hebrew_iso8859_8;
2363 return c - MIN_CHAR_HEBREW + 0x20;
2365 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
2367 *charset = Vcharset_thai_tis620;
2368 return c - MIN_CHAR_THAI + 0x20;
2371 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
2372 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
2374 return list2 (Vcharset_katakana_jisx0201,
2375 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
2378 else if (c <= MAX_CHAR_BMP)
2380 *charset = Vcharset_ucs_bmp;
2383 else if (c < MIN_CHAR_DAIKANWA)
2385 *charset = Vcharset_ucs;
2389 else if (c <= MAX_CHAR_DAIKANWA)
2391 *charset = Vcharset_ideograph_daikanwa;
2392 return c - MIN_CHAR_DAIKANWA;
2395 else if (c <= MAX_CHAR_MOJIKYO)
2397 *charset = Vcharset_mojikyo;
2398 return c - MIN_CHAR_MOJIKYO;
2400 else if (c < MIN_CHAR_94)
2402 *charset = Vcharset_ucs;
2405 else if (c <= MAX_CHAR_94)
2407 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
2408 ((c - MIN_CHAR_94) / 94) + '0',
2409 CHARSET_LEFT_TO_RIGHT);
2410 if (!NILP (*charset))
2411 return ((c - MIN_CHAR_94) % 94) + 33;
2414 *charset = Vcharset_ucs;
2418 else if (c <= MAX_CHAR_96)
2420 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
2421 ((c - MIN_CHAR_96) / 96) + '0',
2422 CHARSET_LEFT_TO_RIGHT);
2423 if (!NILP (*charset))
2424 return ((c - MIN_CHAR_96) % 96) + 32;
2427 *charset = Vcharset_ucs;
2431 else if (c <= MAX_CHAR_94x94)
2434 = CHARSET_BY_ATTRIBUTES (94, 2,
2435 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
2436 CHARSET_LEFT_TO_RIGHT);
2437 if (!NILP (*charset))
2438 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
2439 | (((c - MIN_CHAR_94x94) % 94) + 33);
2442 *charset = Vcharset_ucs;
2446 else if (c <= MAX_CHAR_96x96)
2449 = CHARSET_BY_ATTRIBUTES (96, 2,
2450 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
2451 CHARSET_LEFT_TO_RIGHT);
2452 if (!NILP (*charset))
2453 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
2454 | (((c - MIN_CHAR_96x96) % 96) + 32);
2457 *charset = Vcharset_ucs;
2463 *charset = Vcharset_ucs;
2468 Lisp_Object Vdefault_coded_charset_priority_list;
2472 /************************************************************************/
2473 /* Basic charset Lisp functions */
2474 /************************************************************************/
2476 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
2477 Return non-nil if OBJECT is a charset.
2481 return CHARSETP (object) ? Qt : Qnil;
2484 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
2485 Retrieve the charset of the given name.
2486 If CHARSET-OR-NAME is a charset object, it is simply returned.
2487 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
2488 nil is returned. Otherwise the associated charset object is returned.
2492 if (CHARSETP (charset_or_name))
2493 return charset_or_name;
2495 CHECK_SYMBOL (charset_or_name);
2496 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
2499 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
2500 Retrieve the charset of the given name.
2501 Same as `find-charset' except an error is signalled if there is no such
2502 charset instead of returning nil.
2506 Lisp_Object charset = Ffind_charset (name);
2509 signal_simple_error ("No such charset", name);
2513 /* We store the charsets in hash tables with the names as the key and the
2514 actual charset object as the value. Occasionally we need to use them
2515 in a list format. These routines provide us with that. */
2516 struct charset_list_closure
2518 Lisp_Object *charset_list;
2522 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
2523 void *charset_list_closure)
2525 /* This function can GC */
2526 struct charset_list_closure *chcl =
2527 (struct charset_list_closure*) charset_list_closure;
2528 Lisp_Object *charset_list = chcl->charset_list;
2530 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
2534 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
2535 Return a list of the names of all defined charsets.
2539 Lisp_Object charset_list = Qnil;
2540 struct gcpro gcpro1;
2541 struct charset_list_closure charset_list_closure;
2543 GCPRO1 (charset_list);
2544 charset_list_closure.charset_list = &charset_list;
2545 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
2546 &charset_list_closure);
2549 return charset_list;
2552 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
2553 Return the name of the given charset.
2557 return XCHARSET_NAME (Fget_charset (charset));
2560 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
2561 Define a new character set.
2562 This function is for use with Mule support.
2563 NAME is a symbol, the name by which the character set is normally referred.
2564 DOC-STRING is a string describing the character set.
2565 PROPS is a property list, describing the specific nature of the
2566 character set. Recognized properties are:
2568 'short-name Short version of the charset name (ex: Latin-1)
2569 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
2570 'registry A regular expression matching the font registry field for
2572 'dimension Number of octets used to index a character in this charset.
2573 Either 1 or 2. Defaults to 1.
2574 'columns Number of columns used to display a character in this charset.
2575 Only used in TTY mode. (Under X, the actual width of a
2576 character can be derived from the font used to display the
2577 characters.) If unspecified, defaults to the dimension
2578 (this is almost always the correct value).
2579 'chars Number of characters in each dimension (94 or 96).
2580 Defaults to 94. Note that if the dimension is 2, the
2581 character set thus described is 94x94 or 96x96.
2582 'final Final byte of ISO 2022 escape sequence. Must be
2583 supplied. Each combination of (DIMENSION, CHARS) defines a
2584 separate namespace for final bytes. Note that ISO
2585 2022 restricts the final byte to the range
2586 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
2587 dimension == 2. Note also that final bytes in the range
2588 0x30 - 0x3F are reserved for user-defined (not official)
2590 'graphic 0 (use left half of font on output) or 1 (use right half
2591 of font on output). Defaults to 0. For example, for
2592 a font whose registry is ISO8859-1, the left half
2593 (octets 0x20 - 0x7F) is the `ascii' character set, while
2594 the right half (octets 0xA0 - 0xFF) is the `latin-1'
2595 character set. With 'graphic set to 0, the octets
2596 will have their high bit cleared; with it set to 1,
2597 the octets will have their high bit set.
2598 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
2600 'ccl-program A compiled CCL program used to convert a character in
2601 this charset into an index into the font. This is in
2602 addition to the 'graphic property. The CCL program
2603 is passed the octets of the character, with the high
2604 bit cleared and set depending upon whether the value
2605 of the 'graphic property is 0 or 1.
2607 (name, doc_string, props))
2609 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
2610 int direction = CHARSET_LEFT_TO_RIGHT;
2611 Lisp_Object registry = Qnil;
2612 Lisp_Object charset;
2613 Lisp_Object rest, keyword, value;
2614 Lisp_Object ccl_program = Qnil;
2615 Lisp_Object short_name = Qnil, long_name = Qnil;
2616 int byte_offset = -1;
2618 CHECK_SYMBOL (name);
2619 if (!NILP (doc_string))
2620 CHECK_STRING (doc_string);
2622 charset = Ffind_charset (name);
2623 if (!NILP (charset))
2624 signal_simple_error ("Cannot redefine existing charset", name);
2626 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
2628 if (EQ (keyword, Qshort_name))
2630 CHECK_STRING (value);
2634 if (EQ (keyword, Qlong_name))
2636 CHECK_STRING (value);
2640 else if (EQ (keyword, Qdimension))
2643 dimension = XINT (value);
2644 if (dimension < 1 || dimension > 2)
2645 signal_simple_error ("Invalid value for 'dimension", value);
2648 else if (EQ (keyword, Qchars))
2651 chars = XINT (value);
2652 if (chars != 94 && chars != 96)
2653 signal_simple_error ("Invalid value for 'chars", value);
2656 else if (EQ (keyword, Qcolumns))
2659 columns = XINT (value);
2660 if (columns != 1 && columns != 2)
2661 signal_simple_error ("Invalid value for 'columns", value);
2664 else if (EQ (keyword, Qgraphic))
2667 graphic = XINT (value);
2669 if (graphic < 0 || graphic > 2)
2671 if (graphic < 0 || graphic > 1)
2673 signal_simple_error ("Invalid value for 'graphic", value);
2676 else if (EQ (keyword, Qregistry))
2678 CHECK_STRING (value);
2682 else if (EQ (keyword, Qdirection))
2684 if (EQ (value, Ql2r))
2685 direction = CHARSET_LEFT_TO_RIGHT;
2686 else if (EQ (value, Qr2l))
2687 direction = CHARSET_RIGHT_TO_LEFT;
2689 signal_simple_error ("Invalid value for 'direction", value);
2692 else if (EQ (keyword, Qfinal))
2694 CHECK_CHAR_COERCE_INT (value);
2695 final = XCHAR (value);
2696 if (final < '0' || final > '~')
2697 signal_simple_error ("Invalid value for 'final", value);
2700 else if (EQ (keyword, Qccl_program))
2702 CHECK_VECTOR (value);
2703 ccl_program = value;
2707 signal_simple_error ("Unrecognized property", keyword);
2711 error ("'final must be specified");
2712 if (dimension == 2 && final > 0x5F)
2714 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2717 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2718 CHARSET_LEFT_TO_RIGHT)) ||
2719 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2720 CHARSET_RIGHT_TO_LEFT)))
2722 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2724 id = get_unallocated_leading_byte (dimension);
2726 if (NILP (doc_string))
2727 doc_string = build_string ("");
2729 if (NILP (registry))
2730 registry = build_string ("");
2732 if (NILP (short_name))
2733 XSETSTRING (short_name, XSYMBOL (name)->name);
2735 if (NILP (long_name))
2736 long_name = doc_string;
2739 columns = dimension;
2741 if (byte_offset < 0)
2745 else if (chars == 96)
2751 charset = make_charset (id, name, chars, dimension, columns, graphic,
2752 final, direction, short_name, long_name,
2753 doc_string, registry,
2754 Qnil, 0, 0, 0, byte_offset);
2755 if (!NILP (ccl_program))
2756 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2760 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2762 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2763 NEW-NAME is the name of the new charset. Return the new charset.
2765 (charset, new_name))
2767 Lisp_Object new_charset = Qnil;
2768 int id, chars, dimension, columns, graphic, final;
2770 Lisp_Object registry, doc_string, short_name, long_name;
2773 charset = Fget_charset (charset);
2774 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2775 signal_simple_error ("Charset already has reverse-direction charset",
2778 CHECK_SYMBOL (new_name);
2779 if (!NILP (Ffind_charset (new_name)))
2780 signal_simple_error ("Cannot redefine existing charset", new_name);
2782 cs = XCHARSET (charset);
2784 chars = CHARSET_CHARS (cs);
2785 dimension = CHARSET_DIMENSION (cs);
2786 columns = CHARSET_COLUMNS (cs);
2787 id = get_unallocated_leading_byte (dimension);
2789 graphic = CHARSET_GRAPHIC (cs);
2790 final = CHARSET_FINAL (cs);
2791 direction = CHARSET_RIGHT_TO_LEFT;
2792 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2793 direction = CHARSET_LEFT_TO_RIGHT;
2794 doc_string = CHARSET_DOC_STRING (cs);
2795 short_name = CHARSET_SHORT_NAME (cs);
2796 long_name = CHARSET_LONG_NAME (cs);
2797 registry = CHARSET_REGISTRY (cs);
2799 new_charset = make_charset (id, new_name, chars, dimension, columns,
2800 graphic, final, direction, short_name, long_name,
2801 doc_string, registry,
2803 CHARSET_DECODING_TABLE(cs),
2804 CHARSET_UCS_MIN(cs),
2805 CHARSET_UCS_MAX(cs),
2806 CHARSET_CODE_OFFSET(cs),
2807 CHARSET_BYTE_OFFSET(cs)
2813 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2814 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
2819 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2820 Define symbol ALIAS as an alias for CHARSET.
2824 CHECK_SYMBOL (alias);
2825 charset = Fget_charset (charset);
2826 return Fputhash (alias, charset, Vcharset_hash_table);
2829 /* #### Reverse direction charsets not yet implemented. */
2831 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2833 Return the reverse-direction charset parallel to CHARSET, if any.
2834 This is the charset with the same properties (in particular, the same
2835 dimension, number of characters per dimension, and final byte) as
2836 CHARSET but whose characters are displayed in the opposite direction.
2840 charset = Fget_charset (charset);
2841 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
2845 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2846 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2847 If DIRECTION is omitted, both directions will be checked (left-to-right
2848 will be returned if character sets exist for both directions).
2850 (dimension, chars, final, direction))
2852 int dm, ch, fi, di = -1;
2853 Lisp_Object obj = Qnil;
2855 CHECK_INT (dimension);
2856 dm = XINT (dimension);
2857 if (dm < 1 || dm > 2)
2858 signal_simple_error ("Invalid value for DIMENSION", dimension);
2862 if (ch != 94 && ch != 96)
2863 signal_simple_error ("Invalid value for CHARS", chars);
2865 CHECK_CHAR_COERCE_INT (final);
2867 if (fi < '0' || fi > '~')
2868 signal_simple_error ("Invalid value for FINAL", final);
2870 if (EQ (direction, Ql2r))
2871 di = CHARSET_LEFT_TO_RIGHT;
2872 else if (EQ (direction, Qr2l))
2873 di = CHARSET_RIGHT_TO_LEFT;
2874 else if (!NILP (direction))
2875 signal_simple_error ("Invalid value for DIRECTION", direction);
2877 if (dm == 2 && fi > 0x5F)
2879 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2883 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
2885 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
2888 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
2891 return XCHARSET_NAME (obj);
2895 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2896 Return short name of CHARSET.
2900 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2903 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2904 Return long name of CHARSET.
2908 return XCHARSET_LONG_NAME (Fget_charset (charset));
2911 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2912 Return description of CHARSET.
2916 return XCHARSET_DOC_STRING (Fget_charset (charset));
2919 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2920 Return dimension of CHARSET.
2924 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2927 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2928 Return property PROP of CHARSET.
2929 Recognized properties are those listed in `make-charset', as well as
2930 'name and 'doc-string.
2936 charset = Fget_charset (charset);
2937 cs = XCHARSET (charset);
2939 CHECK_SYMBOL (prop);
2940 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2941 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2942 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2943 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2944 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2945 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2946 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2947 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2948 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2949 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2950 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2951 if (EQ (prop, Qdirection))
2952 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2953 if (EQ (prop, Qreverse_direction_charset))
2955 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2959 return XCHARSET_NAME (obj);
2961 signal_simple_error ("Unrecognized charset property name", prop);
2962 return Qnil; /* not reached */
2965 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2966 Return charset identification number of CHARSET.
2970 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2973 /* #### We need to figure out which properties we really want to
2976 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2977 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2979 (charset, ccl_program))
2981 charset = Fget_charset (charset);
2982 CHECK_VECTOR (ccl_program);
2983 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2988 invalidate_charset_font_caches (Lisp_Object charset)
2990 /* Invalidate font cache entries for charset on all devices. */
2991 Lisp_Object devcons, concons, hash_table;
2992 DEVICE_LOOP_NO_BREAK (devcons, concons)
2994 struct device *d = XDEVICE (XCAR (devcons));
2995 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
2996 if (!UNBOUNDP (hash_table))
2997 Fclrhash (hash_table);
3001 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
3002 Set the 'registry property of CHARSET to REGISTRY.
3004 (charset, registry))
3006 charset = Fget_charset (charset);
3007 CHECK_STRING (registry);
3008 XCHARSET_REGISTRY (charset) = registry;
3009 invalidate_charset_font_caches (charset);
3010 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
3015 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
3016 Return mapping-table of CHARSET.
3020 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
3023 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
3024 Set mapping-table of CHARSET to TABLE.
3028 struct Lisp_Charset *cs;
3032 charset = Fget_charset (charset);
3033 cs = XCHARSET (charset);
3037 if (VECTORP (CHARSET_DECODING_TABLE(cs)))
3038 make_vector_newer (CHARSET_DECODING_TABLE(cs));
3039 CHARSET_DECODING_TABLE(cs) = Qnil;
3042 else if (VECTORP (table))
3044 int ccs_len = CHARSET_BYTE_SIZE (cs);
3045 int ret = decoding_table_check_elements (table,
3046 CHARSET_DIMENSION (cs),
3051 signal_simple_error ("Too big table", table);
3053 signal_simple_error ("Invalid element is found", table);
3055 signal_simple_error ("Something wrong", table);
3057 CHARSET_DECODING_TABLE(cs) = Qnil;
3060 signal_error (Qwrong_type_argument,
3061 list2 (build_translated_string ("vector-or-nil-p"),
3064 byte_offset = CHARSET_BYTE_OFFSET (cs);
3065 switch (CHARSET_DIMENSION (cs))
3068 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3070 Lisp_Object c = XVECTOR_DATA(table)[i];
3073 put_char_ccs_code_point (c, charset,
3074 make_int (i + byte_offset));
3078 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3080 Lisp_Object v = XVECTOR_DATA(table)[i];
3086 for (j = 0; j < XVECTOR_LENGTH (v); j++)
3088 Lisp_Object c = XVECTOR_DATA(v)[j];
3091 put_char_ccs_code_point
3093 make_int ( ( (i + byte_offset) << 8 )
3099 put_char_ccs_code_point (v, charset,
3100 make_int (i + byte_offset));
3109 /************************************************************************/
3110 /* Lisp primitives for working with characters */
3111 /************************************************************************/
3114 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
3115 Make a character from CHARSET and code-point CODE.
3121 charset = Fget_charset (charset);
3124 if (XCHARSET_GRAPHIC (charset) == 1)
3126 return make_char (DECODE_CHAR (charset, c));
3129 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
3130 Make a builtin character from CHARSET and code-point CODE.
3137 charset = Fget_charset (charset);
3141 if ((final = XCHARSET_FINAL (charset)) >= '0')
3143 if (XCHARSET_DIMENSION (charset) == 1)
3145 switch (XCHARSET_CHARS (charset))
3149 make_char (MIN_CHAR_94 + (final - '0') * 94
3150 + ((c & 0x7F) - 33));
3153 make_char (MIN_CHAR_96 + (final - '0') * 96
3154 + ((c & 0x7F) - 32));
3156 return Fdecode_char (charset, code);
3161 switch (XCHARSET_CHARS (charset))
3165 make_char (MIN_CHAR_94x94
3166 + (final - '0') * 94 * 94
3167 + (((c >> 8) & 0x7F) - 33) * 94
3168 + ((c & 0x7F) - 33));
3171 make_char (MIN_CHAR_96x96
3172 + (final - '0') * 96 * 96
3173 + (((c >> 8) & 0x7F) - 32) * 96
3174 + ((c & 0x7F) - 32));
3176 return Fdecode_char (charset, code);
3180 else if (XCHARSET_UCS_MAX (charset))
3183 = (XCHARSET_DIMENSION (charset) == 1
3185 c - XCHARSET_BYTE_OFFSET (charset)
3187 ((c >> 8) - XCHARSET_BYTE_OFFSET (charset))
3188 * XCHARSET_CHARS (charset)
3189 + (c & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
3190 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
3191 if ((cid < XCHARSET_UCS_MIN (charset))
3192 || (XCHARSET_UCS_MAX (charset) < cid))
3193 return Fdecode_char (charset, code);
3194 return make_char (cid);
3197 return Fdecode_char (charset, code);
3201 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
3202 Make a character from CHARSET and octets ARG1 and ARG2.
3203 ARG2 is required only for characters from two-dimensional charsets.
3204 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
3205 character s with caron.
3207 (charset, arg1, arg2))
3211 int lowlim, highlim;
3213 charset = Fget_charset (charset);
3214 cs = XCHARSET (charset);
3216 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
3217 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
3219 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
3221 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
3222 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
3225 /* It is useful (and safe, according to Olivier Galibert) to strip
3226 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
3227 write (make-char 'latin-iso8859-2 CODE) where code is the actual
3228 Latin 2 code of the character. */
3236 if (a1 < lowlim || a1 > highlim)
3237 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
3239 if (CHARSET_DIMENSION (cs) == 1)
3243 ("Charset is of dimension one; second octet must be nil", arg2);
3244 return make_char (MAKE_CHAR (charset, a1, 0));
3253 a2 = XINT (arg2) & 0x7f;
3255 if (a2 < lowlim || a2 > highlim)
3256 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
3258 return make_char (MAKE_CHAR (charset, a1, a2));
3261 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
3262 Return the character set of char CH.
3266 CHECK_CHAR_COERCE_INT (ch);
3268 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
3271 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
3272 Return the octet numbered N (should be 0 or 1) of char CH.
3273 N defaults to 0 if omitted.
3277 Lisp_Object charset;
3280 CHECK_CHAR_COERCE_INT (ch);
3282 BREAKUP_CHAR (XCHAR (ch), charset, octet0, octet1);
3284 if (NILP (n) || EQ (n, Qzero))
3285 return make_int (octet0);
3286 else if (EQ (n, make_int (1)))
3287 return make_int (octet1);
3289 signal_simple_error ("Octet number must be 0 or 1", n);
3292 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
3293 Return list of charset and one or two position-codes of CHAR.
3297 /* This function can GC */
3298 struct gcpro gcpro1, gcpro2;
3299 Lisp_Object charset = Qnil;
3300 Lisp_Object rc = Qnil;
3308 GCPRO2 (charset, rc);
3309 CHECK_CHAR_COERCE_INT (character);
3312 code_point = ENCODE_CHAR (XCHAR (character), charset);
3313 dimension = XCHARSET_DIMENSION (charset);
3314 while (dimension > 0)
3316 rc = Fcons (make_int (code_point & 255), rc);
3320 rc = Fcons (XCHARSET_NAME (charset), rc);
3322 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
3324 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
3326 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
3330 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
3339 #ifdef ENABLE_COMPOSITE_CHARS
3340 /************************************************************************/
3341 /* composite character functions */
3342 /************************************************************************/
3345 lookup_composite_char (Bufbyte *str, int len)
3347 Lisp_Object lispstr = make_string (str, len);
3348 Lisp_Object ch = Fgethash (lispstr,
3349 Vcomposite_char_string2char_hash_table,
3355 if (composite_char_row_next >= 128)
3356 signal_simple_error ("No more composite chars available", lispstr);
3357 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
3358 composite_char_col_next);
3359 Fputhash (make_char (emch), lispstr,
3360 Vcomposite_char_char2string_hash_table);
3361 Fputhash (lispstr, make_char (emch),
3362 Vcomposite_char_string2char_hash_table);
3363 composite_char_col_next++;
3364 if (composite_char_col_next >= 128)
3366 composite_char_col_next = 32;
3367 composite_char_row_next++;
3376 composite_char_string (Emchar ch)
3378 Lisp_Object str = Fgethash (make_char (ch),
3379 Vcomposite_char_char2string_hash_table,
3381 assert (!UNBOUNDP (str));
3385 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
3386 Convert a string into a single composite character.
3387 The character is the result of overstriking all the characters in
3392 CHECK_STRING (string);
3393 return make_char (lookup_composite_char (XSTRING_DATA (string),
3394 XSTRING_LENGTH (string)));
3397 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
3398 Return a string of the characters comprising a composite character.
3406 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
3407 signal_simple_error ("Must be composite char", ch);
3408 return composite_char_string (emch);
3410 #endif /* ENABLE_COMPOSITE_CHARS */
3413 /************************************************************************/
3414 /* initialization */
3415 /************************************************************************/
3418 syms_of_mule_charset (void)
3421 INIT_LRECORD_IMPLEMENTATION (uint8_byte_table);
3422 INIT_LRECORD_IMPLEMENTATION (uint16_byte_table);
3423 INIT_LRECORD_IMPLEMENTATION (byte_table);
3424 INIT_LRECORD_IMPLEMENTATION (char_id_table);
3426 INIT_LRECORD_IMPLEMENTATION (charset);
3428 DEFSUBR (Fcharsetp);
3429 DEFSUBR (Ffind_charset);
3430 DEFSUBR (Fget_charset);
3431 DEFSUBR (Fcharset_list);
3432 DEFSUBR (Fcharset_name);
3433 DEFSUBR (Fmake_charset);
3434 DEFSUBR (Fmake_reverse_direction_charset);
3435 /* DEFSUBR (Freverse_direction_charset); */
3436 DEFSUBR (Fdefine_charset_alias);
3437 DEFSUBR (Fcharset_from_attributes);
3438 DEFSUBR (Fcharset_short_name);
3439 DEFSUBR (Fcharset_long_name);
3440 DEFSUBR (Fcharset_description);
3441 DEFSUBR (Fcharset_dimension);
3442 DEFSUBR (Fcharset_property);
3443 DEFSUBR (Fcharset_id);
3444 DEFSUBR (Fset_charset_ccl_program);
3445 DEFSUBR (Fset_charset_registry);
3447 DEFSUBR (Fchar_attribute_list);
3448 DEFSUBR (Ffind_char_attribute_table);
3449 DEFSUBR (Fchar_attribute_alist);
3450 DEFSUBR (Fget_char_attribute);
3451 DEFSUBR (Fput_char_attribute);
3452 DEFSUBR (Fremove_char_attribute);
3453 DEFSUBR (Fdefine_char);
3454 DEFSUBR (Fchar_variants);
3455 DEFSUBR (Fget_composite_char);
3456 DEFSUBR (Fcharset_mapping_table);
3457 DEFSUBR (Fset_charset_mapping_table);
3461 DEFSUBR (Fdecode_char);
3462 DEFSUBR (Fdecode_builtin_char);
3464 DEFSUBR (Fmake_char);
3465 DEFSUBR (Fchar_charset);
3466 DEFSUBR (Fchar_octet);
3467 DEFSUBR (Fsplit_char);
3469 #ifdef ENABLE_COMPOSITE_CHARS
3470 DEFSUBR (Fmake_composite_char);
3471 DEFSUBR (Fcomposite_char_string);
3474 defsymbol (&Qcharsetp, "charsetp");
3475 defsymbol (&Qregistry, "registry");
3476 defsymbol (&Qfinal, "final");
3477 defsymbol (&Qgraphic, "graphic");
3478 defsymbol (&Qdirection, "direction");
3479 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
3480 defsymbol (&Qshort_name, "short-name");
3481 defsymbol (&Qlong_name, "long-name");
3483 defsymbol (&Ql2r, "l2r");
3484 defsymbol (&Qr2l, "r2l");
3486 /* Charsets, compatible with FSF 20.3
3487 Naming convention is Script-Charset[-Edition] */
3488 defsymbol (&Qascii, "ascii");
3489 defsymbol (&Qcontrol_1, "control-1");
3490 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
3491 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
3492 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
3493 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
3494 defsymbol (&Qthai_tis620, "thai-tis620");
3495 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
3496 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
3497 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
3498 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
3499 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
3500 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
3501 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
3502 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
3503 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
3504 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
3505 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
3506 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
3507 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
3508 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
3509 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
3511 defsymbol (&Q_ucs, "->ucs");
3512 defsymbol (&Q_decomposition, "->decomposition");
3513 defsymbol (&Qcompat, "compat");
3514 defsymbol (&Qisolated, "isolated");
3515 defsymbol (&Qinitial, "initial");
3516 defsymbol (&Qmedial, "medial");
3517 defsymbol (&Qfinal, "final");
3518 defsymbol (&Qvertical, "vertical");
3519 defsymbol (&QnoBreak, "noBreak");
3520 defsymbol (&Qfraction, "fraction");
3521 defsymbol (&Qsuper, "super");
3522 defsymbol (&Qsub, "sub");
3523 defsymbol (&Qcircle, "circle");
3524 defsymbol (&Qsquare, "square");
3525 defsymbol (&Qwide, "wide");
3526 defsymbol (&Qnarrow, "narrow");
3527 defsymbol (&Qsmall, "small");
3528 defsymbol (&Qfont, "font");
3529 defsymbol (&Qucs, "ucs");
3530 defsymbol (&Qucs_bmp, "ucs-bmp");
3531 defsymbol (&Qlatin_viscii, "latin-viscii");
3532 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
3533 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
3534 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
3535 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
3536 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
3537 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
3538 defsymbol (&Qmojikyo, "mojikyo");
3539 defsymbol (&Qmojikyo_2022_1, "mojikyo-2022-1");
3540 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
3541 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
3542 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
3543 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
3544 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
3545 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
3546 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
3547 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
3548 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
3549 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
3550 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
3551 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
3552 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
3553 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
3554 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
3555 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
3556 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
3557 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
3558 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
3559 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
3560 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
3561 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
3563 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
3564 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
3566 defsymbol (&Qcomposite, "composite");
3570 vars_of_mule_charset (void)
3577 chlook = xnew (struct charset_lookup);
3578 dumpstruct (&chlook, &charset_lookup_description);
3580 /* Table of charsets indexed by leading byte. */
3581 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
3582 chlook->charset_by_leading_byte[i] = Qnil;
3585 /* Table of charsets indexed by type/final-byte. */
3586 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3587 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3588 chlook->charset_by_attributes[i][j] = Qnil;
3590 /* Table of charsets indexed by type/final-byte/direction. */
3591 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3592 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3593 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
3594 chlook->charset_by_attributes[i][j][k] = Qnil;
3598 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
3600 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
3601 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
3605 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3606 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
3607 Leading-code of private TYPE9N charset of column-width 1.
3609 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3613 Vutf_2000_version = build_string("0.16 (ÅŒji)");
3614 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
3615 Version number of UTF-2000.
3618 staticpro (&Vcharacter_composition_table);
3619 Vcharacter_composition_table = make_char_id_table (Qnil);
3621 staticpro (&Vcharacter_variant_table);
3622 Vcharacter_variant_table = make_char_id_table (Qnil);
3624 Vdefault_coded_charset_priority_list = Qnil;
3625 DEFVAR_LISP ("default-coded-charset-priority-list",
3626 &Vdefault_coded_charset_priority_list /*
3627 Default order of preferred coded-character-sets.
3633 complex_vars_of_mule_charset (void)
3635 staticpro (&Vcharset_hash_table);
3636 Vcharset_hash_table =
3637 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3639 /* Predefined character sets. We store them into variables for
3643 staticpro (&Vchar_attribute_hash_table);
3644 Vchar_attribute_hash_table
3645 = make_lisp_hash_table (16, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3647 staticpro (&Vcharset_ucs);
3649 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
3650 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3651 build_string ("UCS"),
3652 build_string ("UCS"),
3653 build_string ("ISO/IEC 10646"),
3655 Qnil, 0, 0xFFFFFFF, 0, 0);
3656 staticpro (&Vcharset_ucs_bmp);
3658 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
3659 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3660 build_string ("BMP"),
3661 build_string ("BMP"),
3662 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
3663 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
3664 Qnil, 0, 0xFFFF, 0, 0);
3666 # define MIN_CHAR_THAI 0
3667 # define MAX_CHAR_THAI 0
3668 # define MIN_CHAR_HEBREW 0
3669 # define MAX_CHAR_HEBREW 0
3670 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3671 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
3673 staticpro (&Vcharset_ascii);
3675 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3676 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3677 build_string ("ASCII"),
3678 build_string ("ASCII)"),
3679 build_string ("ASCII (ISO646 IRV)"),
3680 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3681 Qnil, 0, 0x7F, 0, 0);
3682 staticpro (&Vcharset_control_1);
3683 Vcharset_control_1 =
3684 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3685 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3686 build_string ("C1"),
3687 build_string ("Control characters"),
3688 build_string ("Control characters 128-191"),
3690 Qnil, 0x80, 0x9F, 0, 0);
3691 staticpro (&Vcharset_latin_iso8859_1);
3692 Vcharset_latin_iso8859_1 =
3693 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3694 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3695 build_string ("Latin-1"),
3696 build_string ("ISO8859-1 (Latin-1)"),
3697 build_string ("ISO8859-1 (Latin-1)"),
3698 build_string ("iso8859-1"),
3699 Qnil, 0xA0, 0xFF, 0, 32);
3700 staticpro (&Vcharset_latin_iso8859_2);
3701 Vcharset_latin_iso8859_2 =
3702 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3703 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3704 build_string ("Latin-2"),
3705 build_string ("ISO8859-2 (Latin-2)"),
3706 build_string ("ISO8859-2 (Latin-2)"),
3707 build_string ("iso8859-2"),
3709 staticpro (&Vcharset_latin_iso8859_3);
3710 Vcharset_latin_iso8859_3 =
3711 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3712 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3713 build_string ("Latin-3"),
3714 build_string ("ISO8859-3 (Latin-3)"),
3715 build_string ("ISO8859-3 (Latin-3)"),
3716 build_string ("iso8859-3"),
3718 staticpro (&Vcharset_latin_iso8859_4);
3719 Vcharset_latin_iso8859_4 =
3720 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3721 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3722 build_string ("Latin-4"),
3723 build_string ("ISO8859-4 (Latin-4)"),
3724 build_string ("ISO8859-4 (Latin-4)"),
3725 build_string ("iso8859-4"),
3727 staticpro (&Vcharset_thai_tis620);
3728 Vcharset_thai_tis620 =
3729 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3730 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3731 build_string ("TIS620"),
3732 build_string ("TIS620 (Thai)"),
3733 build_string ("TIS620.2529 (Thai)"),
3734 build_string ("tis620"),
3735 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
3736 staticpro (&Vcharset_greek_iso8859_7);
3737 Vcharset_greek_iso8859_7 =
3738 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3739 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3740 build_string ("ISO8859-7"),
3741 build_string ("ISO8859-7 (Greek)"),
3742 build_string ("ISO8859-7 (Greek)"),
3743 build_string ("iso8859-7"),
3745 0 /* MIN_CHAR_GREEK */,
3746 0 /* MAX_CHAR_GREEK */, 0, 32);
3747 staticpro (&Vcharset_arabic_iso8859_6);
3748 Vcharset_arabic_iso8859_6 =
3749 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3750 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3751 build_string ("ISO8859-6"),
3752 build_string ("ISO8859-6 (Arabic)"),
3753 build_string ("ISO8859-6 (Arabic)"),
3754 build_string ("iso8859-6"),
3756 staticpro (&Vcharset_hebrew_iso8859_8);
3757 Vcharset_hebrew_iso8859_8 =
3758 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3759 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3760 build_string ("ISO8859-8"),
3761 build_string ("ISO8859-8 (Hebrew)"),
3762 build_string ("ISO8859-8 (Hebrew)"),
3763 build_string ("iso8859-8"),
3764 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
3765 staticpro (&Vcharset_katakana_jisx0201);
3766 Vcharset_katakana_jisx0201 =
3767 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3768 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3769 build_string ("JISX0201 Kana"),
3770 build_string ("JISX0201.1976 (Japanese Kana)"),
3771 build_string ("JISX0201.1976 Japanese Kana"),
3772 build_string ("jisx0201\\.1976"),
3774 staticpro (&Vcharset_latin_jisx0201);
3775 Vcharset_latin_jisx0201 =
3776 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3777 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3778 build_string ("JISX0201 Roman"),
3779 build_string ("JISX0201.1976 (Japanese Roman)"),
3780 build_string ("JISX0201.1976 Japanese Roman"),
3781 build_string ("jisx0201\\.1976"),
3783 staticpro (&Vcharset_cyrillic_iso8859_5);
3784 Vcharset_cyrillic_iso8859_5 =
3785 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3786 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3787 build_string ("ISO8859-5"),
3788 build_string ("ISO8859-5 (Cyrillic)"),
3789 build_string ("ISO8859-5 (Cyrillic)"),
3790 build_string ("iso8859-5"),
3792 0 /* MIN_CHAR_CYRILLIC */,
3793 0 /* MAX_CHAR_CYRILLIC */, 0, 32);
3794 staticpro (&Vcharset_latin_iso8859_9);
3795 Vcharset_latin_iso8859_9 =
3796 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3797 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3798 build_string ("Latin-5"),
3799 build_string ("ISO8859-9 (Latin-5)"),
3800 build_string ("ISO8859-9 (Latin-5)"),
3801 build_string ("iso8859-9"),
3803 staticpro (&Vcharset_japanese_jisx0208_1978);
3804 Vcharset_japanese_jisx0208_1978 =
3805 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3806 Qjapanese_jisx0208_1978, 94, 2,
3807 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3808 build_string ("JIS X0208:1978"),
3809 build_string ("JIS X0208:1978 (Japanese)"),
3811 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3812 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3814 staticpro (&Vcharset_chinese_gb2312);
3815 Vcharset_chinese_gb2312 =
3816 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3817 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3818 build_string ("GB2312"),
3819 build_string ("GB2312)"),
3820 build_string ("GB2312 Chinese simplified"),
3821 build_string ("gb2312"),
3823 staticpro (&Vcharset_japanese_jisx0208);
3824 Vcharset_japanese_jisx0208 =
3825 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3826 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3827 build_string ("JISX0208"),
3828 build_string ("JIS X0208:1983 (Japanese)"),
3829 build_string ("JIS X0208:1983 Japanese Kanji"),
3830 build_string ("jisx0208\\.1983"),
3833 staticpro (&Vcharset_japanese_jisx0208_1990);
3834 Vcharset_japanese_jisx0208_1990 =
3835 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3836 Qjapanese_jisx0208_1990, 94, 2,
3837 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3838 build_string ("JISX0208-1990"),
3839 build_string ("JIS X0208:1990 (Japanese)"),
3840 build_string ("JIS X0208:1990 Japanese Kanji"),
3841 build_string ("jisx0208\\.1990"),
3843 MIN_CHAR_JIS_X0208_1990,
3844 MAX_CHAR_JIS_X0208_1990, 0, 33);
3846 staticpro (&Vcharset_korean_ksc5601);
3847 Vcharset_korean_ksc5601 =
3848 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3849 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3850 build_string ("KSC5601"),
3851 build_string ("KSC5601 (Korean"),
3852 build_string ("KSC5601 Korean Hangul and Hanja"),
3853 build_string ("ksc5601"),
3855 staticpro (&Vcharset_japanese_jisx0212);
3856 Vcharset_japanese_jisx0212 =
3857 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3858 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3859 build_string ("JISX0212"),
3860 build_string ("JISX0212 (Japanese)"),
3861 build_string ("JISX0212 Japanese Supplement"),
3862 build_string ("jisx0212"),
3865 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3866 staticpro (&Vcharset_chinese_cns11643_1);
3867 Vcharset_chinese_cns11643_1 =
3868 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3869 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3870 build_string ("CNS11643-1"),
3871 build_string ("CNS11643-1 (Chinese traditional)"),
3873 ("CNS 11643 Plane 1 Chinese traditional"),
3874 build_string (CHINESE_CNS_PLANE_RE("1")),
3876 staticpro (&Vcharset_chinese_cns11643_2);
3877 Vcharset_chinese_cns11643_2 =
3878 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3879 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3880 build_string ("CNS11643-2"),
3881 build_string ("CNS11643-2 (Chinese traditional)"),
3883 ("CNS 11643 Plane 2 Chinese traditional"),
3884 build_string (CHINESE_CNS_PLANE_RE("2")),
3887 staticpro (&Vcharset_latin_tcvn5712);
3888 Vcharset_latin_tcvn5712 =
3889 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3890 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3891 build_string ("TCVN 5712"),
3892 build_string ("TCVN 5712 (VSCII-2)"),
3893 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3894 build_string ("tcvn5712-1"),
3896 staticpro (&Vcharset_latin_viscii_lower);
3897 Vcharset_latin_viscii_lower =
3898 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3899 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3900 build_string ("VISCII lower"),
3901 build_string ("VISCII lower (Vietnamese)"),
3902 build_string ("VISCII lower (Vietnamese)"),
3903 build_string ("MULEVISCII-LOWER"),
3905 staticpro (&Vcharset_latin_viscii_upper);
3906 Vcharset_latin_viscii_upper =
3907 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3908 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3909 build_string ("VISCII upper"),
3910 build_string ("VISCII upper (Vietnamese)"),
3911 build_string ("VISCII upper (Vietnamese)"),
3912 build_string ("MULEVISCII-UPPER"),
3914 staticpro (&Vcharset_latin_viscii);
3915 Vcharset_latin_viscii =
3916 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3917 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3918 build_string ("VISCII"),
3919 build_string ("VISCII 1.1 (Vietnamese)"),
3920 build_string ("VISCII 1.1 (Vietnamese)"),
3921 build_string ("VISCII1\\.1"),
3923 staticpro (&Vcharset_ideograph_daikanwa);
3924 Vcharset_ideograph_daikanwa =
3925 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
3926 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3927 build_string ("Daikanwa"),
3928 build_string ("Morohashi's Daikanwa"),
3929 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
3930 build_string ("Daikanwa"),
3931 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
3932 staticpro (&Vcharset_mojikyo);
3934 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
3935 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3936 build_string ("Mojikyo"),
3937 build_string ("Mojikyo"),
3938 build_string ("Konjaku-Mojikyo"),
3940 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
3941 staticpro (&Vcharset_mojikyo_2022_1);
3942 Vcharset_mojikyo_2022_1 =
3943 make_charset (LEADING_BYTE_MOJIKYO_2022_1, Qmojikyo_2022_1, 94, 3,
3944 2, 2, ':', CHARSET_LEFT_TO_RIGHT,
3945 build_string ("Mojikyo-2022-1"),
3946 build_string ("Mojikyo ISO-2022 Part 1"),
3947 build_string ("Konjaku-Mojikyo for ISO/IEC 2022 Part 1"),
3950 staticpro (&Vcharset_mojikyo_pj_1);
3951 Vcharset_mojikyo_pj_1 =
3952 make_charset (LEADING_BYTE_MOJIKYO_PJ_1, Qmojikyo_pj_1, 94, 2,
3953 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3954 build_string ("Mojikyo-PJ-1"),
3955 build_string ("Mojikyo (pseudo JIS encoding) part 1"),
3957 ("Konjaku-Mojikyo (pseudo JIS encoding) part 1"),
3958 build_string ("jisx0208\\.Mojikyo-1$"),
3960 staticpro (&Vcharset_mojikyo_pj_2);
3961 Vcharset_mojikyo_pj_2 =
3962 make_charset (LEADING_BYTE_MOJIKYO_PJ_2, Qmojikyo_pj_2, 94, 2,
3963 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3964 build_string ("Mojikyo-PJ-2"),
3965 build_string ("Mojikyo (pseudo JIS encoding) part 2"),
3967 ("Konjaku-Mojikyo (pseudo JIS encoding) part 2"),
3968 build_string ("jisx0208\\.Mojikyo-2$"),
3970 staticpro (&Vcharset_mojikyo_pj_3);
3971 Vcharset_mojikyo_pj_3 =
3972 make_charset (LEADING_BYTE_MOJIKYO_PJ_3, Qmojikyo_pj_3, 94, 2,
3973 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3974 build_string ("Mojikyo-PJ-3"),
3975 build_string ("Mojikyo (pseudo JIS encoding) part 3"),
3977 ("Konjaku-Mojikyo (pseudo JIS encoding) part 3"),
3978 build_string ("jisx0208\\.Mojikyo-3$"),
3980 staticpro (&Vcharset_mojikyo_pj_4);
3981 Vcharset_mojikyo_pj_4 =
3982 make_charset (LEADING_BYTE_MOJIKYO_PJ_4, Qmojikyo_pj_4, 94, 2,
3983 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3984 build_string ("Mojikyo-PJ-4"),
3985 build_string ("Mojikyo (pseudo JIS encoding) part 4"),
3987 ("Konjaku-Mojikyo (pseudo JIS encoding) part 4"),
3988 build_string ("jisx0208\\.Mojikyo-4$"),
3990 staticpro (&Vcharset_mojikyo_pj_5);
3991 Vcharset_mojikyo_pj_5 =
3992 make_charset (LEADING_BYTE_MOJIKYO_PJ_5, Qmojikyo_pj_5, 94, 2,
3993 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3994 build_string ("Mojikyo-PJ-5"),
3995 build_string ("Mojikyo (pseudo JIS encoding) part 5"),
3997 ("Konjaku-Mojikyo (pseudo JIS encoding) part 5"),
3998 build_string ("jisx0208\\.Mojikyo-5$"),
4000 staticpro (&Vcharset_mojikyo_pj_6);
4001 Vcharset_mojikyo_pj_6 =
4002 make_charset (LEADING_BYTE_MOJIKYO_PJ_6, Qmojikyo_pj_6, 94, 2,
4003 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4004 build_string ("Mojikyo-PJ-6"),
4005 build_string ("Mojikyo (pseudo JIS encoding) part 6"),
4007 ("Konjaku-Mojikyo (pseudo JIS encoding) part 6"),
4008 build_string ("jisx0208\\.Mojikyo-6$"),
4010 staticpro (&Vcharset_mojikyo_pj_7);
4011 Vcharset_mojikyo_pj_7 =
4012 make_charset (LEADING_BYTE_MOJIKYO_PJ_7, Qmojikyo_pj_7, 94, 2,
4013 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4014 build_string ("Mojikyo-PJ-7"),
4015 build_string ("Mojikyo (pseudo JIS encoding) part 7"),
4017 ("Konjaku-Mojikyo (pseudo JIS encoding) part 7"),
4018 build_string ("jisx0208\\.Mojikyo-7$"),
4020 staticpro (&Vcharset_mojikyo_pj_8);
4021 Vcharset_mojikyo_pj_8 =
4022 make_charset (LEADING_BYTE_MOJIKYO_PJ_8, Qmojikyo_pj_8, 94, 2,
4023 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4024 build_string ("Mojikyo-PJ-8"),
4025 build_string ("Mojikyo (pseudo JIS encoding) part 8"),
4027 ("Konjaku-Mojikyo (pseudo JIS encoding) part 8"),
4028 build_string ("jisx0208\\.Mojikyo-8$"),
4030 staticpro (&Vcharset_mojikyo_pj_9);
4031 Vcharset_mojikyo_pj_9 =
4032 make_charset (LEADING_BYTE_MOJIKYO_PJ_9, Qmojikyo_pj_9, 94, 2,
4033 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4034 build_string ("Mojikyo-PJ-9"),
4035 build_string ("Mojikyo (pseudo JIS encoding) part 9"),
4037 ("Konjaku-Mojikyo (pseudo JIS encoding) part 9"),
4038 build_string ("jisx0208\\.Mojikyo-9$"),
4040 staticpro (&Vcharset_mojikyo_pj_10);
4041 Vcharset_mojikyo_pj_10 =
4042 make_charset (LEADING_BYTE_MOJIKYO_PJ_10, Qmojikyo_pj_10, 94, 2,
4043 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4044 build_string ("Mojikyo-PJ-10"),
4045 build_string ("Mojikyo (pseudo JIS encoding) part 10"),
4047 ("Konjaku-Mojikyo (pseudo JIS encoding) part 10"),
4048 build_string ("jisx0208\\.Mojikyo-10$"),
4050 staticpro (&Vcharset_mojikyo_pj_11);
4051 Vcharset_mojikyo_pj_11 =
4052 make_charset (LEADING_BYTE_MOJIKYO_PJ_11, Qmojikyo_pj_11, 94, 2,
4053 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4054 build_string ("Mojikyo-PJ-11"),
4055 build_string ("Mojikyo (pseudo JIS encoding) part 11"),
4057 ("Konjaku-Mojikyo (pseudo JIS encoding) part 11"),
4058 build_string ("jisx0208\\.Mojikyo-11$"),
4060 staticpro (&Vcharset_mojikyo_pj_12);
4061 Vcharset_mojikyo_pj_12 =
4062 make_charset (LEADING_BYTE_MOJIKYO_PJ_12, Qmojikyo_pj_12, 94, 2,
4063 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4064 build_string ("Mojikyo-PJ-12"),
4065 build_string ("Mojikyo (pseudo JIS encoding) part 12"),
4067 ("Konjaku-Mojikyo (pseudo JIS encoding) part 12"),
4068 build_string ("jisx0208\\.Mojikyo-12$"),
4070 staticpro (&Vcharset_mojikyo_pj_13);
4071 Vcharset_mojikyo_pj_13 =
4072 make_charset (LEADING_BYTE_MOJIKYO_PJ_13, Qmojikyo_pj_13, 94, 2,
4073 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4074 build_string ("Mojikyo-PJ-13"),
4075 build_string ("Mojikyo (pseudo JIS encoding) part 13"),
4077 ("Konjaku-Mojikyo (pseudo JIS encoding) part 13"),
4078 build_string ("jisx0208\\.Mojikyo-13$"),
4080 staticpro (&Vcharset_mojikyo_pj_14);
4081 Vcharset_mojikyo_pj_14 =
4082 make_charset (LEADING_BYTE_MOJIKYO_PJ_14, Qmojikyo_pj_14, 94, 2,
4083 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4084 build_string ("Mojikyo-PJ-14"),
4085 build_string ("Mojikyo (pseudo JIS encoding) part 14"),
4087 ("Konjaku-Mojikyo (pseudo JIS encoding) part 14"),
4088 build_string ("jisx0208\\.Mojikyo-14$"),
4090 staticpro (&Vcharset_mojikyo_pj_15);
4091 Vcharset_mojikyo_pj_15 =
4092 make_charset (LEADING_BYTE_MOJIKYO_PJ_15, Qmojikyo_pj_15, 94, 2,
4093 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4094 build_string ("Mojikyo-PJ-15"),
4095 build_string ("Mojikyo (pseudo JIS encoding) part 15"),
4097 ("Konjaku-Mojikyo (pseudo JIS encoding) part 15"),
4098 build_string ("jisx0208\\.Mojikyo-15$"),
4100 staticpro (&Vcharset_mojikyo_pj_16);
4101 Vcharset_mojikyo_pj_16 =
4102 make_charset (LEADING_BYTE_MOJIKYO_PJ_16, Qmojikyo_pj_16, 94, 2,
4103 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4104 build_string ("Mojikyo-PJ-16"),
4105 build_string ("Mojikyo (pseudo JIS encoding) part 16"),
4107 ("Konjaku-Mojikyo (pseudo JIS encoding) part 16"),
4108 build_string ("jisx0208\\.Mojikyo-16$"),
4110 staticpro (&Vcharset_mojikyo_pj_17);
4111 Vcharset_mojikyo_pj_17 =
4112 make_charset (LEADING_BYTE_MOJIKYO_PJ_17, Qmojikyo_pj_17, 94, 2,
4113 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4114 build_string ("Mojikyo-PJ-17"),
4115 build_string ("Mojikyo (pseudo JIS encoding) part 17"),
4117 ("Konjaku-Mojikyo (pseudo JIS encoding) part 17"),
4118 build_string ("jisx0208\\.Mojikyo-17$"),
4120 staticpro (&Vcharset_mojikyo_pj_18);
4121 Vcharset_mojikyo_pj_18 =
4122 make_charset (LEADING_BYTE_MOJIKYO_PJ_18, Qmojikyo_pj_18, 94, 2,
4123 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4124 build_string ("Mojikyo-PJ-18"),
4125 build_string ("Mojikyo (pseudo JIS encoding) part 18"),
4127 ("Konjaku-Mojikyo (pseudo JIS encoding) part 18"),
4128 build_string ("jisx0208\\.Mojikyo-18$"),
4130 staticpro (&Vcharset_mojikyo_pj_19);
4131 Vcharset_mojikyo_pj_19 =
4132 make_charset (LEADING_BYTE_MOJIKYO_PJ_19, Qmojikyo_pj_19, 94, 2,
4133 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4134 build_string ("Mojikyo-PJ-19"),
4135 build_string ("Mojikyo (pseudo JIS encoding) part 19"),
4137 ("Konjaku-Mojikyo (pseudo JIS encoding) part 19"),
4138 build_string ("jisx0208\\.Mojikyo-19$"),
4140 staticpro (&Vcharset_mojikyo_pj_20);
4141 Vcharset_mojikyo_pj_20 =
4142 make_charset (LEADING_BYTE_MOJIKYO_PJ_20, Qmojikyo_pj_20, 94, 2,
4143 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4144 build_string ("Mojikyo-PJ-20"),
4145 build_string ("Mojikyo (pseudo JIS encoding) part 20"),
4147 ("Konjaku-Mojikyo (pseudo JIS encoding) part 20"),
4148 build_string ("jisx0208\\.Mojikyo-20$"),
4150 staticpro (&Vcharset_mojikyo_pj_21);
4151 Vcharset_mojikyo_pj_21 =
4152 make_charset (LEADING_BYTE_MOJIKYO_PJ_21, Qmojikyo_pj_21, 94, 2,
4153 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4154 build_string ("Mojikyo-PJ-21"),
4155 build_string ("Mojikyo (pseudo JIS encoding) part 21"),
4157 ("Konjaku-Mojikyo (pseudo JIS encoding) part 21"),
4158 build_string ("jisx0208\\.Mojikyo-21$"),
4160 staticpro (&Vcharset_ethiopic_ucs);
4161 Vcharset_ethiopic_ucs =
4162 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
4163 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4164 build_string ("Ethiopic (UCS)"),
4165 build_string ("Ethiopic (UCS)"),
4166 build_string ("Ethiopic of UCS"),
4167 build_string ("Ethiopic-Unicode"),
4168 Qnil, 0x1200, 0x137F, 0x1200, 0);
4170 staticpro (&Vcharset_chinese_big5_1);
4171 Vcharset_chinese_big5_1 =
4172 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
4173 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
4174 build_string ("Big5"),
4175 build_string ("Big5 (Level-1)"),
4177 ("Big5 Level-1 Chinese traditional"),
4178 build_string ("big5"),
4180 staticpro (&Vcharset_chinese_big5_2);
4181 Vcharset_chinese_big5_2 =
4182 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
4183 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
4184 build_string ("Big5"),
4185 build_string ("Big5 (Level-2)"),
4187 ("Big5 Level-2 Chinese traditional"),
4188 build_string ("big5"),
4191 #ifdef ENABLE_COMPOSITE_CHARS
4192 /* #### For simplicity, we put composite chars into a 96x96 charset.
4193 This is going to lead to problems because you can run out of
4194 room, esp. as we don't yet recycle numbers. */
4195 staticpro (&Vcharset_composite);
4196 Vcharset_composite =
4197 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
4198 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4199 build_string ("Composite"),
4200 build_string ("Composite characters"),
4201 build_string ("Composite characters"),
4204 /* #### not dumped properly */
4205 composite_char_row_next = 32;
4206 composite_char_col_next = 32;
4208 Vcomposite_char_string2char_hash_table =
4209 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
4210 Vcomposite_char_char2string_hash_table =
4211 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
4212 staticpro (&Vcomposite_char_string2char_hash_table);
4213 staticpro (&Vcomposite_char_char2string_hash_table);
4214 #endif /* ENABLE_COMPOSITE_CHARS */