1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 1999,2000 MORIOKA Tomohiko
6 This file is part of XEmacs.
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
23 /* Synched up with: FSF 20.3. Not in FSF. */
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
41 /* The various pre-defined charsets. */
43 Lisp_Object Vcharset_ascii;
44 Lisp_Object Vcharset_control_1;
45 Lisp_Object Vcharset_latin_iso8859_1;
46 Lisp_Object Vcharset_latin_iso8859_2;
47 Lisp_Object Vcharset_latin_iso8859_3;
48 Lisp_Object Vcharset_latin_iso8859_4;
49 Lisp_Object Vcharset_thai_tis620;
50 Lisp_Object Vcharset_greek_iso8859_7;
51 Lisp_Object Vcharset_arabic_iso8859_6;
52 Lisp_Object Vcharset_hebrew_iso8859_8;
53 Lisp_Object Vcharset_katakana_jisx0201;
54 Lisp_Object Vcharset_latin_jisx0201;
55 Lisp_Object Vcharset_cyrillic_iso8859_5;
56 Lisp_Object Vcharset_latin_iso8859_9;
57 Lisp_Object Vcharset_japanese_jisx0208_1978;
58 Lisp_Object Vcharset_chinese_gb2312;
59 Lisp_Object Vcharset_japanese_jisx0208;
60 Lisp_Object Vcharset_japanese_jisx0208_1990;
61 Lisp_Object Vcharset_korean_ksc5601;
62 Lisp_Object Vcharset_japanese_jisx0212;
63 Lisp_Object Vcharset_chinese_cns11643_1;
64 Lisp_Object Vcharset_chinese_cns11643_2;
66 Lisp_Object Vcharset_ucs;
67 Lisp_Object Vcharset_ucs_bmp;
68 Lisp_Object Vcharset_latin_viscii;
69 Lisp_Object Vcharset_latin_tcvn5712;
70 Lisp_Object Vcharset_latin_viscii_lower;
71 Lisp_Object Vcharset_latin_viscii_upper;
72 Lisp_Object Vcharset_ideograph_daikanwa;
73 Lisp_Object Vcharset_mojikyo;
74 Lisp_Object Vcharset_mojikyo_2022_1;
75 Lisp_Object Vcharset_mojikyo_pj_1;
76 Lisp_Object Vcharset_mojikyo_pj_2;
77 Lisp_Object Vcharset_mojikyo_pj_3;
78 Lisp_Object Vcharset_mojikyo_pj_4;
79 Lisp_Object Vcharset_mojikyo_pj_5;
80 Lisp_Object Vcharset_mojikyo_pj_6;
81 Lisp_Object Vcharset_mojikyo_pj_7;
82 Lisp_Object Vcharset_mojikyo_pj_8;
83 Lisp_Object Vcharset_mojikyo_pj_9;
84 Lisp_Object Vcharset_mojikyo_pj_10;
85 Lisp_Object Vcharset_mojikyo_pj_11;
86 Lisp_Object Vcharset_mojikyo_pj_12;
87 Lisp_Object Vcharset_mojikyo_pj_13;
88 Lisp_Object Vcharset_mojikyo_pj_14;
89 Lisp_Object Vcharset_mojikyo_pj_15;
90 Lisp_Object Vcharset_mojikyo_pj_16;
91 Lisp_Object Vcharset_mojikyo_pj_17;
92 Lisp_Object Vcharset_mojikyo_pj_18;
93 Lisp_Object Vcharset_mojikyo_pj_19;
94 Lisp_Object Vcharset_mojikyo_pj_20;
95 Lisp_Object Vcharset_mojikyo_pj_21;
96 Lisp_Object Vcharset_ethiopic_ucs;
98 Lisp_Object Vcharset_chinese_big5_1;
99 Lisp_Object Vcharset_chinese_big5_2;
101 #ifdef ENABLE_COMPOSITE_CHARS
102 Lisp_Object Vcharset_composite;
104 /* Hash tables for composite chars. One maps string representing
105 composed chars to their equivalent chars; one goes the
107 Lisp_Object Vcomposite_char_char2string_hash_table;
108 Lisp_Object Vcomposite_char_string2char_hash_table;
110 static int composite_char_row_next;
111 static int composite_char_col_next;
113 #endif /* ENABLE_COMPOSITE_CHARS */
115 struct charset_lookup *chlook;
117 static const struct lrecord_description charset_lookup_description_1[] = {
118 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte),
127 static const struct struct_description charset_lookup_description = {
128 sizeof (struct charset_lookup),
129 charset_lookup_description_1
133 /* Table of number of bytes in the string representation of a character
134 indexed by the first byte of that representation.
136 rep_bytes_by_first_byte(c) is more efficient than the equivalent
137 canonical computation:
139 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
141 const Bytecount rep_bytes_by_first_byte[0xA0] =
142 { /* 0x00 - 0x7f are for straight ASCII */
143 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
146 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
147 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
148 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
149 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
150 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
151 /* 0x80 - 0x8f are for Dimension-1 official charsets */
153 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
155 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
157 /* 0x90 - 0x9d are for Dimension-2 official charsets */
158 /* 0x9e is for Dimension-1 private charsets */
159 /* 0x9f is for Dimension-2 private charsets */
160 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
166 #define BT_UINT8_MIN 0
167 #define BT_UINT8_MAX (UCHAR_MAX - 3)
168 #define BT_UINT8_t (UCHAR_MAX - 2)
169 #define BT_UINT8_nil (UCHAR_MAX - 1)
170 #define BT_UINT8_unbound UCHAR_MAX
172 INLINE_HEADER int INT_UINT8_P (Lisp_Object obj);
173 INLINE_HEADER int UINT8_VALUE_P (Lisp_Object obj);
174 INLINE_HEADER unsigned char UINT8_ENCODE (Lisp_Object obj);
175 INLINE_HEADER Lisp_Object UINT8_DECODE (unsigned char n);
176 INLINE_HEADER unsigned short UINT8_TO_UINT16 (unsigned char n);
179 INT_UINT8_P (Lisp_Object obj)
183 int num = XINT (obj);
185 return (BT_UINT8_MIN <= num) && (num <= BT_UINT8_MAX);
192 UINT8_VALUE_P (Lisp_Object obj)
194 return EQ (obj, Qunbound)
195 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT8_P (obj);
198 INLINE_HEADER unsigned char
199 UINT8_ENCODE (Lisp_Object obj)
201 if (EQ (obj, Qunbound))
202 return BT_UINT8_unbound;
203 else if (EQ (obj, Qnil))
205 else if (EQ (obj, Qt))
211 INLINE_HEADER Lisp_Object
212 UINT8_DECODE (unsigned char n)
214 if (n == BT_UINT8_unbound)
216 else if (n == BT_UINT8_nil)
218 else if (n == BT_UINT8_t)
225 mark_uint8_byte_table (Lisp_Object obj)
231 print_uint8_byte_table (Lisp_Object obj,
232 Lisp_Object printcharfun, int escapeflag)
234 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
236 struct gcpro gcpro1, gcpro2;
237 GCPRO2 (obj, printcharfun);
239 write_c_string ("\n#<uint8-byte-table", printcharfun);
240 for (i = 0; i < 256; i++)
242 unsigned char n = bte->property[i];
244 write_c_string ("\n ", printcharfun);
245 write_c_string (" ", printcharfun);
246 if (n == BT_UINT8_unbound)
247 write_c_string ("void", printcharfun);
248 else if (n == BT_UINT8_nil)
249 write_c_string ("nil", printcharfun);
250 else if (n == BT_UINT8_t)
251 write_c_string ("t", printcharfun);
256 sprintf (buf, "%hd", n);
257 write_c_string (buf, printcharfun);
261 write_c_string (">", printcharfun);
265 uint8_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
267 Lisp_Uint8_Byte_Table *te1 = XUINT8_BYTE_TABLE (obj1);
268 Lisp_Uint8_Byte_Table *te2 = XUINT8_BYTE_TABLE (obj2);
271 for (i = 0; i < 256; i++)
272 if (te1->property[i] != te2->property[i])
278 uint8_byte_table_hash (Lisp_Object obj, int depth)
280 Lisp_Uint8_Byte_Table *te = XUINT8_BYTE_TABLE (obj);
284 for (i = 0; i < 256; i++)
285 hash = HASH2 (hash, te->property[i]);
289 DEFINE_LRECORD_IMPLEMENTATION ("uint8-byte-table", uint8_byte_table,
290 mark_uint8_byte_table,
291 print_uint8_byte_table,
292 0, uint8_byte_table_equal,
293 uint8_byte_table_hash,
294 0 /* uint8_byte_table_description */,
295 Lisp_Uint8_Byte_Table);
298 make_uint8_byte_table (unsigned char initval)
302 Lisp_Uint8_Byte_Table *cte;
304 cte = alloc_lcrecord_type (Lisp_Uint8_Byte_Table,
305 &lrecord_uint8_byte_table);
307 for (i = 0; i < 256; i++)
308 cte->property[i] = initval;
310 XSETUINT8_BYTE_TABLE (obj, cte);
315 uint8_byte_table_same_value_p (Lisp_Object obj)
317 Lisp_Uint8_Byte_Table *bte = XUINT8_BYTE_TABLE (obj);
318 unsigned char v0 = bte->property[0];
321 for (i = 1; i < 256; i++)
323 if (bte->property[i] != v0)
330 #define BT_UINT16_MIN 0
331 #define BT_UINT16_MAX (USHRT_MAX - 3)
332 #define BT_UINT16_t (USHRT_MAX - 2)
333 #define BT_UINT16_nil (USHRT_MAX - 1)
334 #define BT_UINT16_unbound USHRT_MAX
336 INLINE_HEADER int INT_UINT16_P (Lisp_Object obj);
337 INLINE_HEADER int UINT16_VALUE_P (Lisp_Object obj);
338 INLINE_HEADER unsigned short UINT16_ENCODE (Lisp_Object obj);
339 INLINE_HEADER Lisp_Object UINT16_DECODE (unsigned short us);
342 INT_UINT16_P (Lisp_Object obj)
346 int num = XINT (obj);
348 return (BT_UINT16_MIN <= num) && (num <= BT_UINT16_MAX);
355 UINT16_VALUE_P (Lisp_Object obj)
357 return EQ (obj, Qunbound)
358 || EQ (obj, Qnil) || EQ (obj, Qt) || INT_UINT16_P (obj);
361 INLINE_HEADER unsigned short
362 UINT16_ENCODE (Lisp_Object obj)
364 if (EQ (obj, Qunbound))
365 return BT_UINT16_unbound;
366 else if (EQ (obj, Qnil))
367 return BT_UINT16_nil;
368 else if (EQ (obj, Qt))
374 INLINE_HEADER Lisp_Object
375 UINT16_DECODE (unsigned short n)
377 if (n == BT_UINT16_unbound)
379 else if (n == BT_UINT16_nil)
381 else if (n == BT_UINT16_t)
387 INLINE_HEADER unsigned short
388 UINT8_TO_UINT16 (unsigned char n)
390 if (n == BT_UINT8_unbound)
391 return BT_UINT16_unbound;
392 else if (n == BT_UINT8_nil)
393 return BT_UINT16_nil;
394 else if (n == BT_UINT8_t)
401 mark_uint16_byte_table (Lisp_Object obj)
407 print_uint16_byte_table (Lisp_Object obj,
408 Lisp_Object printcharfun, int escapeflag)
410 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
412 struct gcpro gcpro1, gcpro2;
413 GCPRO2 (obj, printcharfun);
415 write_c_string ("\n#<uint16-byte-table", printcharfun);
416 for (i = 0; i < 256; i++)
418 unsigned short n = bte->property[i];
420 write_c_string ("\n ", printcharfun);
421 write_c_string (" ", printcharfun);
422 if (n == BT_UINT16_unbound)
423 write_c_string ("void", printcharfun);
424 else if (n == BT_UINT16_nil)
425 write_c_string ("nil", printcharfun);
426 else if (n == BT_UINT16_t)
427 write_c_string ("t", printcharfun);
432 sprintf (buf, "%hd", n);
433 write_c_string (buf, printcharfun);
437 write_c_string (">", printcharfun);
441 uint16_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
443 Lisp_Uint16_Byte_Table *te1 = XUINT16_BYTE_TABLE (obj1);
444 Lisp_Uint16_Byte_Table *te2 = XUINT16_BYTE_TABLE (obj2);
447 for (i = 0; i < 256; i++)
448 if (te1->property[i] != te2->property[i])
454 uint16_byte_table_hash (Lisp_Object obj, int depth)
456 Lisp_Uint16_Byte_Table *te = XUINT16_BYTE_TABLE (obj);
460 for (i = 0; i < 256; i++)
461 hash = HASH2 (hash, te->property[i]);
465 DEFINE_LRECORD_IMPLEMENTATION ("uint16-byte-table", uint16_byte_table,
466 mark_uint16_byte_table,
467 print_uint16_byte_table,
468 0, uint16_byte_table_equal,
469 uint16_byte_table_hash,
470 0 /* uint16_byte_table_description */,
471 Lisp_Uint16_Byte_Table);
474 make_uint16_byte_table (unsigned short initval)
478 Lisp_Uint16_Byte_Table *cte;
480 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
481 &lrecord_uint16_byte_table);
483 for (i = 0; i < 256; i++)
484 cte->property[i] = initval;
486 XSETUINT16_BYTE_TABLE (obj, cte);
491 expand_uint8_byte_table_to_uint16 (Lisp_Object table)
495 Lisp_Uint8_Byte_Table* bte = XUINT8_BYTE_TABLE(table);
496 Lisp_Uint16_Byte_Table* cte;
498 cte = alloc_lcrecord_type (Lisp_Uint16_Byte_Table,
499 &lrecord_uint16_byte_table);
500 for (i = 0; i < 256; i++)
502 cte->property[i] = UINT8_TO_UINT16 (bte->property[i]);
504 XSETUINT16_BYTE_TABLE (obj, cte);
509 uint16_byte_table_same_value_p (Lisp_Object obj)
511 Lisp_Uint16_Byte_Table *bte = XUINT16_BYTE_TABLE (obj);
512 unsigned short v0 = bte->property[0];
515 for (i = 1; i < 256; i++)
517 if (bte->property[i] != v0)
525 mark_byte_table (Lisp_Object obj)
527 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
530 for (i = 0; i < 256; i++)
532 mark_object (cte->property[i]);
538 print_byte_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
540 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
542 struct gcpro gcpro1, gcpro2;
543 GCPRO2 (obj, printcharfun);
545 write_c_string ("\n#<byte-table", printcharfun);
546 for (i = 0; i < 256; i++)
548 Lisp_Object elt = bte->property[i];
550 write_c_string ("\n ", printcharfun);
551 write_c_string (" ", printcharfun);
552 if (EQ (elt, Qunbound))
553 write_c_string ("void", printcharfun);
555 print_internal (elt, printcharfun, escapeflag);
558 write_c_string (">", printcharfun);
562 byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
564 Lisp_Byte_Table *cte1 = XBYTE_TABLE (obj1);
565 Lisp_Byte_Table *cte2 = XBYTE_TABLE (obj2);
568 for (i = 0; i < 256; i++)
569 if (BYTE_TABLE_P (cte1->property[i]))
571 if (BYTE_TABLE_P (cte2->property[i]))
573 if (!byte_table_equal (cte1->property[i],
574 cte2->property[i], depth + 1))
581 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
587 byte_table_hash (Lisp_Object obj, int depth)
589 Lisp_Byte_Table *cte = XBYTE_TABLE (obj);
591 return internal_array_hash (cte->property, 256, depth);
594 static const struct lrecord_description byte_table_description[] = {
595 { XD_LISP_OBJECT_ARRAY, offsetof(Lisp_Byte_Table, property), 256 },
599 DEFINE_LRECORD_IMPLEMENTATION ("byte-table", byte_table,
604 byte_table_description,
608 make_byte_table (Lisp_Object initval)
612 Lisp_Byte_Table *cte;
614 cte = alloc_lcrecord_type (Lisp_Byte_Table, &lrecord_byte_table);
616 for (i = 0; i < 256; i++)
617 cte->property[i] = initval;
619 XSETBYTE_TABLE (obj, cte);
624 byte_table_same_value_p (Lisp_Object obj)
626 Lisp_Byte_Table *bte = XBYTE_TABLE (obj);
627 Lisp_Object v0 = bte->property[0];
630 for (i = 1; i < 256; i++)
632 if (!internal_equal (bte->property[i], v0, 0))
639 Lisp_Object get_byte_table (Lisp_Object table, unsigned char idx);
640 Lisp_Object put_byte_table (Lisp_Object table, unsigned char idx,
644 get_byte_table (Lisp_Object table, unsigned char idx)
646 if (UINT8_BYTE_TABLE_P (table))
647 return UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[idx]);
648 else if (UINT16_BYTE_TABLE_P (table))
649 return UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[idx]);
650 else if (BYTE_TABLE_P (table))
651 return XBYTE_TABLE(table)->property[idx];
657 put_byte_table (Lisp_Object table, unsigned char idx, Lisp_Object value)
659 if (UINT8_BYTE_TABLE_P (table))
661 if (UINT8_VALUE_P (value))
663 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
664 if (!UINT8_BYTE_TABLE_P (value) &&
665 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
666 && uint8_byte_table_same_value_p (table))
671 else if (UINT16_VALUE_P (value))
673 Lisp_Object new = expand_uint8_byte_table_to_uint16 (table);
675 XUINT16_BYTE_TABLE(new)->property[idx] = UINT16_ENCODE (value);
680 Lisp_Object new = make_byte_table (Qnil);
683 for (i = 0; i < 256; i++)
685 XBYTE_TABLE(new)->property[i]
686 = UINT8_DECODE (XUINT8_BYTE_TABLE(table)->property[i]);
688 XBYTE_TABLE(new)->property[idx] = value;
692 else if (UINT16_BYTE_TABLE_P (table))
694 if (UINT16_VALUE_P (value))
696 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
697 if (!UINT8_BYTE_TABLE_P (value) &&
698 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
699 && uint16_byte_table_same_value_p (table))
706 Lisp_Object new = make_byte_table (Qnil);
709 for (i = 0; i < 256; i++)
711 XBYTE_TABLE(new)->property[i]
712 = UINT16_DECODE (XUINT16_BYTE_TABLE(table)->property[i]);
714 XBYTE_TABLE(new)->property[idx] = value;
718 else if (BYTE_TABLE_P (table))
720 XBYTE_TABLE(table)->property[idx] = value;
721 if (!UINT8_BYTE_TABLE_P (value) &&
722 !UINT16_BYTE_TABLE_P (value) && !BYTE_TABLE_P (value)
723 && byte_table_same_value_p (table))
728 else if (!internal_equal (table, value, 0))
730 if (UINT8_VALUE_P (table) && UINT8_VALUE_P (value))
732 table = make_uint8_byte_table (UINT8_ENCODE (table));
733 XUINT8_BYTE_TABLE(table)->property[idx] = UINT8_ENCODE (value);
735 else if (UINT16_VALUE_P (table) && UINT16_VALUE_P (value))
737 table = make_uint16_byte_table (UINT16_ENCODE (table));
738 XUINT16_BYTE_TABLE(table)->property[idx] = UINT16_ENCODE (value);
742 table = make_byte_table (table);
743 XBYTE_TABLE(table)->property[idx] = value;
750 mark_char_id_table (Lisp_Object obj)
752 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
758 print_char_id_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
760 Lisp_Object table = XCHAR_ID_TABLE (obj)->table;
762 struct gcpro gcpro1, gcpro2;
763 GCPRO2 (obj, printcharfun);
765 write_c_string ("#<char-id-table ", printcharfun);
766 for (i = 0; i < 256; i++)
768 Lisp_Object elt = get_byte_table (table, i);
769 if (i != 0) write_c_string ("\n ", printcharfun);
770 if (EQ (elt, Qunbound))
771 write_c_string ("void", printcharfun);
773 print_internal (elt, printcharfun, escapeflag);
776 write_c_string (">", printcharfun);
780 char_id_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
782 Lisp_Object table1 = XCHAR_ID_TABLE (obj1)->table;
783 Lisp_Object table2 = XCHAR_ID_TABLE (obj2)->table;
786 for (i = 0; i < 256; i++)
788 if (!internal_equal (get_byte_table (table1, i),
789 get_byte_table (table2, i), 0))
796 char_id_table_hash (Lisp_Object obj, int depth)
798 Lisp_Char_ID_Table *cte = XCHAR_ID_TABLE (obj);
800 return char_id_table_hash (cte->table, depth + 1);
803 static const struct lrecord_description char_id_table_description[] = {
804 { XD_LISP_OBJECT, offsetof(Lisp_Char_ID_Table, table) },
808 DEFINE_LRECORD_IMPLEMENTATION ("char-id-table", char_id_table,
811 0, char_id_table_equal,
813 char_id_table_description,
817 make_char_id_table (Lisp_Object initval)
820 Lisp_Char_ID_Table *cte;
822 cte = alloc_lcrecord_type (Lisp_Char_ID_Table, &lrecord_char_id_table);
824 cte->table = make_byte_table (initval);
826 XSETCHAR_ID_TABLE (obj, cte);
832 get_char_id_table (Emchar ch, Lisp_Object table)
834 unsigned int code = ch;
841 (XCHAR_ID_TABLE (table)->table,
842 (unsigned char)(code >> 24)),
843 (unsigned char) (code >> 16)),
844 (unsigned char) (code >> 8)),
845 (unsigned char) code);
848 void put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table);
850 put_char_id_table (Emchar ch, Lisp_Object value, Lisp_Object table)
852 unsigned int code = ch;
853 Lisp_Object table1, table2, table3, table4;
855 table1 = XCHAR_ID_TABLE (table)->table;
856 table2 = get_byte_table (table1, (unsigned char)(code >> 24));
857 table3 = get_byte_table (table2, (unsigned char)(code >> 16));
858 table4 = get_byte_table (table3, (unsigned char)(code >> 8));
860 table4 = put_byte_table (table4, (unsigned char)code, value);
861 table3 = put_byte_table (table3, (unsigned char)(code >> 8), table4);
862 table2 = put_byte_table (table2, (unsigned char)(code >> 16), table3);
863 XCHAR_ID_TABLE (table)->table
864 = put_byte_table (table1, (unsigned char)(code >> 24), table2);
868 Lisp_Object Vchar_attribute_hash_table;
869 Lisp_Object Vcharacter_composition_table;
870 Lisp_Object Vcharacter_variant_table;
872 Lisp_Object Qideograph_daikanwa;
873 Lisp_Object Q_decomposition;
877 Lisp_Object Qisolated;
878 Lisp_Object Qinitial;
881 Lisp_Object Qvertical;
882 Lisp_Object QnoBreak;
883 Lisp_Object Qfraction;
893 Emchar to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg);
895 Lisp_Object put_char_ccs_code_point (Lisp_Object character,
896 Lisp_Object ccs, Lisp_Object value);
897 Lisp_Object remove_char_ccs (Lisp_Object character, Lisp_Object ccs);
900 to_char_id (Lisp_Object v, char* err_msg, Lisp_Object err_arg)
906 else if (EQ (v, Qcompat))
908 else if (EQ (v, Qisolated))
910 else if (EQ (v, Qinitial))
912 else if (EQ (v, Qmedial))
914 else if (EQ (v, Qfinal))
916 else if (EQ (v, Qvertical))
918 else if (EQ (v, QnoBreak))
920 else if (EQ (v, Qfraction))
922 else if (EQ (v, Qsuper))
924 else if (EQ (v, Qsub))
926 else if (EQ (v, Qcircle))
928 else if (EQ (v, Qsquare))
930 else if (EQ (v, Qwide))
932 else if (EQ (v, Qnarrow))
934 else if (EQ (v, Qsmall))
936 else if (EQ (v, Qfont))
939 signal_simple_error (err_msg, err_arg);
942 DEFUN ("get-composite-char", Fget_composite_char, 1, 1, 0, /*
943 Return character corresponding with list.
947 Lisp_Object table = Vcharacter_composition_table;
948 Lisp_Object rest = list;
952 Lisp_Object v = Fcar (rest);
954 Emchar c = to_char_id (v, "Invalid value for composition", list);
956 ret = get_char_id_table (c, table);
961 if (!CHAR_ID_TABLE_P (ret))
966 else if (!CONSP (rest))
968 else if (CHAR_ID_TABLE_P (ret))
971 signal_simple_error ("Invalid table is found with", list);
973 signal_simple_error ("Invalid value for composition", list);
976 DEFUN ("char-variants", Fchar_variants, 1, 1, 0, /*
977 Return variants of CHARACTER.
981 CHECK_CHAR (character);
982 return Fcopy_list (get_char_id_table (XCHAR (character),
983 Vcharacter_variant_table));
987 /* We store the char-attributes in hash tables with the names as the
988 key and the actual char-id-table object as the value. Occasionally
989 we need to use them in a list format. These routines provide us
991 struct char_attribute_list_closure
993 Lisp_Object *char_attribute_list;
997 add_char_attribute_to_list_mapper (Lisp_Object key, Lisp_Object value,
998 void *char_attribute_list_closure)
1000 /* This function can GC */
1001 struct char_attribute_list_closure *calcl
1002 = (struct char_attribute_list_closure*) char_attribute_list_closure;
1003 Lisp_Object *char_attribute_list = calcl->char_attribute_list;
1005 *char_attribute_list = Fcons (key, *char_attribute_list);
1009 DEFUN ("char-attribute-list", Fchar_attribute_list, 0, 0, 0, /*
1010 Return the list of all existing character attributes except coded-charsets.
1014 Lisp_Object char_attribute_list = Qnil;
1015 struct gcpro gcpro1;
1016 struct char_attribute_list_closure char_attribute_list_closure;
1018 GCPRO1 (char_attribute_list);
1019 char_attribute_list_closure.char_attribute_list = &char_attribute_list;
1020 elisp_maphash (add_char_attribute_to_list_mapper,
1021 Vchar_attribute_hash_table,
1022 &char_attribute_list_closure);
1024 return char_attribute_list;
1027 DEFUN ("find-char-attribute-table", Ffind_char_attribute_table, 1, 1, 0, /*
1028 Return char-id-table corresponding to ATTRIBUTE.
1032 return Fgethash (attribute, Vchar_attribute_hash_table, Qnil);
1036 /* We store the char-id-tables in hash tables with the attributes as
1037 the key and the actual char-id-table object as the value. Each
1038 char-id-table stores values of an attribute corresponding with
1039 characters. Occasionally we need to get attributes of a character
1040 in a association-list format. These routines provide us with
1042 struct char_attribute_alist_closure
1045 Lisp_Object *char_attribute_alist;
1049 add_char_attribute_alist_mapper (Lisp_Object key, Lisp_Object value,
1050 void *char_attribute_alist_closure)
1052 /* This function can GC */
1053 struct char_attribute_alist_closure *caacl =
1054 (struct char_attribute_alist_closure*) char_attribute_alist_closure;
1055 Lisp_Object ret = get_char_id_table (caacl->char_id, value);
1056 if (!UNBOUNDP (ret))
1058 Lisp_Object *char_attribute_alist = caacl->char_attribute_alist;
1059 *char_attribute_alist
1060 = Fcons (Fcons (key, ret), *char_attribute_alist);
1065 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
1066 Return the alist of attributes of CHARACTER.
1070 Lisp_Object alist = Qnil;
1073 CHECK_CHAR (character);
1075 struct gcpro gcpro1;
1076 struct char_attribute_alist_closure char_attribute_alist_closure;
1079 char_attribute_alist_closure.char_id = XCHAR (character);
1080 char_attribute_alist_closure.char_attribute_alist = &alist;
1081 elisp_maphash (add_char_attribute_alist_mapper,
1082 Vchar_attribute_hash_table,
1083 &char_attribute_alist_closure);
1087 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
1089 Lisp_Object ccs = chlook->charset_by_leading_byte[i];
1093 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1096 if ( CHAR_ID_TABLE_P (encoding_table)
1097 && INTP (cpos = get_char_id_table (XCHAR (character),
1100 alist = Fcons (Fcons (ccs, cpos), alist);
1107 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
1108 Return the value of CHARACTER's ATTRIBUTE.
1110 (character, attribute))
1114 CHECK_CHAR (character);
1115 if (!NILP (ccs = Ffind_charset (attribute)))
1117 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1119 if (CHAR_ID_TABLE_P (encoding_table))
1120 return get_char_id_table (XCHAR (character), encoding_table);
1126 Lisp_Object table = Fgethash (attribute,
1127 Vchar_attribute_hash_table,
1129 if (!UNBOUNDP (table))
1131 Lisp_Object ret = get_char_id_table (XCHAR (character), table);
1132 if (!UNBOUNDP (ret))
1139 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
1140 Store CHARACTER's ATTRIBUTE with VALUE.
1142 (character, attribute, value))
1146 CHECK_CHAR (character);
1147 ccs = Ffind_charset (attribute);
1150 return put_char_ccs_code_point (character, ccs, value);
1152 else if (EQ (attribute, Q_decomposition))
1157 signal_simple_error ("Invalid value for ->decomposition",
1160 if (CONSP (Fcdr (value)))
1162 Lisp_Object rest = value;
1163 Lisp_Object table = Vcharacter_composition_table;
1167 GET_EXTERNAL_LIST_LENGTH (rest, len);
1168 seq = make_vector (len, Qnil);
1170 while (CONSP (rest))
1172 Lisp_Object v = Fcar (rest);
1175 = to_char_id (v, "Invalid value for ->decomposition", value);
1178 XVECTOR_DATA(seq)[i++] = v;
1180 XVECTOR_DATA(seq)[i++] = make_char (c);
1184 put_char_id_table (c, character, table);
1189 ntable = get_char_id_table (c, table);
1190 if (!CHAR_ID_TABLE_P (ntable))
1192 ntable = make_char_id_table (Qnil);
1193 put_char_id_table (c, ntable, table);
1201 Lisp_Object v = Fcar (value);
1205 Emchar c = XINT (v);
1207 = get_char_id_table (c, Vcharacter_variant_table);
1209 if (NILP (Fmemq (v, ret)))
1211 put_char_id_table (c, Fcons (character, ret),
1212 Vcharacter_variant_table);
1215 seq = make_vector (1, v);
1219 else if (EQ (attribute, Q_ucs))
1225 signal_simple_error ("Invalid value for ->ucs", value);
1229 ret = get_char_id_table (c, Vcharacter_variant_table);
1230 if (NILP (Fmemq (character, ret)))
1232 put_char_id_table (c, Fcons (character, ret),
1233 Vcharacter_variant_table);
1237 Lisp_Object table = Fgethash (attribute,
1238 Vchar_attribute_hash_table,
1243 table = make_char_id_table (Qunbound);
1244 Fputhash (attribute, table, Vchar_attribute_hash_table);
1246 put_char_id_table (XCHAR (character), value, table);
1251 DEFUN ("remove-char-attribute", Fremove_char_attribute, 2, 2, 0, /*
1252 Remove CHARACTER's ATTRIBUTE.
1254 (character, attribute))
1258 CHECK_CHAR (character);
1259 ccs = Ffind_charset (attribute);
1262 return remove_char_ccs (character, ccs);
1266 Lisp_Object table = Fgethash (attribute,
1267 Vchar_attribute_hash_table,
1269 if (!UNBOUNDP (table))
1271 put_char_id_table (XCHAR (character), Qunbound, table);
1278 INLINE_HEADER int CHARSET_BYTE_SIZE (Lisp_Charset* cs);
1280 CHARSET_BYTE_SIZE (Lisp_Charset* cs)
1282 /* ad-hoc method for `ascii' */
1283 if ((CHARSET_CHARS (cs) == 94) &&
1284 (CHARSET_BYTE_OFFSET (cs) != 33))
1285 return 128 - CHARSET_BYTE_OFFSET (cs);
1287 return CHARSET_CHARS (cs);
1290 #define XCHARSET_BYTE_SIZE(ccs) CHARSET_BYTE_SIZE (XCHARSET (ccs))
1292 int decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len);
1294 decoding_table_check_elements (Lisp_Object v, int dim, int ccs_len)
1298 if (XVECTOR_LENGTH (v) > ccs_len)
1301 for (i = 0; i < XVECTOR_LENGTH (v); i++)
1303 Lisp_Object c = XVECTOR_DATA(v)[i];
1305 if (!NILP (c) && !CHARP (c))
1309 int ret = decoding_table_check_elements (c, dim - 1, ccs_len);
1321 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1324 decoding_table_remove_char (Lisp_Object v, int dim, int byte_offset,
1334 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1335 nv = XVECTOR_DATA(v)[i];
1341 XVECTOR_DATA(v)[i] = Qnil;
1345 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1346 int code_point, Lisp_Object character);
1348 decoding_table_put_char (Lisp_Object v, int dim, int byte_offset,
1349 int code_point, Lisp_Object character)
1353 int ccs_len = XVECTOR_LENGTH (v);
1358 i = ((code_point >> (8 * dim)) & 255) - byte_offset;
1359 nv = XVECTOR_DATA(v)[i];
1363 nv = (XVECTOR_DATA(v)[i] = make_older_vector (ccs_len, Qnil));
1369 XVECTOR_DATA(v)[i] = character;
1373 put_char_ccs_code_point (Lisp_Object character,
1374 Lisp_Object ccs, Lisp_Object value)
1376 Lisp_Object encoding_table;
1378 if (!EQ (XCHARSET_NAME (ccs), Qucs)
1379 || (XCHAR (character) != XINT (value)))
1381 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
1382 int dim = XCHARSET_DIMENSION (ccs);
1383 int ccs_len = XCHARSET_BYTE_SIZE (ccs);
1384 int byte_offset = XCHARSET_BYTE_OFFSET (ccs);
1388 { /* obsolete representation: value must be a list of bytes */
1389 Lisp_Object ret = Fcar (value);
1393 signal_simple_error ("Invalid value for coded-charset", value);
1394 code_point = XINT (ret);
1395 if (XCHARSET_GRAPHIC (ccs) == 1)
1397 rest = Fcdr (value);
1398 while (!NILP (rest))
1403 signal_simple_error ("Invalid value for coded-charset",
1407 signal_simple_error ("Invalid value for coded-charset",
1410 if (XCHARSET_GRAPHIC (ccs) == 1)
1412 code_point = (code_point << 8) | j;
1415 value = make_int (code_point);
1417 else if (INTP (value))
1419 code_point = XINT (value);
1420 if (XCHARSET_GRAPHIC (ccs) == 1)
1422 code_point &= 0x7F7F7F7F;
1423 value = make_int (code_point);
1427 signal_simple_error ("Invalid value for coded-charset", value);
1431 Lisp_Object cpos = Fget_char_attribute (character, ccs);
1434 decoding_table_remove_char (v, dim, byte_offset, XINT (cpos));
1439 XCHARSET_DECODING_TABLE (ccs)
1440 = v = make_older_vector (ccs_len, Qnil);
1443 decoding_table_put_char (v, dim, byte_offset, code_point, character);
1445 if (NILP (encoding_table = XCHARSET_ENCODING_TABLE (ccs)))
1447 XCHARSET_ENCODING_TABLE (ccs)
1448 = encoding_table = make_char_id_table (Qnil);
1450 put_char_id_table (XCHAR (character), value, encoding_table);
1455 remove_char_ccs (Lisp_Object character, Lisp_Object ccs)
1457 Lisp_Object decoding_table = XCHARSET_DECODING_TABLE (ccs);
1458 Lisp_Object encoding_table = XCHARSET_ENCODING_TABLE (ccs);
1460 if (VECTORP (decoding_table))
1462 Lisp_Object cpos = Fget_char_attribute (character, ccs);
1466 decoding_table_remove_char (decoding_table,
1467 XCHARSET_DIMENSION (ccs),
1468 XCHARSET_BYTE_OFFSET (ccs),
1472 if (CHAR_ID_TABLE_P (encoding_table))
1474 put_char_id_table (XCHAR (character), Qnil, encoding_table);
1479 EXFUN (Fmake_char, 3);
1480 EXFUN (Fdecode_char, 2);
1482 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
1483 Store character's ATTRIBUTES.
1487 Lisp_Object rest = attributes;
1488 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
1489 Lisp_Object character;
1491 Lisp_Object daikanwa = Qnil;
1496 while (CONSP (rest))
1498 Lisp_Object cell = Fcar (rest);
1502 signal_simple_error ("Invalid argument", attributes);
1503 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
1504 && ((XCHARSET_FINAL (ccs) != 0) ||
1505 (XCHARSET_UCS_MAX (ccs) > 0)) )
1509 character = Fmake_char (ccs, Fcar (cell), Fcar (Fcdr (cell)));
1511 character = Fdecode_char (ccs, cell);
1512 goto setup_attributes;
1516 if (!NILP (code = Fcdr (Fassq (Q_ucs, attributes))))
1519 signal_simple_error ("Invalid argument", attributes);
1521 character = make_char (XINT (code) + 0x100000);
1522 goto setup_attributes;
1526 else if (!INTP (code))
1527 signal_simple_error ("Invalid argument", attributes);
1529 character = make_char (XINT (code));
1533 while (CONSP (rest))
1535 Lisp_Object cell = Fcar (rest);
1537 Lisp_Object key = Fcar (cell);
1538 Lisp_Object value = Fcdr (cell);
1542 signal_simple_error ("Invalid argument", attributes);
1545 if (EQ (key, Qmorohashi_daikanwa))
1548 GET_EXTERNAL_LIST_LENGTH (value, len);
1552 if (NILP (daikanwa))
1553 daikanwa = Fcdr (Fassq (Qideograph_daikanwa, rest));
1554 if (EQ (Fcar (value), daikanwa))
1558 else if (EQ (key, Qideograph_daikanwa))
1562 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
1571 Lisp_Object Vutf_2000_version;
1575 int leading_code_private_11;
1578 Lisp_Object Qcharsetp;
1580 /* Qdoc_string, Qdimension, Qchars defined in general.c */
1581 Lisp_Object Qregistry, Qfinal, Qgraphic;
1582 Lisp_Object Qdirection;
1583 Lisp_Object Qreverse_direction_charset;
1584 Lisp_Object Qleading_byte;
1585 Lisp_Object Qshort_name, Qlong_name;
1599 Qcyrillic_iso8859_5,
1601 Qjapanese_jisx0208_1978,
1604 Qjapanese_jisx0208_1990,
1607 Qchinese_cns11643_1,
1608 Qchinese_cns11643_2,
1613 Qlatin_viscii_lower,
1614 Qlatin_viscii_upper,
1615 Qvietnamese_viscii_lower,
1616 Qvietnamese_viscii_upper,
1646 Lisp_Object Ql2r, Qr2l;
1648 Lisp_Object Vcharset_hash_table;
1650 /* Composite characters are characters constructed by overstriking two
1651 or more regular characters.
1653 1) The old Mule implementation involves storing composite characters
1654 in a buffer as a tag followed by all of the actual characters
1655 used to make up the composite character. I think this is a bad
1656 idea; it greatly complicates code that wants to handle strings
1657 one character at a time because it has to deal with the possibility
1658 of great big ungainly characters. It's much more reasonable to
1659 simply store an index into a table of composite characters.
1661 2) The current implementation only allows for 16,384 separate
1662 composite characters over the lifetime of the XEmacs process.
1663 This could become a potential problem if the user
1664 edited lots of different files that use composite characters.
1665 Due to FSF bogosity, increasing the number of allowable
1666 composite characters under Mule would decrease the number
1667 of possible faces that can exist. Mule already has shrunk
1668 this to 2048, and further shrinkage would become uncomfortable.
1669 No such problems exist in XEmacs.
1671 Composite characters could be represented as 0x80 C1 C2 C3,
1672 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
1673 for slightly under 2^20 (one million) composite characters
1674 over the XEmacs process lifetime, and you only need to
1675 increase the size of a Mule character from 19 to 21 bits.
1676 Or you could use 0x80 C1 C2 C3 C4, allowing for about
1677 85 million (slightly over 2^26) composite characters. */
1680 /************************************************************************/
1681 /* Basic Emchar functions */
1682 /************************************************************************/
1684 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
1685 string in STR. Returns the number of bytes stored.
1686 Do not call this directly. Use the macro set_charptr_emchar() instead.
1690 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
1696 Lisp_Object charset;
1705 else if ( c <= 0x7ff )
1707 *p++ = (c >> 6) | 0xc0;
1708 *p++ = (c & 0x3f) | 0x80;
1710 else if ( c <= 0xffff )
1712 *p++ = (c >> 12) | 0xe0;
1713 *p++ = ((c >> 6) & 0x3f) | 0x80;
1714 *p++ = (c & 0x3f) | 0x80;
1716 else if ( c <= 0x1fffff )
1718 *p++ = (c >> 18) | 0xf0;
1719 *p++ = ((c >> 12) & 0x3f) | 0x80;
1720 *p++ = ((c >> 6) & 0x3f) | 0x80;
1721 *p++ = (c & 0x3f) | 0x80;
1723 else if ( c <= 0x3ffffff )
1725 *p++ = (c >> 24) | 0xf8;
1726 *p++ = ((c >> 18) & 0x3f) | 0x80;
1727 *p++ = ((c >> 12) & 0x3f) | 0x80;
1728 *p++ = ((c >> 6) & 0x3f) | 0x80;
1729 *p++ = (c & 0x3f) | 0x80;
1733 *p++ = (c >> 30) | 0xfc;
1734 *p++ = ((c >> 24) & 0x3f) | 0x80;
1735 *p++ = ((c >> 18) & 0x3f) | 0x80;
1736 *p++ = ((c >> 12) & 0x3f) | 0x80;
1737 *p++ = ((c >> 6) & 0x3f) | 0x80;
1738 *p++ = (c & 0x3f) | 0x80;
1741 BREAKUP_CHAR (c, charset, c1, c2);
1742 lb = CHAR_LEADING_BYTE (c);
1743 if (LEADING_BYTE_PRIVATE_P (lb))
1744 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
1746 if (EQ (charset, Vcharset_control_1))
1755 /* Return the first character from a Mule-encoded string in STR,
1756 assuming it's non-ASCII. Do not call this directly.
1757 Use the macro charptr_emchar() instead. */
1760 non_ascii_charptr_emchar (const Bufbyte *str)
1773 else if ( b >= 0xf8 )
1778 else if ( b >= 0xf0 )
1783 else if ( b >= 0xe0 )
1788 else if ( b >= 0xc0 )
1798 for( ; len > 0; len-- )
1801 ch = ( ch << 6 ) | ( b & 0x3f );
1805 Bufbyte i0 = *str, i1, i2 = 0;
1806 Lisp_Object charset;
1808 if (i0 == LEADING_BYTE_CONTROL_1)
1809 return (Emchar) (*++str - 0x20);
1811 if (LEADING_BYTE_PREFIX_P (i0))
1816 charset = CHARSET_BY_LEADING_BYTE (i0);
1817 if (XCHARSET_DIMENSION (charset) == 2)
1820 return MAKE_CHAR (charset, i1, i2);
1824 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
1825 Do not call this directly. Use the macro valid_char_p() instead. */
1829 non_ascii_valid_char_p (Emchar ch)
1833 /* Must have only lowest 19 bits set */
1837 f1 = CHAR_FIELD1 (ch);
1838 f2 = CHAR_FIELD2 (ch);
1839 f3 = CHAR_FIELD3 (ch);
1843 Lisp_Object charset;
1845 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
1846 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
1847 f2 > MAX_CHAR_FIELD2_PRIVATE)
1852 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
1853 f2 <= MAX_CHAR_FIELD2_PRIVATE))
1857 NOTE: This takes advantage of the fact that
1858 FIELD2_TO_OFFICIAL_LEADING_BYTE and
1859 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
1861 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
1862 if (EQ (charset, Qnil))
1864 return (XCHARSET_CHARS (charset) == 96);
1868 Lisp_Object charset;
1870 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
1871 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
1872 f1 > MAX_CHAR_FIELD1_PRIVATE)
1874 if (f2 < 0x20 || f3 < 0x20)
1877 #ifdef ENABLE_COMPOSITE_CHARS
1878 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
1880 if (UNBOUNDP (Fgethash (make_int (ch),
1881 Vcomposite_char_char2string_hash_table,
1886 #endif /* ENABLE_COMPOSITE_CHARS */
1888 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
1889 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
1892 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
1894 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
1897 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
1899 if (EQ (charset, Qnil))
1901 return (XCHARSET_CHARS (charset) == 96);
1907 /************************************************************************/
1908 /* Basic string functions */
1909 /************************************************************************/
1911 /* Copy the character pointed to by PTR into STR, assuming it's
1912 non-ASCII. Do not call this directly. Use the macro
1913 charptr_copy_char() instead. */
1916 non_ascii_charptr_copy_char (const Bufbyte *ptr, Bufbyte *str)
1918 Bufbyte *strptr = str;
1920 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
1922 /* Notice fallthrough. */
1924 case 6: *++strptr = *ptr++;
1925 case 5: *++strptr = *ptr++;
1927 case 4: *++strptr = *ptr++;
1928 case 3: *++strptr = *ptr++;
1929 case 2: *++strptr = *ptr;
1934 return strptr + 1 - str;
1938 /************************************************************************/
1939 /* streams of Emchars */
1940 /************************************************************************/
1942 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
1943 The functions below are not meant to be called directly; use
1944 the macros in insdel.h. */
1947 Lstream_get_emchar_1 (Lstream *stream, int ch)
1949 Bufbyte str[MAX_EMCHAR_LEN];
1950 Bufbyte *strptr = str;
1952 str[0] = (Bufbyte) ch;
1953 switch (REP_BYTES_BY_FIRST_BYTE (ch))
1955 /* Notice fallthrough. */
1958 ch = Lstream_getc (stream);
1960 *++strptr = (Bufbyte) ch;
1962 ch = Lstream_getc (stream);
1964 *++strptr = (Bufbyte) ch;
1967 ch = Lstream_getc (stream);
1969 *++strptr = (Bufbyte) ch;
1971 ch = Lstream_getc (stream);
1973 *++strptr = (Bufbyte) ch;
1975 ch = Lstream_getc (stream);
1977 *++strptr = (Bufbyte) ch;
1982 return charptr_emchar (str);
1986 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1988 Bufbyte str[MAX_EMCHAR_LEN];
1989 Bytecount len = set_charptr_emchar (str, ch);
1990 return Lstream_write (stream, str, len);
1994 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1996 Bufbyte str[MAX_EMCHAR_LEN];
1997 Bytecount len = set_charptr_emchar (str, ch);
1998 Lstream_unread (stream, str, len);
2002 /************************************************************************/
2003 /* charset object */
2004 /************************************************************************/
2007 mark_charset (Lisp_Object obj)
2009 Lisp_Charset *cs = XCHARSET (obj);
2011 mark_object (cs->short_name);
2012 mark_object (cs->long_name);
2013 mark_object (cs->doc_string);
2014 mark_object (cs->registry);
2015 mark_object (cs->ccl_program);
2017 mark_object (cs->encoding_table);
2018 /* mark_object (cs->decoding_table); */
2024 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
2026 Lisp_Charset *cs = XCHARSET (obj);
2030 error ("printing unreadable object #<charset %s 0x%x>",
2031 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
2034 write_c_string ("#<charset ", printcharfun);
2035 print_internal (CHARSET_NAME (cs), printcharfun, 0);
2036 write_c_string (" ", printcharfun);
2037 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
2038 write_c_string (" ", printcharfun);
2039 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
2040 write_c_string (" ", printcharfun);
2041 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
2042 sprintf (buf, " %d^%d %s cols=%d g%d final='%c' reg=",
2044 CHARSET_DIMENSION (cs),
2045 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
2046 CHARSET_COLUMNS (cs),
2047 CHARSET_GRAPHIC (cs),
2048 CHARSET_FINAL (cs));
2049 write_c_string (buf, printcharfun);
2050 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
2051 sprintf (buf, " 0x%x>", cs->header.uid);
2052 write_c_string (buf, printcharfun);
2055 static const struct lrecord_description charset_description[] = {
2056 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
2057 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
2058 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
2059 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
2060 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
2061 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
2062 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
2064 { XD_LISP_OBJECT, offsetof (Lisp_Charset, decoding_table) },
2065 { XD_LISP_OBJECT, offsetof (Lisp_Charset, encoding_table) },
2070 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
2071 mark_charset, print_charset, 0, 0, 0,
2072 charset_description,
2074 /* Make a new charset. */
2077 make_charset (Charset_ID id, Lisp_Object name,
2078 unsigned short chars, unsigned char dimension,
2079 unsigned char columns, unsigned char graphic,
2080 Bufbyte final, unsigned char direction, Lisp_Object short_name,
2081 Lisp_Object long_name, Lisp_Object doc,
2083 Lisp_Object decoding_table,
2084 Emchar ucs_min, Emchar ucs_max,
2085 Emchar code_offset, unsigned char byte_offset)
2088 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
2092 XSETCHARSET (obj, cs);
2094 CHARSET_ID (cs) = id;
2095 CHARSET_NAME (cs) = name;
2096 CHARSET_SHORT_NAME (cs) = short_name;
2097 CHARSET_LONG_NAME (cs) = long_name;
2098 CHARSET_CHARS (cs) = chars;
2099 CHARSET_DIMENSION (cs) = dimension;
2100 CHARSET_DIRECTION (cs) = direction;
2101 CHARSET_COLUMNS (cs) = columns;
2102 CHARSET_GRAPHIC (cs) = graphic;
2103 CHARSET_FINAL (cs) = final;
2104 CHARSET_DOC_STRING (cs) = doc;
2105 CHARSET_REGISTRY (cs) = reg;
2106 CHARSET_CCL_PROGRAM (cs) = Qnil;
2107 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
2109 CHARSET_DECODING_TABLE(cs) = Qnil;
2110 CHARSET_ENCODING_TABLE(cs) = Qnil;
2111 CHARSET_UCS_MIN(cs) = ucs_min;
2112 CHARSET_UCS_MAX(cs) = ucs_max;
2113 CHARSET_CODE_OFFSET(cs) = code_offset;
2114 CHARSET_BYTE_OFFSET(cs) = byte_offset;
2118 if (id == LEADING_BYTE_ASCII)
2119 CHARSET_REP_BYTES (cs) = 1;
2121 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
2123 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
2128 /* some charsets do not have final characters. This includes
2129 ASCII, Control-1, Composite, and the two faux private
2131 unsigned char iso2022_type
2132 = (dimension == 1 ? 0 : 2) + (chars == 94 ? 0 : 1);
2134 if (code_offset == 0)
2136 assert (NILP (chlook->charset_by_attributes[iso2022_type][final]));
2137 chlook->charset_by_attributes[iso2022_type][final] = obj;
2141 (chlook->charset_by_attributes[iso2022_type][final][direction]));
2142 chlook->charset_by_attributes[iso2022_type][final][direction] = obj;
2146 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
2147 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
2149 /* Some charsets are "faux" and don't have names or really exist at
2150 all except in the leading-byte table. */
2152 Fputhash (name, obj, Vcharset_hash_table);
2157 get_unallocated_leading_byte (int dimension)
2162 if (chlook->next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
2165 lb = chlook->next_allocated_leading_byte++;
2169 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
2172 lb = chlook->next_allocated_1_byte_leading_byte++;
2176 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
2179 lb = chlook->next_allocated_2_byte_leading_byte++;
2185 ("No more character sets free for this dimension",
2186 make_int (dimension));
2193 make_builtin_char (Lisp_Object charset, int c1, int c2)
2195 if (XCHARSET_UCS_MAX (charset))
2198 = (XCHARSET_DIMENSION (charset) == 1
2200 c1 - XCHARSET_BYTE_OFFSET (charset)
2202 (c1 - XCHARSET_BYTE_OFFSET (charset)) * XCHARSET_CHARS (charset)
2203 + c2 - XCHARSET_BYTE_OFFSET (charset))
2204 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
2205 if ((code < XCHARSET_UCS_MIN (charset))
2206 || (XCHARSET_UCS_MAX (charset) < code))
2207 signal_simple_error ("Arguments makes invalid character",
2211 else if (XCHARSET_DIMENSION (charset) == 1)
2213 switch (XCHARSET_CHARS (charset))
2217 + (XCHARSET_FINAL (charset) - '0') * 94 + (c1 - 33);
2220 + (XCHARSET_FINAL (charset) - '0') * 96 + (c1 - 32);
2227 switch (XCHARSET_CHARS (charset))
2230 return MIN_CHAR_94x94
2231 + (XCHARSET_FINAL (charset) - '0') * 94 * 94
2232 + (c1 - 33) * 94 + (c2 - 33);
2234 return MIN_CHAR_96x96
2235 + (XCHARSET_FINAL (charset) - '0') * 96 * 96
2236 + (c1 - 32) * 96 + (c2 - 32);
2244 range_charset_code_point (Lisp_Object charset, Emchar ch)
2248 if ((XCHARSET_UCS_MIN (charset) <= ch)
2249 && (ch <= XCHARSET_UCS_MAX (charset)))
2251 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
2253 if (XCHARSET_CHARS (charset) == 256)
2255 else if (XCHARSET_DIMENSION (charset) == 1)
2256 return d + XCHARSET_BYTE_OFFSET (charset);
2257 else if (XCHARSET_DIMENSION (charset) == 2)
2259 ((d / XCHARSET_CHARS (charset)
2260 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2261 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2262 else if (XCHARSET_DIMENSION (charset) == 3)
2264 ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2265 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2266 | ((d / XCHARSET_CHARS (charset)
2267 % XCHARSET_CHARS (charset)
2268 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2269 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2270 else /* if (XCHARSET_DIMENSION (charset) == 4) */
2272 ((d / (XCHARSET_CHARS (charset)
2273 * XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2274 + XCHARSET_BYTE_OFFSET (charset)) << 24)
2275 | ((d / (XCHARSET_CHARS (charset) * XCHARSET_CHARS (charset))
2276 % XCHARSET_CHARS (charset)
2277 + XCHARSET_BYTE_OFFSET (charset)) << 16)
2278 | ((d / XCHARSET_CHARS (charset) % XCHARSET_CHARS (charset)
2279 + XCHARSET_BYTE_OFFSET (charset)) << 8)
2280 | (d % XCHARSET_CHARS (charset) + XCHARSET_BYTE_OFFSET (charset));
2282 else if (XCHARSET_CODE_OFFSET (charset) == 0)
2284 if (XCHARSET_DIMENSION (charset) == 1)
2286 if (XCHARSET_CHARS (charset) == 94)
2288 if (((d = ch - (MIN_CHAR_94
2289 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
2293 else if (XCHARSET_CHARS (charset) == 96)
2295 if (((d = ch - (MIN_CHAR_96
2296 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
2303 else if (XCHARSET_DIMENSION (charset) == 2)
2305 if (XCHARSET_CHARS (charset) == 94)
2307 if (((d = ch - (MIN_CHAR_94x94
2308 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
2311 return (((d / 94) + 33) << 8) | (d % 94 + 33);
2313 else if (XCHARSET_CHARS (charset) == 96)
2315 if (((d = ch - (MIN_CHAR_96x96
2316 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
2319 return (((d / 96) + 32) << 8) | (d % 96 + 32);
2325 if (EQ (charset, Vcharset_mojikyo_2022_1)
2326 && (MIN_CHAR_MOJIKYO < ch) && (ch < MIN_CHAR_MOJIKYO + 94 * 60 * 94))
2328 int m = ch - MIN_CHAR_MOJIKYO - 1;
2329 int byte1 = m / (94 * 60) + 33;
2330 int byte2 = (m % (94 * 60)) / 94;
2331 int byte3 = m % 94 + 33;
2337 return (byte1 << 16) | (byte2 << 8) | byte3;
2343 encode_builtin_char_1 (Emchar c, Lisp_Object* charset)
2345 if (c <= MAX_CHAR_BASIC_LATIN)
2347 *charset = Vcharset_ascii;
2352 *charset = Vcharset_control_1;
2357 *charset = Vcharset_latin_iso8859_1;
2361 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
2363 *charset = Vcharset_greek_iso8859_7;
2364 return c - MIN_CHAR_GREEK + 0x20;
2366 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
2368 *charset = Vcharset_cyrillic_iso8859_5;
2369 return c - MIN_CHAR_CYRILLIC + 0x20;
2372 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
2374 *charset = Vcharset_hebrew_iso8859_8;
2375 return c - MIN_CHAR_HEBREW + 0x20;
2377 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
2379 *charset = Vcharset_thai_tis620;
2380 return c - MIN_CHAR_THAI + 0x20;
2383 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
2384 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
2386 return list2 (Vcharset_katakana_jisx0201,
2387 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
2390 else if (c <= MAX_CHAR_BMP)
2392 *charset = Vcharset_ucs_bmp;
2395 else if (c < MIN_CHAR_DAIKANWA)
2397 *charset = Vcharset_ucs;
2401 else if (c <= MAX_CHAR_DAIKANWA)
2403 *charset = Vcharset_ideograph_daikanwa;
2404 return c - MIN_CHAR_DAIKANWA;
2407 else if (c <= MAX_CHAR_MOJIKYO)
2409 *charset = Vcharset_mojikyo;
2410 return c - MIN_CHAR_MOJIKYO;
2412 else if (c < MIN_CHAR_94)
2414 *charset = Vcharset_ucs;
2417 else if (c <= MAX_CHAR_94)
2419 *charset = CHARSET_BY_ATTRIBUTES (94, 1,
2420 ((c - MIN_CHAR_94) / 94) + '0',
2421 CHARSET_LEFT_TO_RIGHT);
2422 if (!NILP (*charset))
2423 return ((c - MIN_CHAR_94) % 94) + 33;
2426 *charset = Vcharset_ucs;
2430 else if (c <= MAX_CHAR_96)
2432 *charset = CHARSET_BY_ATTRIBUTES (96, 1,
2433 ((c - MIN_CHAR_96) / 96) + '0',
2434 CHARSET_LEFT_TO_RIGHT);
2435 if (!NILP (*charset))
2436 return ((c - MIN_CHAR_96) % 96) + 32;
2439 *charset = Vcharset_ucs;
2443 else if (c <= MAX_CHAR_94x94)
2446 = CHARSET_BY_ATTRIBUTES (94, 2,
2447 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
2448 CHARSET_LEFT_TO_RIGHT);
2449 if (!NILP (*charset))
2450 return (((((c - MIN_CHAR_94x94) / 94) % 94) + 33) << 8)
2451 | (((c - MIN_CHAR_94x94) % 94) + 33);
2454 *charset = Vcharset_ucs;
2458 else if (c <= MAX_CHAR_96x96)
2461 = CHARSET_BY_ATTRIBUTES (96, 2,
2462 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
2463 CHARSET_LEFT_TO_RIGHT);
2464 if (!NILP (*charset))
2465 return ((((c - MIN_CHAR_96x96) / 96) % 96) + 32) << 8
2466 | (((c - MIN_CHAR_96x96) % 96) + 32);
2469 *charset = Vcharset_ucs;
2475 *charset = Vcharset_ucs;
2480 Lisp_Object Vdefault_coded_charset_priority_list;
2484 /************************************************************************/
2485 /* Basic charset Lisp functions */
2486 /************************************************************************/
2488 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
2489 Return non-nil if OBJECT is a charset.
2493 return CHARSETP (object) ? Qt : Qnil;
2496 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
2497 Retrieve the charset of the given name.
2498 If CHARSET-OR-NAME is a charset object, it is simply returned.
2499 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
2500 nil is returned. Otherwise the associated charset object is returned.
2504 if (CHARSETP (charset_or_name))
2505 return charset_or_name;
2507 CHECK_SYMBOL (charset_or_name);
2508 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
2511 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
2512 Retrieve the charset of the given name.
2513 Same as `find-charset' except an error is signalled if there is no such
2514 charset instead of returning nil.
2518 Lisp_Object charset = Ffind_charset (name);
2521 signal_simple_error ("No such charset", name);
2525 /* We store the charsets in hash tables with the names as the key and the
2526 actual charset object as the value. Occasionally we need to use them
2527 in a list format. These routines provide us with that. */
2528 struct charset_list_closure
2530 Lisp_Object *charset_list;
2534 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
2535 void *charset_list_closure)
2537 /* This function can GC */
2538 struct charset_list_closure *chcl =
2539 (struct charset_list_closure*) charset_list_closure;
2540 Lisp_Object *charset_list = chcl->charset_list;
2542 *charset_list = Fcons (key /* XCHARSET_NAME (value) */, *charset_list);
2546 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
2547 Return a list of the names of all defined charsets.
2551 Lisp_Object charset_list = Qnil;
2552 struct gcpro gcpro1;
2553 struct charset_list_closure charset_list_closure;
2555 GCPRO1 (charset_list);
2556 charset_list_closure.charset_list = &charset_list;
2557 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
2558 &charset_list_closure);
2561 return charset_list;
2564 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
2565 Return the name of the given charset.
2569 return XCHARSET_NAME (Fget_charset (charset));
2572 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
2573 Define a new character set.
2574 This function is for use with Mule support.
2575 NAME is a symbol, the name by which the character set is normally referred.
2576 DOC-STRING is a string describing the character set.
2577 PROPS is a property list, describing the specific nature of the
2578 character set. Recognized properties are:
2580 'short-name Short version of the charset name (ex: Latin-1)
2581 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
2582 'registry A regular expression matching the font registry field for
2584 'dimension Number of octets used to index a character in this charset.
2585 Either 1 or 2. Defaults to 1.
2586 'columns Number of columns used to display a character in this charset.
2587 Only used in TTY mode. (Under X, the actual width of a
2588 character can be derived from the font used to display the
2589 characters.) If unspecified, defaults to the dimension
2590 (this is almost always the correct value).
2591 'chars Number of characters in each dimension (94 or 96).
2592 Defaults to 94. Note that if the dimension is 2, the
2593 character set thus described is 94x94 or 96x96.
2594 'final Final byte of ISO 2022 escape sequence. Must be
2595 supplied. Each combination of (DIMENSION, CHARS) defines a
2596 separate namespace for final bytes. Note that ISO
2597 2022 restricts the final byte to the range
2598 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
2599 dimension == 2. Note also that final bytes in the range
2600 0x30 - 0x3F are reserved for user-defined (not official)
2602 'graphic 0 (use left half of font on output) or 1 (use right half
2603 of font on output). Defaults to 0. For example, for
2604 a font whose registry is ISO8859-1, the left half
2605 (octets 0x20 - 0x7F) is the `ascii' character set, while
2606 the right half (octets 0xA0 - 0xFF) is the `latin-1'
2607 character set. With 'graphic set to 0, the octets
2608 will have their high bit cleared; with it set to 1,
2609 the octets will have their high bit set.
2610 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
2612 'ccl-program A compiled CCL program used to convert a character in
2613 this charset into an index into the font. This is in
2614 addition to the 'graphic property. The CCL program
2615 is passed the octets of the character, with the high
2616 bit cleared and set depending upon whether the value
2617 of the 'graphic property is 0 or 1.
2619 (name, doc_string, props))
2621 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
2622 int direction = CHARSET_LEFT_TO_RIGHT;
2623 Lisp_Object registry = Qnil;
2624 Lisp_Object charset;
2625 Lisp_Object rest, keyword, value;
2626 Lisp_Object ccl_program = Qnil;
2627 Lisp_Object short_name = Qnil, long_name = Qnil;
2628 int byte_offset = -1;
2630 CHECK_SYMBOL (name);
2631 if (!NILP (doc_string))
2632 CHECK_STRING (doc_string);
2634 charset = Ffind_charset (name);
2635 if (!NILP (charset))
2636 signal_simple_error ("Cannot redefine existing charset", name);
2638 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
2640 if (EQ (keyword, Qshort_name))
2642 CHECK_STRING (value);
2646 if (EQ (keyword, Qlong_name))
2648 CHECK_STRING (value);
2652 else if (EQ (keyword, Qdimension))
2655 dimension = XINT (value);
2656 if (dimension < 1 || dimension > 2)
2657 signal_simple_error ("Invalid value for 'dimension", value);
2660 else if (EQ (keyword, Qchars))
2663 chars = XINT (value);
2664 if (chars != 94 && chars != 96)
2665 signal_simple_error ("Invalid value for 'chars", value);
2668 else if (EQ (keyword, Qcolumns))
2671 columns = XINT (value);
2672 if (columns != 1 && columns != 2)
2673 signal_simple_error ("Invalid value for 'columns", value);
2676 else if (EQ (keyword, Qgraphic))
2679 graphic = XINT (value);
2681 if (graphic < 0 || graphic > 2)
2683 if (graphic < 0 || graphic > 1)
2685 signal_simple_error ("Invalid value for 'graphic", value);
2688 else if (EQ (keyword, Qregistry))
2690 CHECK_STRING (value);
2694 else if (EQ (keyword, Qdirection))
2696 if (EQ (value, Ql2r))
2697 direction = CHARSET_LEFT_TO_RIGHT;
2698 else if (EQ (value, Qr2l))
2699 direction = CHARSET_RIGHT_TO_LEFT;
2701 signal_simple_error ("Invalid value for 'direction", value);
2704 else if (EQ (keyword, Qfinal))
2706 CHECK_CHAR_COERCE_INT (value);
2707 final = XCHAR (value);
2708 if (final < '0' || final > '~')
2709 signal_simple_error ("Invalid value for 'final", value);
2712 else if (EQ (keyword, Qccl_program))
2714 CHECK_VECTOR (value);
2715 ccl_program = value;
2719 signal_simple_error ("Unrecognized property", keyword);
2723 error ("'final must be specified");
2724 if (dimension == 2 && final > 0x5F)
2726 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
2729 if (!NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2730 CHARSET_LEFT_TO_RIGHT)) ||
2731 !NILP (CHARSET_BY_ATTRIBUTES (chars, dimension, final,
2732 CHARSET_RIGHT_TO_LEFT)))
2734 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
2736 id = get_unallocated_leading_byte (dimension);
2738 if (NILP (doc_string))
2739 doc_string = build_string ("");
2741 if (NILP (registry))
2742 registry = build_string ("");
2744 if (NILP (short_name))
2745 XSETSTRING (short_name, XSYMBOL (name)->name);
2747 if (NILP (long_name))
2748 long_name = doc_string;
2751 columns = dimension;
2753 if (byte_offset < 0)
2757 else if (chars == 96)
2763 charset = make_charset (id, name, chars, dimension, columns, graphic,
2764 final, direction, short_name, long_name,
2765 doc_string, registry,
2766 Qnil, 0, 0, 0, byte_offset);
2767 if (!NILP (ccl_program))
2768 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
2772 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
2774 Make a charset equivalent to CHARSET but which goes in the opposite direction.
2775 NEW-NAME is the name of the new charset. Return the new charset.
2777 (charset, new_name))
2779 Lisp_Object new_charset = Qnil;
2780 int id, chars, dimension, columns, graphic, final;
2782 Lisp_Object registry, doc_string, short_name, long_name;
2785 charset = Fget_charset (charset);
2786 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
2787 signal_simple_error ("Charset already has reverse-direction charset",
2790 CHECK_SYMBOL (new_name);
2791 if (!NILP (Ffind_charset (new_name)))
2792 signal_simple_error ("Cannot redefine existing charset", new_name);
2794 cs = XCHARSET (charset);
2796 chars = CHARSET_CHARS (cs);
2797 dimension = CHARSET_DIMENSION (cs);
2798 columns = CHARSET_COLUMNS (cs);
2799 id = get_unallocated_leading_byte (dimension);
2801 graphic = CHARSET_GRAPHIC (cs);
2802 final = CHARSET_FINAL (cs);
2803 direction = CHARSET_RIGHT_TO_LEFT;
2804 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
2805 direction = CHARSET_LEFT_TO_RIGHT;
2806 doc_string = CHARSET_DOC_STRING (cs);
2807 short_name = CHARSET_SHORT_NAME (cs);
2808 long_name = CHARSET_LONG_NAME (cs);
2809 registry = CHARSET_REGISTRY (cs);
2811 new_charset = make_charset (id, new_name, chars, dimension, columns,
2812 graphic, final, direction, short_name, long_name,
2813 doc_string, registry,
2815 CHARSET_DECODING_TABLE(cs),
2816 CHARSET_UCS_MIN(cs),
2817 CHARSET_UCS_MAX(cs),
2818 CHARSET_CODE_OFFSET(cs),
2819 CHARSET_BYTE_OFFSET(cs)
2825 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
2826 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
2831 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
2832 Define symbol ALIAS as an alias for CHARSET.
2836 CHECK_SYMBOL (alias);
2837 charset = Fget_charset (charset);
2838 return Fputhash (alias, charset, Vcharset_hash_table);
2841 /* #### Reverse direction charsets not yet implemented. */
2843 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
2845 Return the reverse-direction charset parallel to CHARSET, if any.
2846 This is the charset with the same properties (in particular, the same
2847 dimension, number of characters per dimension, and final byte) as
2848 CHARSET but whose characters are displayed in the opposite direction.
2852 charset = Fget_charset (charset);
2853 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
2857 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
2858 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
2859 If DIRECTION is omitted, both directions will be checked (left-to-right
2860 will be returned if character sets exist for both directions).
2862 (dimension, chars, final, direction))
2864 int dm, ch, fi, di = -1;
2865 Lisp_Object obj = Qnil;
2867 CHECK_INT (dimension);
2868 dm = XINT (dimension);
2869 if (dm < 1 || dm > 2)
2870 signal_simple_error ("Invalid value for DIMENSION", dimension);
2874 if (ch != 94 && ch != 96)
2875 signal_simple_error ("Invalid value for CHARS", chars);
2877 CHECK_CHAR_COERCE_INT (final);
2879 if (fi < '0' || fi > '~')
2880 signal_simple_error ("Invalid value for FINAL", final);
2882 if (EQ (direction, Ql2r))
2883 di = CHARSET_LEFT_TO_RIGHT;
2884 else if (EQ (direction, Qr2l))
2885 di = CHARSET_RIGHT_TO_LEFT;
2886 else if (!NILP (direction))
2887 signal_simple_error ("Invalid value for DIRECTION", direction);
2889 if (dm == 2 && fi > 0x5F)
2891 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
2895 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_LEFT_TO_RIGHT);
2897 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, CHARSET_RIGHT_TO_LEFT);
2900 obj = CHARSET_BY_ATTRIBUTES (ch, dm, fi, di);
2903 return XCHARSET_NAME (obj);
2907 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
2908 Return short name of CHARSET.
2912 return XCHARSET_SHORT_NAME (Fget_charset (charset));
2915 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
2916 Return long name of CHARSET.
2920 return XCHARSET_LONG_NAME (Fget_charset (charset));
2923 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
2924 Return description of CHARSET.
2928 return XCHARSET_DOC_STRING (Fget_charset (charset));
2931 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
2932 Return dimension of CHARSET.
2936 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
2939 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
2940 Return property PROP of CHARSET.
2941 Recognized properties are those listed in `make-charset', as well as
2942 'name and 'doc-string.
2948 charset = Fget_charset (charset);
2949 cs = XCHARSET (charset);
2951 CHECK_SYMBOL (prop);
2952 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
2953 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
2954 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
2955 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
2956 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
2957 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
2958 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
2959 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
2960 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
2961 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
2962 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
2963 if (EQ (prop, Qdirection))
2964 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
2965 if (EQ (prop, Qreverse_direction_charset))
2967 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
2971 return XCHARSET_NAME (obj);
2973 signal_simple_error ("Unrecognized charset property name", prop);
2974 return Qnil; /* not reached */
2977 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
2978 Return charset identification number of CHARSET.
2982 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
2985 /* #### We need to figure out which properties we really want to
2988 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
2989 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
2991 (charset, ccl_program))
2993 charset = Fget_charset (charset);
2994 CHECK_VECTOR (ccl_program);
2995 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
3000 invalidate_charset_font_caches (Lisp_Object charset)
3002 /* Invalidate font cache entries for charset on all devices. */
3003 Lisp_Object devcons, concons, hash_table;
3004 DEVICE_LOOP_NO_BREAK (devcons, concons)
3006 struct device *d = XDEVICE (XCAR (devcons));
3007 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
3008 if (!UNBOUNDP (hash_table))
3009 Fclrhash (hash_table);
3013 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
3014 Set the 'registry property of CHARSET to REGISTRY.
3016 (charset, registry))
3018 charset = Fget_charset (charset);
3019 CHECK_STRING (registry);
3020 XCHARSET_REGISTRY (charset) = registry;
3021 invalidate_charset_font_caches (charset);
3022 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
3027 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
3028 Return mapping-table of CHARSET.
3032 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
3035 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
3036 Set mapping-table of CHARSET to TABLE.
3040 struct Lisp_Charset *cs;
3044 charset = Fget_charset (charset);
3045 cs = XCHARSET (charset);
3049 if (VECTORP (CHARSET_DECODING_TABLE(cs)))
3050 make_vector_newer (CHARSET_DECODING_TABLE(cs));
3051 CHARSET_DECODING_TABLE(cs) = Qnil;
3054 else if (VECTORP (table))
3056 int ccs_len = CHARSET_BYTE_SIZE (cs);
3057 int ret = decoding_table_check_elements (table,
3058 CHARSET_DIMENSION (cs),
3063 signal_simple_error ("Too big table", table);
3065 signal_simple_error ("Invalid element is found", table);
3067 signal_simple_error ("Something wrong", table);
3069 CHARSET_DECODING_TABLE(cs) = Qnil;
3072 signal_error (Qwrong_type_argument,
3073 list2 (build_translated_string ("vector-or-nil-p"),
3076 byte_offset = CHARSET_BYTE_OFFSET (cs);
3077 switch (CHARSET_DIMENSION (cs))
3080 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3082 Lisp_Object c = XVECTOR_DATA(table)[i];
3085 put_char_ccs_code_point (c, charset,
3086 make_int (i + byte_offset));
3090 for (i = 0; i < XVECTOR_LENGTH (table); i++)
3092 Lisp_Object v = XVECTOR_DATA(table)[i];
3098 for (j = 0; j < XVECTOR_LENGTH (v); j++)
3100 Lisp_Object c = XVECTOR_DATA(v)[j];
3103 put_char_ccs_code_point
3105 make_int ( ( (i + byte_offset) << 8 )
3111 put_char_ccs_code_point (v, charset,
3112 make_int (i + byte_offset));
3121 /************************************************************************/
3122 /* Lisp primitives for working with characters */
3123 /************************************************************************/
3126 DEFUN ("decode-char", Fdecode_char, 2, 2, 0, /*
3127 Make a character from CHARSET and code-point CODE.
3133 charset = Fget_charset (charset);
3136 if (XCHARSET_GRAPHIC (charset) == 1)
3138 return make_char (DECODE_CHAR (charset, c));
3141 DEFUN ("decode-builtin-char", Fdecode_builtin_char, 2, 2, 0, /*
3142 Make a builtin character from CHARSET and code-point CODE.
3149 charset = Fget_charset (charset);
3153 if ((final = XCHARSET_FINAL (charset)) >= '0')
3155 if (XCHARSET_DIMENSION (charset) == 1)
3157 switch (XCHARSET_CHARS (charset))
3161 make_char (MIN_CHAR_94 + (final - '0') * 94
3162 + ((c & 0x7F) - 33));
3165 make_char (MIN_CHAR_96 + (final - '0') * 96
3166 + ((c & 0x7F) - 32));
3168 return Fdecode_char (charset, code);
3173 switch (XCHARSET_CHARS (charset))
3177 make_char (MIN_CHAR_94x94
3178 + (final - '0') * 94 * 94
3179 + (((c >> 8) & 0x7F) - 33) * 94
3180 + ((c & 0x7F) - 33));
3183 make_char (MIN_CHAR_96x96
3184 + (final - '0') * 96 * 96
3185 + (((c >> 8) & 0x7F) - 32) * 96
3186 + ((c & 0x7F) - 32));
3188 return Fdecode_char (charset, code);
3192 else if (XCHARSET_UCS_MAX (charset))
3195 = (XCHARSET_DIMENSION (charset) == 1
3197 c - XCHARSET_BYTE_OFFSET (charset)
3199 ((c >> 8) - XCHARSET_BYTE_OFFSET (charset))
3200 * XCHARSET_CHARS (charset)
3201 + (c & 0xFF) - XCHARSET_BYTE_OFFSET (charset))
3202 - XCHARSET_CODE_OFFSET (charset) + XCHARSET_UCS_MIN (charset);
3203 if ((cid < XCHARSET_UCS_MIN (charset))
3204 || (XCHARSET_UCS_MAX (charset) < cid))
3205 return Fdecode_char (charset, code);
3206 return make_char (cid);
3209 return Fdecode_char (charset, code);
3213 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
3214 Make a character from CHARSET and octets ARG1 and ARG2.
3215 ARG2 is required only for characters from two-dimensional charsets.
3216 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
3217 character s with caron.
3219 (charset, arg1, arg2))
3223 int lowlim, highlim;
3225 charset = Fget_charset (charset);
3226 cs = XCHARSET (charset);
3228 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
3229 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
3231 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
3233 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
3234 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
3237 /* It is useful (and safe, according to Olivier Galibert) to strip
3238 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
3239 write (make-char 'latin-iso8859-2 CODE) where code is the actual
3240 Latin 2 code of the character. */
3248 if (a1 < lowlim || a1 > highlim)
3249 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
3251 if (CHARSET_DIMENSION (cs) == 1)
3255 ("Charset is of dimension one; second octet must be nil", arg2);
3256 return make_char (MAKE_CHAR (charset, a1, 0));
3265 a2 = XINT (arg2) & 0x7f;
3267 if (a2 < lowlim || a2 > highlim)
3268 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
3270 return make_char (MAKE_CHAR (charset, a1, a2));
3273 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
3274 Return the character set of char CH.
3278 CHECK_CHAR_COERCE_INT (ch);
3280 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
3283 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
3284 Return the octet numbered N (should be 0 or 1) of char CH.
3285 N defaults to 0 if omitted.
3289 Lisp_Object charset;
3292 CHECK_CHAR_COERCE_INT (ch);
3294 BREAKUP_CHAR (XCHAR (ch), charset, octet0, octet1);
3296 if (NILP (n) || EQ (n, Qzero))
3297 return make_int (octet0);
3298 else if (EQ (n, make_int (1)))
3299 return make_int (octet1);
3301 signal_simple_error ("Octet number must be 0 or 1", n);
3304 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
3305 Return list of charset and one or two position-codes of CHAR.
3309 /* This function can GC */
3310 struct gcpro gcpro1, gcpro2;
3311 Lisp_Object charset = Qnil;
3312 Lisp_Object rc = Qnil;
3320 GCPRO2 (charset, rc);
3321 CHECK_CHAR_COERCE_INT (character);
3324 code_point = ENCODE_CHAR (XCHAR (character), charset);
3325 dimension = XCHARSET_DIMENSION (charset);
3326 while (dimension > 0)
3328 rc = Fcons (make_int (code_point & 255), rc);
3332 rc = Fcons (XCHARSET_NAME (charset), rc);
3334 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
3336 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
3338 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
3342 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
3351 #ifdef ENABLE_COMPOSITE_CHARS
3352 /************************************************************************/
3353 /* composite character functions */
3354 /************************************************************************/
3357 lookup_composite_char (Bufbyte *str, int len)
3359 Lisp_Object lispstr = make_string (str, len);
3360 Lisp_Object ch = Fgethash (lispstr,
3361 Vcomposite_char_string2char_hash_table,
3367 if (composite_char_row_next >= 128)
3368 signal_simple_error ("No more composite chars available", lispstr);
3369 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
3370 composite_char_col_next);
3371 Fputhash (make_char (emch), lispstr,
3372 Vcomposite_char_char2string_hash_table);
3373 Fputhash (lispstr, make_char (emch),
3374 Vcomposite_char_string2char_hash_table);
3375 composite_char_col_next++;
3376 if (composite_char_col_next >= 128)
3378 composite_char_col_next = 32;
3379 composite_char_row_next++;
3388 composite_char_string (Emchar ch)
3390 Lisp_Object str = Fgethash (make_char (ch),
3391 Vcomposite_char_char2string_hash_table,
3393 assert (!UNBOUNDP (str));
3397 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
3398 Convert a string into a single composite character.
3399 The character is the result of overstriking all the characters in
3404 CHECK_STRING (string);
3405 return make_char (lookup_composite_char (XSTRING_DATA (string),
3406 XSTRING_LENGTH (string)));
3409 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
3410 Return a string of the characters comprising a composite character.
3418 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
3419 signal_simple_error ("Must be composite char", ch);
3420 return composite_char_string (emch);
3422 #endif /* ENABLE_COMPOSITE_CHARS */
3425 /************************************************************************/
3426 /* initialization */
3427 /************************************************************************/
3430 syms_of_mule_charset (void)
3433 INIT_LRECORD_IMPLEMENTATION (uint8_byte_table);
3434 INIT_LRECORD_IMPLEMENTATION (uint16_byte_table);
3435 INIT_LRECORD_IMPLEMENTATION (byte_table);
3436 INIT_LRECORD_IMPLEMENTATION (char_id_table);
3438 INIT_LRECORD_IMPLEMENTATION (charset);
3440 DEFSUBR (Fcharsetp);
3441 DEFSUBR (Ffind_charset);
3442 DEFSUBR (Fget_charset);
3443 DEFSUBR (Fcharset_list);
3444 DEFSUBR (Fcharset_name);
3445 DEFSUBR (Fmake_charset);
3446 DEFSUBR (Fmake_reverse_direction_charset);
3447 /* DEFSUBR (Freverse_direction_charset); */
3448 DEFSUBR (Fdefine_charset_alias);
3449 DEFSUBR (Fcharset_from_attributes);
3450 DEFSUBR (Fcharset_short_name);
3451 DEFSUBR (Fcharset_long_name);
3452 DEFSUBR (Fcharset_description);
3453 DEFSUBR (Fcharset_dimension);
3454 DEFSUBR (Fcharset_property);
3455 DEFSUBR (Fcharset_id);
3456 DEFSUBR (Fset_charset_ccl_program);
3457 DEFSUBR (Fset_charset_registry);
3459 DEFSUBR (Fchar_attribute_list);
3460 DEFSUBR (Ffind_char_attribute_table);
3461 DEFSUBR (Fchar_attribute_alist);
3462 DEFSUBR (Fget_char_attribute);
3463 DEFSUBR (Fput_char_attribute);
3464 DEFSUBR (Fremove_char_attribute);
3465 DEFSUBR (Fdefine_char);
3466 DEFSUBR (Fchar_variants);
3467 DEFSUBR (Fget_composite_char);
3468 DEFSUBR (Fcharset_mapping_table);
3469 DEFSUBR (Fset_charset_mapping_table);
3473 DEFSUBR (Fdecode_char);
3474 DEFSUBR (Fdecode_builtin_char);
3476 DEFSUBR (Fmake_char);
3477 DEFSUBR (Fchar_charset);
3478 DEFSUBR (Fchar_octet);
3479 DEFSUBR (Fsplit_char);
3481 #ifdef ENABLE_COMPOSITE_CHARS
3482 DEFSUBR (Fmake_composite_char);
3483 DEFSUBR (Fcomposite_char_string);
3486 defsymbol (&Qcharsetp, "charsetp");
3487 defsymbol (&Qregistry, "registry");
3488 defsymbol (&Qfinal, "final");
3489 defsymbol (&Qgraphic, "graphic");
3490 defsymbol (&Qdirection, "direction");
3491 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
3492 defsymbol (&Qshort_name, "short-name");
3493 defsymbol (&Qlong_name, "long-name");
3495 defsymbol (&Ql2r, "l2r");
3496 defsymbol (&Qr2l, "r2l");
3498 /* Charsets, compatible with FSF 20.3
3499 Naming convention is Script-Charset[-Edition] */
3500 defsymbol (&Qascii, "ascii");
3501 defsymbol (&Qcontrol_1, "control-1");
3502 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
3503 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
3504 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
3505 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
3506 defsymbol (&Qthai_tis620, "thai-tis620");
3507 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
3508 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
3509 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
3510 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
3511 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
3512 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
3513 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
3514 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
3515 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
3516 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
3517 defsymbol (&Qjapanese_jisx0208_1990, "japanese-jisx0208-1990");
3518 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
3519 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
3520 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
3521 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
3523 defsymbol (&Q_ucs, "->ucs");
3524 defsymbol (&Q_decomposition, "->decomposition");
3525 defsymbol (&Qcompat, "compat");
3526 defsymbol (&Qisolated, "isolated");
3527 defsymbol (&Qinitial, "initial");
3528 defsymbol (&Qmedial, "medial");
3529 defsymbol (&Qfinal, "final");
3530 defsymbol (&Qvertical, "vertical");
3531 defsymbol (&QnoBreak, "noBreak");
3532 defsymbol (&Qfraction, "fraction");
3533 defsymbol (&Qsuper, "super");
3534 defsymbol (&Qsub, "sub");
3535 defsymbol (&Qcircle, "circle");
3536 defsymbol (&Qsquare, "square");
3537 defsymbol (&Qwide, "wide");
3538 defsymbol (&Qnarrow, "narrow");
3539 defsymbol (&Qsmall, "small");
3540 defsymbol (&Qfont, "font");
3541 defsymbol (&Qucs, "ucs");
3542 defsymbol (&Qucs_bmp, "ucs-bmp");
3543 defsymbol (&Qlatin_viscii, "latin-viscii");
3544 defsymbol (&Qlatin_tcvn5712, "latin-tcvn5712");
3545 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
3546 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
3547 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
3548 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
3549 defsymbol (&Qideograph_daikanwa, "ideograph-daikanwa");
3550 defsymbol (&Qmojikyo, "mojikyo");
3551 defsymbol (&Qmojikyo_2022_1, "mojikyo-2022-1");
3552 defsymbol (&Qmojikyo_pj_1, "mojikyo-pj-1");
3553 defsymbol (&Qmojikyo_pj_2, "mojikyo-pj-2");
3554 defsymbol (&Qmojikyo_pj_3, "mojikyo-pj-3");
3555 defsymbol (&Qmojikyo_pj_4, "mojikyo-pj-4");
3556 defsymbol (&Qmojikyo_pj_5, "mojikyo-pj-5");
3557 defsymbol (&Qmojikyo_pj_6, "mojikyo-pj-6");
3558 defsymbol (&Qmojikyo_pj_7, "mojikyo-pj-7");
3559 defsymbol (&Qmojikyo_pj_8, "mojikyo-pj-8");
3560 defsymbol (&Qmojikyo_pj_9, "mojikyo-pj-9");
3561 defsymbol (&Qmojikyo_pj_10, "mojikyo-pj-10");
3562 defsymbol (&Qmojikyo_pj_11, "mojikyo-pj-11");
3563 defsymbol (&Qmojikyo_pj_12, "mojikyo-pj-12");
3564 defsymbol (&Qmojikyo_pj_13, "mojikyo-pj-13");
3565 defsymbol (&Qmojikyo_pj_14, "mojikyo-pj-14");
3566 defsymbol (&Qmojikyo_pj_15, "mojikyo-pj-15");
3567 defsymbol (&Qmojikyo_pj_16, "mojikyo-pj-16");
3568 defsymbol (&Qmojikyo_pj_17, "mojikyo-pj-17");
3569 defsymbol (&Qmojikyo_pj_18, "mojikyo-pj-18");
3570 defsymbol (&Qmojikyo_pj_19, "mojikyo-pj-19");
3571 defsymbol (&Qmojikyo_pj_20, "mojikyo-pj-20");
3572 defsymbol (&Qmojikyo_pj_21, "mojikyo-pj-21");
3573 defsymbol (&Qethiopic_ucs, "ethiopic-ucs");
3575 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
3576 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
3578 defsymbol (&Qcomposite, "composite");
3582 vars_of_mule_charset (void)
3589 chlook = xnew (struct charset_lookup);
3590 dumpstruct (&chlook, &charset_lookup_description);
3592 /* Table of charsets indexed by leading byte. */
3593 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
3594 chlook->charset_by_leading_byte[i] = Qnil;
3597 /* Table of charsets indexed by type/final-byte. */
3598 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3599 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3600 chlook->charset_by_attributes[i][j] = Qnil;
3602 /* Table of charsets indexed by type/final-byte/direction. */
3603 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
3604 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
3605 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
3606 chlook->charset_by_attributes[i][j][k] = Qnil;
3610 chlook->next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
3612 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
3613 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
3617 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3618 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
3619 Leading-code of private TYPE9N charset of column-width 1.
3621 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
3625 Vutf_2000_version = build_string("0.17 (Hōryūji)");
3626 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
3627 Version number of UTF-2000.
3630 staticpro (&Vcharacter_composition_table);
3631 Vcharacter_composition_table = make_char_id_table (Qnil);
3633 staticpro (&Vcharacter_variant_table);
3634 Vcharacter_variant_table = make_char_id_table (Qnil);
3636 Vdefault_coded_charset_priority_list = Qnil;
3637 DEFVAR_LISP ("default-coded-charset-priority-list",
3638 &Vdefault_coded_charset_priority_list /*
3639 Default order of preferred coded-character-sets.
3645 complex_vars_of_mule_charset (void)
3647 staticpro (&Vcharset_hash_table);
3648 Vcharset_hash_table =
3649 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3651 /* Predefined character sets. We store them into variables for
3655 staticpro (&Vchar_attribute_hash_table);
3656 Vchar_attribute_hash_table
3657 = make_lisp_hash_table (16, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
3659 staticpro (&Vcharset_ucs);
3661 make_charset (LEADING_BYTE_UCS, Qucs, 256, 4,
3662 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3663 build_string ("UCS"),
3664 build_string ("UCS"),
3665 build_string ("ISO/IEC 10646"),
3667 Qnil, 0, 0xFFFFFFF, 0, 0);
3668 staticpro (&Vcharset_ucs_bmp);
3670 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp, 256, 2,
3671 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3672 build_string ("BMP"),
3673 build_string ("BMP"),
3674 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
3675 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
3676 Qnil, 0, 0xFFFF, 0, 0);
3678 # define MIN_CHAR_THAI 0
3679 # define MAX_CHAR_THAI 0
3680 # define MIN_CHAR_HEBREW 0
3681 # define MAX_CHAR_HEBREW 0
3682 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
3683 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
3685 staticpro (&Vcharset_ascii);
3687 make_charset (LEADING_BYTE_ASCII, Qascii, 94, 1,
3688 1, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3689 build_string ("ASCII"),
3690 build_string ("ASCII)"),
3691 build_string ("ASCII (ISO646 IRV)"),
3692 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
3693 Qnil, 0, 0x7F, 0, 0);
3694 staticpro (&Vcharset_control_1);
3695 Vcharset_control_1 =
3696 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 94, 1,
3697 1, 1, 0, CHARSET_LEFT_TO_RIGHT,
3698 build_string ("C1"),
3699 build_string ("Control characters"),
3700 build_string ("Control characters 128-191"),
3702 Qnil, 0x80, 0x9F, 0, 0);
3703 staticpro (&Vcharset_latin_iso8859_1);
3704 Vcharset_latin_iso8859_1 =
3705 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 96, 1,
3706 1, 1, 'A', CHARSET_LEFT_TO_RIGHT,
3707 build_string ("Latin-1"),
3708 build_string ("ISO8859-1 (Latin-1)"),
3709 build_string ("ISO8859-1 (Latin-1)"),
3710 build_string ("iso8859-1"),
3711 Qnil, 0xA0, 0xFF, 0, 32);
3712 staticpro (&Vcharset_latin_iso8859_2);
3713 Vcharset_latin_iso8859_2 =
3714 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 96, 1,
3715 1, 1, 'B', CHARSET_LEFT_TO_RIGHT,
3716 build_string ("Latin-2"),
3717 build_string ("ISO8859-2 (Latin-2)"),
3718 build_string ("ISO8859-2 (Latin-2)"),
3719 build_string ("iso8859-2"),
3721 staticpro (&Vcharset_latin_iso8859_3);
3722 Vcharset_latin_iso8859_3 =
3723 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 96, 1,
3724 1, 1, 'C', CHARSET_LEFT_TO_RIGHT,
3725 build_string ("Latin-3"),
3726 build_string ("ISO8859-3 (Latin-3)"),
3727 build_string ("ISO8859-3 (Latin-3)"),
3728 build_string ("iso8859-3"),
3730 staticpro (&Vcharset_latin_iso8859_4);
3731 Vcharset_latin_iso8859_4 =
3732 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 96, 1,
3733 1, 1, 'D', CHARSET_LEFT_TO_RIGHT,
3734 build_string ("Latin-4"),
3735 build_string ("ISO8859-4 (Latin-4)"),
3736 build_string ("ISO8859-4 (Latin-4)"),
3737 build_string ("iso8859-4"),
3739 staticpro (&Vcharset_thai_tis620);
3740 Vcharset_thai_tis620 =
3741 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 96, 1,
3742 1, 1, 'T', CHARSET_LEFT_TO_RIGHT,
3743 build_string ("TIS620"),
3744 build_string ("TIS620 (Thai)"),
3745 build_string ("TIS620.2529 (Thai)"),
3746 build_string ("tis620"),
3747 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
3748 staticpro (&Vcharset_greek_iso8859_7);
3749 Vcharset_greek_iso8859_7 =
3750 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 96, 1,
3751 1, 1, 'F', CHARSET_LEFT_TO_RIGHT,
3752 build_string ("ISO8859-7"),
3753 build_string ("ISO8859-7 (Greek)"),
3754 build_string ("ISO8859-7 (Greek)"),
3755 build_string ("iso8859-7"),
3757 0 /* MIN_CHAR_GREEK */,
3758 0 /* MAX_CHAR_GREEK */, 0, 32);
3759 staticpro (&Vcharset_arabic_iso8859_6);
3760 Vcharset_arabic_iso8859_6 =
3761 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 96, 1,
3762 1, 1, 'G', CHARSET_RIGHT_TO_LEFT,
3763 build_string ("ISO8859-6"),
3764 build_string ("ISO8859-6 (Arabic)"),
3765 build_string ("ISO8859-6 (Arabic)"),
3766 build_string ("iso8859-6"),
3768 staticpro (&Vcharset_hebrew_iso8859_8);
3769 Vcharset_hebrew_iso8859_8 =
3770 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 96, 1,
3771 1, 1, 'H', CHARSET_RIGHT_TO_LEFT,
3772 build_string ("ISO8859-8"),
3773 build_string ("ISO8859-8 (Hebrew)"),
3774 build_string ("ISO8859-8 (Hebrew)"),
3775 build_string ("iso8859-8"),
3776 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
3777 staticpro (&Vcharset_katakana_jisx0201);
3778 Vcharset_katakana_jisx0201 =
3779 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 94, 1,
3780 1, 1, 'I', CHARSET_LEFT_TO_RIGHT,
3781 build_string ("JISX0201 Kana"),
3782 build_string ("JISX0201.1976 (Japanese Kana)"),
3783 build_string ("JISX0201.1976 Japanese Kana"),
3784 build_string ("jisx0201\\.1976"),
3786 staticpro (&Vcharset_latin_jisx0201);
3787 Vcharset_latin_jisx0201 =
3788 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 94, 1,
3789 1, 0, 'J', CHARSET_LEFT_TO_RIGHT,
3790 build_string ("JISX0201 Roman"),
3791 build_string ("JISX0201.1976 (Japanese Roman)"),
3792 build_string ("JISX0201.1976 Japanese Roman"),
3793 build_string ("jisx0201\\.1976"),
3795 staticpro (&Vcharset_cyrillic_iso8859_5);
3796 Vcharset_cyrillic_iso8859_5 =
3797 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 96, 1,
3798 1, 1, 'L', CHARSET_LEFT_TO_RIGHT,
3799 build_string ("ISO8859-5"),
3800 build_string ("ISO8859-5 (Cyrillic)"),
3801 build_string ("ISO8859-5 (Cyrillic)"),
3802 build_string ("iso8859-5"),
3804 0 /* MIN_CHAR_CYRILLIC */,
3805 0 /* MAX_CHAR_CYRILLIC */, 0, 32);
3806 staticpro (&Vcharset_latin_iso8859_9);
3807 Vcharset_latin_iso8859_9 =
3808 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 96, 1,
3809 1, 1, 'M', CHARSET_LEFT_TO_RIGHT,
3810 build_string ("Latin-5"),
3811 build_string ("ISO8859-9 (Latin-5)"),
3812 build_string ("ISO8859-9 (Latin-5)"),
3813 build_string ("iso8859-9"),
3815 staticpro (&Vcharset_japanese_jisx0208_1978);
3816 Vcharset_japanese_jisx0208_1978 =
3817 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978,
3818 Qjapanese_jisx0208_1978, 94, 2,
3819 2, 0, '@', CHARSET_LEFT_TO_RIGHT,
3820 build_string ("JIS X0208:1978"),
3821 build_string ("JIS X0208:1978 (Japanese)"),
3823 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
3824 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
3826 staticpro (&Vcharset_chinese_gb2312);
3827 Vcharset_chinese_gb2312 =
3828 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 94, 2,
3829 2, 0, 'A', CHARSET_LEFT_TO_RIGHT,
3830 build_string ("GB2312"),
3831 build_string ("GB2312)"),
3832 build_string ("GB2312 Chinese simplified"),
3833 build_string ("gb2312"),
3835 staticpro (&Vcharset_japanese_jisx0208);
3836 Vcharset_japanese_jisx0208 =
3837 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 94, 2,
3838 2, 0, 'B', CHARSET_LEFT_TO_RIGHT,
3839 build_string ("JISX0208"),
3840 build_string ("JIS X0208:1983 (Japanese)"),
3841 build_string ("JIS X0208:1983 Japanese Kanji"),
3842 build_string ("jisx0208\\.1983"),
3845 staticpro (&Vcharset_japanese_jisx0208_1990);
3846 Vcharset_japanese_jisx0208_1990 =
3847 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1990,
3848 Qjapanese_jisx0208_1990, 94, 2,
3849 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
3850 build_string ("JISX0208-1990"),
3851 build_string ("JIS X0208:1990 (Japanese)"),
3852 build_string ("JIS X0208:1990 Japanese Kanji"),
3853 build_string ("jisx0208\\.1990"),
3855 MIN_CHAR_JIS_X0208_1990,
3856 MAX_CHAR_JIS_X0208_1990, 0, 33);
3858 staticpro (&Vcharset_korean_ksc5601);
3859 Vcharset_korean_ksc5601 =
3860 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 94, 2,
3861 2, 0, 'C', CHARSET_LEFT_TO_RIGHT,
3862 build_string ("KSC5601"),
3863 build_string ("KSC5601 (Korean"),
3864 build_string ("KSC5601 Korean Hangul and Hanja"),
3865 build_string ("ksc5601"),
3867 staticpro (&Vcharset_japanese_jisx0212);
3868 Vcharset_japanese_jisx0212 =
3869 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 94, 2,
3870 2, 0, 'D', CHARSET_LEFT_TO_RIGHT,
3871 build_string ("JISX0212"),
3872 build_string ("JISX0212 (Japanese)"),
3873 build_string ("JISX0212 Japanese Supplement"),
3874 build_string ("jisx0212"),
3877 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
3878 staticpro (&Vcharset_chinese_cns11643_1);
3879 Vcharset_chinese_cns11643_1 =
3880 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 94, 2,
3881 2, 0, 'G', CHARSET_LEFT_TO_RIGHT,
3882 build_string ("CNS11643-1"),
3883 build_string ("CNS11643-1 (Chinese traditional)"),
3885 ("CNS 11643 Plane 1 Chinese traditional"),
3886 build_string (CHINESE_CNS_PLANE_RE("1")),
3888 staticpro (&Vcharset_chinese_cns11643_2);
3889 Vcharset_chinese_cns11643_2 =
3890 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 94, 2,
3891 2, 0, 'H', CHARSET_LEFT_TO_RIGHT,
3892 build_string ("CNS11643-2"),
3893 build_string ("CNS11643-2 (Chinese traditional)"),
3895 ("CNS 11643 Plane 2 Chinese traditional"),
3896 build_string (CHINESE_CNS_PLANE_RE("2")),
3899 staticpro (&Vcharset_latin_tcvn5712);
3900 Vcharset_latin_tcvn5712 =
3901 make_charset (LEADING_BYTE_LATIN_TCVN5712, Qlatin_tcvn5712, 96, 1,
3902 1, 1, 'Z', CHARSET_LEFT_TO_RIGHT,
3903 build_string ("TCVN 5712"),
3904 build_string ("TCVN 5712 (VSCII-2)"),
3905 build_string ("Vietnamese TCVN 5712:1983 (VSCII-2)"),
3906 build_string ("tcvn5712-1"),
3908 staticpro (&Vcharset_latin_viscii_lower);
3909 Vcharset_latin_viscii_lower =
3910 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower, 96, 1,
3911 1, 1, '1', CHARSET_LEFT_TO_RIGHT,
3912 build_string ("VISCII lower"),
3913 build_string ("VISCII lower (Vietnamese)"),
3914 build_string ("VISCII lower (Vietnamese)"),
3915 build_string ("MULEVISCII-LOWER"),
3917 staticpro (&Vcharset_latin_viscii_upper);
3918 Vcharset_latin_viscii_upper =
3919 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper, 96, 1,
3920 1, 1, '2', CHARSET_LEFT_TO_RIGHT,
3921 build_string ("VISCII upper"),
3922 build_string ("VISCII upper (Vietnamese)"),
3923 build_string ("VISCII upper (Vietnamese)"),
3924 build_string ("MULEVISCII-UPPER"),
3926 staticpro (&Vcharset_latin_viscii);
3927 Vcharset_latin_viscii =
3928 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii, 256, 1,
3929 1, 2, 0, CHARSET_LEFT_TO_RIGHT,
3930 build_string ("VISCII"),
3931 build_string ("VISCII 1.1 (Vietnamese)"),
3932 build_string ("VISCII 1.1 (Vietnamese)"),
3933 build_string ("VISCII1\\.1"),
3935 staticpro (&Vcharset_ideograph_daikanwa);
3936 Vcharset_ideograph_daikanwa =
3937 make_charset (LEADING_BYTE_DAIKANWA, Qideograph_daikanwa, 256, 2,
3938 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3939 build_string ("Daikanwa"),
3940 build_string ("Morohashi's Daikanwa"),
3941 build_string ("Daikanwa dictionary by MOROHASHI Tetsuji"),
3942 build_string ("Daikanwa"),
3943 Qnil, MIN_CHAR_DAIKANWA, MAX_CHAR_DAIKANWA, 0, 0);
3944 staticpro (&Vcharset_mojikyo);
3946 make_charset (LEADING_BYTE_MOJIKYO, Qmojikyo, 256, 3,
3947 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
3948 build_string ("Mojikyo"),
3949 build_string ("Mojikyo"),
3950 build_string ("Konjaku-Mojikyo"),
3952 Qnil, MIN_CHAR_MOJIKYO, MAX_CHAR_MOJIKYO, 0, 0);
3953 staticpro (&Vcharset_mojikyo_2022_1);
3954 Vcharset_mojikyo_2022_1 =
3955 make_charset (LEADING_BYTE_MOJIKYO_2022_1, Qmojikyo_2022_1, 94, 3,
3956 2, 2, ':', CHARSET_LEFT_TO_RIGHT,
3957 build_string ("Mojikyo-2022-1"),
3958 build_string ("Mojikyo ISO-2022 Part 1"),
3959 build_string ("Konjaku-Mojikyo for ISO/IEC 2022 Part 1"),
3963 #define DEF_MOJIKYO_PJ(n) \
3964 staticpro (&Vcharset_mojikyo_pj_##n); \
3965 Vcharset_mojikyo_pj_##n = \
3966 make_charset (LEADING_BYTE_MOJIKYO_PJ_##n, Qmojikyo_pj_##n, 94, 2, \
3967 2, 0, 0, CHARSET_LEFT_TO_RIGHT, \
3968 build_string ("Mojikyo-PJ-"#n), \
3969 build_string ("Mojikyo (pseudo JIS encoding) part "#n), \
3971 ("Konjaku-Mojikyo (pseudo JIS encoding) part "#n), \
3973 ("\\(MojikyoPJ-"#n "\\|jisx0208\\.Mojikyo-"#n "\\)$"), \
3985 DEF_MOJIKYO_PJ (10);
3986 DEF_MOJIKYO_PJ (11);
3987 DEF_MOJIKYO_PJ (12);
3988 DEF_MOJIKYO_PJ (13);
3989 DEF_MOJIKYO_PJ (14);
3990 DEF_MOJIKYO_PJ (15);
3991 DEF_MOJIKYO_PJ (16);
3992 DEF_MOJIKYO_PJ (17);
3993 DEF_MOJIKYO_PJ (18);
3994 DEF_MOJIKYO_PJ (19);
3995 DEF_MOJIKYO_PJ (20);
3996 DEF_MOJIKYO_PJ (21);
3998 staticpro (&Vcharset_ethiopic_ucs);
3999 Vcharset_ethiopic_ucs =
4000 make_charset (LEADING_BYTE_ETHIOPIC_UCS, Qethiopic_ucs, 256, 2,
4001 2, 2, 0, CHARSET_LEFT_TO_RIGHT,
4002 build_string ("Ethiopic (UCS)"),
4003 build_string ("Ethiopic (UCS)"),
4004 build_string ("Ethiopic of UCS"),
4005 build_string ("Ethiopic-Unicode"),
4006 Qnil, 0x1200, 0x137F, 0x1200, 0);
4008 staticpro (&Vcharset_chinese_big5_1);
4009 Vcharset_chinese_big5_1 =
4010 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 94, 2,
4011 2, 0, '0', CHARSET_LEFT_TO_RIGHT,
4012 build_string ("Big5"),
4013 build_string ("Big5 (Level-1)"),
4015 ("Big5 Level-1 Chinese traditional"),
4016 build_string ("big5"),
4018 staticpro (&Vcharset_chinese_big5_2);
4019 Vcharset_chinese_big5_2 =
4020 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 94, 2,
4021 2, 0, '1', CHARSET_LEFT_TO_RIGHT,
4022 build_string ("Big5"),
4023 build_string ("Big5 (Level-2)"),
4025 ("Big5 Level-2 Chinese traditional"),
4026 build_string ("big5"),
4029 #ifdef ENABLE_COMPOSITE_CHARS
4030 /* #### For simplicity, we put composite chars into a 96x96 charset.
4031 This is going to lead to problems because you can run out of
4032 room, esp. as we don't yet recycle numbers. */
4033 staticpro (&Vcharset_composite);
4034 Vcharset_composite =
4035 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 96, 2,
4036 2, 0, 0, CHARSET_LEFT_TO_RIGHT,
4037 build_string ("Composite"),
4038 build_string ("Composite characters"),
4039 build_string ("Composite characters"),
4042 /* #### not dumped properly */
4043 composite_char_row_next = 32;
4044 composite_char_col_next = 32;
4046 Vcomposite_char_string2char_hash_table =
4047 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
4048 Vcomposite_char_char2string_hash_table =
4049 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
4050 staticpro (&Vcomposite_char_string2char_hash_table);
4051 staticpro (&Vcomposite_char_char2string_hash_table);
4052 #endif /* ENABLE_COMPOSITE_CHARS */