1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii;
63 Lisp_Object Vcharset_latin_viscii_lower;
64 Lisp_Object Vcharset_latin_viscii_upper;
65 Lisp_Object Vcharset_hiragana_jisx0208;
66 Lisp_Object Vcharset_katakana_jisx0208;
68 Lisp_Object Vcharset_chinese_big5_1;
69 Lisp_Object Vcharset_chinese_big5_2;
71 #ifdef ENABLE_COMPOSITE_CHARS
72 Lisp_Object Vcharset_composite;
74 /* Hash tables for composite chars. One maps string representing
75 composed chars to their equivalent chars; one goes the
77 Lisp_Object Vcomposite_char_char2string_hash_table;
78 Lisp_Object Vcomposite_char_string2char_hash_table;
80 static int composite_char_row_next;
81 static int composite_char_col_next;
83 #endif /* ENABLE_COMPOSITE_CHARS */
85 /* Table of charsets indexed by leading byte. */
86 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
88 /* Table of charsets indexed by type/final-byte/direction. */
90 Lisp_Object charset_by_attributes[4][128];
92 Lisp_Object charset_by_attributes[4][128][2];
96 /* Table of number of bytes in the string representation of a character
97 indexed by the first byte of that representation.
99 rep_bytes_by_first_byte(c) is more efficient than the equivalent
100 canonical computation:
102 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
104 Bytecount rep_bytes_by_first_byte[0xA0] =
105 { /* 0x00 - 0x7f are for straight ASCII */
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 /* 0x80 - 0x8f are for Dimension-1 official charsets */
116 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
118 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 /* 0x90 - 0x9d are for Dimension-2 official charsets */
121 /* 0x9e is for Dimension-1 private charsets */
122 /* 0x9f is for Dimension-2 private charsets */
123 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
130 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
132 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
135 for (i = 0; i < 256; i++)
137 markobj (cte->property[i]);
143 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
145 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
146 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
149 for (i = 0; i < 256; i++)
150 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
152 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
154 if (!char_byte_table_equal (cte1->property[i],
155 cte2->property[i], depth + 1))
162 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
168 char_byte_table_hash (Lisp_Object obj, int depth)
170 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
172 return internal_array_hash (cte->property, 256, depth);
175 static const struct lrecord_description char_byte_table_description[] = {
176 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
180 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
181 mark_char_byte_table,
182 internal_object_printer,
183 0, char_byte_table_equal,
184 char_byte_table_hash,
185 char_byte_table_description,
186 struct Lisp_Char_Byte_Table);
189 make_char_byte_table (Lisp_Object initval)
193 struct Lisp_Char_Byte_Table *cte =
194 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
195 &lrecord_char_byte_table);
197 for (i = 0; i < 256; i++)
198 cte->property[i] = initval;
200 XSETCHAR_BYTE_TABLE (obj, cte);
205 copy_char_byte_table (Lisp_Object entry)
207 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
210 struct Lisp_Char_Byte_Table *ctenew =
211 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
212 &lrecord_char_byte_table);
214 for (i = 0; i < 256; i++)
216 Lisp_Object new = cte->property[i];
217 if (CHAR_BYTE_TABLE_P (new))
218 ctenew->property[i] = copy_char_byte_table (new);
220 ctenew->property[i] = new;
223 XSETCHAR_BYTE_TABLE (obj, ctenew);
229 mark_char_code_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
231 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
237 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
239 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
240 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
242 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
246 char_code_table_hash (Lisp_Object obj, int depth)
248 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
250 return char_code_table_hash (cte->table, depth + 1);
253 static const struct lrecord_description char_code_table_description[] = {
254 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
258 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
259 mark_char_code_table,
260 internal_object_printer,
261 0, char_code_table_equal,
262 char_code_table_hash,
263 char_code_table_description,
264 struct Lisp_Char_Code_Table);
267 make_char_code_table (Lisp_Object initval)
270 struct Lisp_Char_Code_Table *cte =
271 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
272 &lrecord_char_code_table);
274 cte->table = make_char_byte_table (initval);
276 XSETCHAR_CODE_TABLE (obj, cte);
281 copy_char_code_table (Lisp_Object entry)
283 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
285 struct Lisp_Char_Code_Table *ctenew =
286 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
287 &lrecord_char_code_table);
289 ctenew->table = copy_char_byte_table (cte->table);
290 XSETCHAR_CODE_TABLE (obj, ctenew);
296 get_char_code_table (Emchar ch, Lisp_Object table)
298 struct Lisp_Char_Byte_Table* cpt
299 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
300 Lisp_Object ret = cpt->property [ch >> 24];
302 if (CHAR_BYTE_TABLE_P (ret))
303 cpt = XCHAR_BYTE_TABLE (ret);
307 ret = cpt->property [(unsigned char) (ch >> 16)];
308 if (CHAR_BYTE_TABLE_P (ret))
309 cpt = XCHAR_BYTE_TABLE (ret);
313 ret = cpt->property [(unsigned char) (ch >> 8)];
314 if (CHAR_BYTE_TABLE_P (ret))
315 cpt = XCHAR_BYTE_TABLE (ret);
319 return cpt->property [(unsigned char) ch];
323 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
325 struct Lisp_Char_Byte_Table* cpt1
326 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
327 Lisp_Object ret = cpt1->property[ch >> 24];
329 if (CHAR_BYTE_TABLE_P (ret))
331 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
333 ret = cpt2->property[(unsigned char)(ch >> 16)];
334 if (CHAR_BYTE_TABLE_P (ret))
336 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
338 ret = cpt3->property[(unsigned char)(ch >> 8)];
339 if (CHAR_BYTE_TABLE_P (ret))
341 struct Lisp_Char_Byte_Table* cpt4
342 = XCHAR_BYTE_TABLE (ret);
344 cpt4->property[(unsigned char)ch] = value;
346 else if (!EQ (ret, value))
348 Lisp_Object cpt4 = make_char_byte_table (ret);
350 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
351 cpt3->property[(unsigned char)(ch >> 8)] = cpt4;
354 else if (!EQ (ret, value))
356 Lisp_Object cpt3 = make_char_byte_table (ret);
357 Lisp_Object cpt4 = make_char_byte_table (ret);
359 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
360 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)]
362 cpt2->property[(unsigned char)(ch >> 16)] = cpt3;
365 else if (!EQ (ret, value))
367 Lisp_Object cpt2 = make_char_byte_table (ret);
368 Lisp_Object cpt3 = make_char_byte_table (ret);
369 Lisp_Object cpt4 = make_char_byte_table (ret);
371 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
372 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)] = cpt4;
373 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(ch >> 16)] = cpt3;
374 cpt1->property[(unsigned char)(ch >> 24)] = cpt2;
379 Lisp_Object Vcharacter_attribute_table;
381 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
382 Return the alist of attributes of CHARACTER.
386 return get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
389 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
390 Return the value of CHARACTER's ATTRIBUTE.
392 (character, attribute))
395 = get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
401 if (!NILP (ccs = Ffind_charset (attribute)))
404 return Fcdr (Fassq (attribute, ret));
408 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
411 Emchar char_code = XCHAR (character);
413 = get_char_code_table (char_code, Vcharacter_attribute_table);
416 cell = Fassq (attribute, ret);
420 ret = Fcons (Fcons (attribute, value), ret);
422 else if (!EQ (Fcdr (cell), value))
424 Fsetcdr (cell, value);
426 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
430 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
431 Store CHARACTER's ATTRIBUTE with VALUE.
433 (character, attribute, value))
437 ccs = Ffind_charset (attribute);
441 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
446 /* ad-hoc method for `ascii' */
447 if ((XCHARSET_CHARS (ccs) == 94) &&
448 (XCHARSET_BYTE_OFFSET (ccs) != 33))
449 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
451 ccs_len = XCHARSET_CHARS (ccs);
454 signal_simple_error ("Invalid value for coded-charset",
458 rest = Fget_char_attribute (character, attribute);
465 Lisp_Object ei = Fcar (rest);
467 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
468 nv = XVECTOR_DATA(v)[i];
475 XVECTOR_DATA(v)[i] = Qnil;
476 v = XCHARSET_DECODING_TABLE (ccs);
481 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
488 Lisp_Object ei = Fcar (rest);
491 signal_simple_error ("Invalid value for coded-charset",
493 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
494 nv = XVECTOR_DATA(v)[i];
500 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
507 XVECTOR_DATA(v)[i] = character;
509 return put_char_attribute (character, attribute, value);
514 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
515 Store character's ATTRIBUTES.
519 Lisp_Object rest = attributes;
520 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
521 Lisp_Object character;
527 Lisp_Object cell = Fcar (rest);
531 signal_simple_error ("Invalid argument", attributes);
532 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
533 && XCHARSET_FINAL (ccs))
537 if (XCHARSET_DIMENSION (ccs) == 1)
539 Lisp_Object eb1 = Fcar (Fcdr (cell));
543 signal_simple_error ("Invalid argument", attributes);
545 switch (XCHARSET_CHARS (ccs))
549 + (XCHARSET_FINAL (ccs) - '0') * 94 + (b1 - 33);
553 + (XCHARSET_FINAL (ccs) - '0') * 96 + (b1 - 32);
559 else if (XCHARSET_DIMENSION (ccs) == 2)
561 Lisp_Object eb1 = Fcar (Fcdr (cell));
562 Lisp_Object eb2 = Fcar (Fcdr (Fcdr (cell)));
566 signal_simple_error ("Invalid argument", attributes);
569 signal_simple_error ("Invalid argument", attributes);
571 switch (XCHARSET_CHARS (ccs))
574 code = MIN_CHAR_94x94
575 + (XCHARSET_FINAL (ccs) - '0') * 94 * 94
576 + (b1 - 33) * 94 + (b2 - 33);
579 code = MIN_CHAR_96x96
580 + (XCHARSET_FINAL (ccs) - '0') * 96 * 96
581 + (b1 - 32) * 96 + (b2 - 32);
592 character = make_char (code);
593 goto setup_attributes;
599 else if (!INTP (code))
600 signal_simple_error ("Invalid argument", attributes);
602 character = make_char (XINT (code));
608 Lisp_Object cell = Fcar (rest);
611 signal_simple_error ("Invalid argument", attributes);
612 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
616 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
619 Lisp_Object Vutf_2000_version;
623 int leading_code_private_11;
626 Lisp_Object Qcharsetp;
628 /* Qdoc_string, Qdimension, Qchars defined in general.c */
629 Lisp_Object Qregistry, Qfinal, Qgraphic;
630 Lisp_Object Qdirection;
631 Lisp_Object Qreverse_direction_charset;
632 Lisp_Object Qleading_byte;
633 Lisp_Object Qshort_name, Qlong_name;
649 Qjapanese_jisx0208_1978,
661 Qvietnamese_viscii_lower,
662 Qvietnamese_viscii_upper,
670 Lisp_Object Ql2r, Qr2l;
672 Lisp_Object Vcharset_hash_table;
675 static Charset_ID next_allocated_leading_byte;
677 static Charset_ID next_allocated_1_byte_leading_byte;
678 static Charset_ID next_allocated_2_byte_leading_byte;
681 /* Composite characters are characters constructed by overstriking two
682 or more regular characters.
684 1) The old Mule implementation involves storing composite characters
685 in a buffer as a tag followed by all of the actual characters
686 used to make up the composite character. I think this is a bad
687 idea; it greatly complicates code that wants to handle strings
688 one character at a time because it has to deal with the possibility
689 of great big ungainly characters. It's much more reasonable to
690 simply store an index into a table of composite characters.
692 2) The current implementation only allows for 16,384 separate
693 composite characters over the lifetime of the XEmacs process.
694 This could become a potential problem if the user
695 edited lots of different files that use composite characters.
696 Due to FSF bogosity, increasing the number of allowable
697 composite characters under Mule would decrease the number
698 of possible faces that can exist. Mule already has shrunk
699 this to 2048, and further shrinkage would become uncomfortable.
700 No such problems exist in XEmacs.
702 Composite characters could be represented as 0x80 C1 C2 C3,
703 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
704 for slightly under 2^20 (one million) composite characters
705 over the XEmacs process lifetime, and you only need to
706 increase the size of a Mule character from 19 to 21 bits.
707 Or you could use 0x80 C1 C2 C3 C4, allowing for about
708 85 million (slightly over 2^26) composite characters. */
711 /************************************************************************/
712 /* Basic Emchar functions */
713 /************************************************************************/
715 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
716 string in STR. Returns the number of bytes stored.
717 Do not call this directly. Use the macro set_charptr_emchar() instead.
721 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
736 else if ( c <= 0x7ff )
738 *p++ = (c >> 6) | 0xc0;
739 *p++ = (c & 0x3f) | 0x80;
741 else if ( c <= 0xffff )
743 *p++ = (c >> 12) | 0xe0;
744 *p++ = ((c >> 6) & 0x3f) | 0x80;
745 *p++ = (c & 0x3f) | 0x80;
747 else if ( c <= 0x1fffff )
749 *p++ = (c >> 18) | 0xf0;
750 *p++ = ((c >> 12) & 0x3f) | 0x80;
751 *p++ = ((c >> 6) & 0x3f) | 0x80;
752 *p++ = (c & 0x3f) | 0x80;
754 else if ( c <= 0x3ffffff )
756 *p++ = (c >> 24) | 0xf8;
757 *p++ = ((c >> 18) & 0x3f) | 0x80;
758 *p++ = ((c >> 12) & 0x3f) | 0x80;
759 *p++ = ((c >> 6) & 0x3f) | 0x80;
760 *p++ = (c & 0x3f) | 0x80;
764 *p++ = (c >> 30) | 0xfc;
765 *p++ = ((c >> 24) & 0x3f) | 0x80;
766 *p++ = ((c >> 18) & 0x3f) | 0x80;
767 *p++ = ((c >> 12) & 0x3f) | 0x80;
768 *p++ = ((c >> 6) & 0x3f) | 0x80;
769 *p++ = (c & 0x3f) | 0x80;
772 BREAKUP_CHAR (c, charset, c1, c2);
773 lb = CHAR_LEADING_BYTE (c);
774 if (LEADING_BYTE_PRIVATE_P (lb))
775 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
777 if (EQ (charset, Vcharset_control_1))
786 /* Return the first character from a Mule-encoded string in STR,
787 assuming it's non-ASCII. Do not call this directly.
788 Use the macro charptr_emchar() instead. */
791 non_ascii_charptr_emchar (CONST Bufbyte *str)
804 else if ( b >= 0xf8 )
809 else if ( b >= 0xf0 )
814 else if ( b >= 0xe0 )
819 else if ( b >= 0xc0 )
829 for( ; len > 0; len-- )
832 ch = ( ch << 6 ) | ( b & 0x3f );
836 Bufbyte i0 = *str, i1, i2 = 0;
839 if (i0 == LEADING_BYTE_CONTROL_1)
840 return (Emchar) (*++str - 0x20);
842 if (LEADING_BYTE_PREFIX_P (i0))
847 charset = CHARSET_BY_LEADING_BYTE (i0);
848 if (XCHARSET_DIMENSION (charset) == 2)
851 return MAKE_CHAR (charset, i1, i2);
855 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
856 Do not call this directly. Use the macro valid_char_p() instead. */
860 non_ascii_valid_char_p (Emchar ch)
864 /* Must have only lowest 19 bits set */
868 f1 = CHAR_FIELD1 (ch);
869 f2 = CHAR_FIELD2 (ch);
870 f3 = CHAR_FIELD3 (ch);
876 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
877 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
878 f2 > MAX_CHAR_FIELD2_PRIVATE)
883 if (f3 != 0x20 && f3 != 0x7F)
887 NOTE: This takes advantage of the fact that
888 FIELD2_TO_OFFICIAL_LEADING_BYTE and
889 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
891 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
892 return (XCHARSET_CHARS (charset) == 96);
898 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
899 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
900 f1 > MAX_CHAR_FIELD1_PRIVATE)
902 if (f2 < 0x20 || f3 < 0x20)
905 #ifdef ENABLE_COMPOSITE_CHARS
906 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
908 if (UNBOUNDP (Fgethash (make_int (ch),
909 Vcomposite_char_char2string_hash_table,
914 #endif /* ENABLE_COMPOSITE_CHARS */
916 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
919 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
921 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
924 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
926 return (XCHARSET_CHARS (charset) == 96);
932 /************************************************************************/
933 /* Basic string functions */
934 /************************************************************************/
936 /* Copy the character pointed to by PTR into STR, assuming it's
937 non-ASCII. Do not call this directly. Use the macro
938 charptr_copy_char() instead. */
941 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
943 Bufbyte *strptr = str;
945 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
947 /* Notice fallthrough. */
949 case 6: *++strptr = *ptr++;
950 case 5: *++strptr = *ptr++;
952 case 4: *++strptr = *ptr++;
953 case 3: *++strptr = *ptr++;
954 case 2: *++strptr = *ptr;
959 return strptr + 1 - str;
963 /************************************************************************/
964 /* streams of Emchars */
965 /************************************************************************/
967 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
968 The functions below are not meant to be called directly; use
969 the macros in insdel.h. */
972 Lstream_get_emchar_1 (Lstream *stream, int ch)
974 Bufbyte str[MAX_EMCHAR_LEN];
975 Bufbyte *strptr = str;
977 str[0] = (Bufbyte) ch;
978 switch (REP_BYTES_BY_FIRST_BYTE (ch))
980 /* Notice fallthrough. */
983 ch = Lstream_getc (stream);
985 *++strptr = (Bufbyte) ch;
987 ch = Lstream_getc (stream);
989 *++strptr = (Bufbyte) ch;
992 ch = Lstream_getc (stream);
994 *++strptr = (Bufbyte) ch;
996 ch = Lstream_getc (stream);
998 *++strptr = (Bufbyte) ch;
1000 ch = Lstream_getc (stream);
1002 *++strptr = (Bufbyte) ch;
1007 return charptr_emchar (str);
1011 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1013 Bufbyte str[MAX_EMCHAR_LEN];
1014 Bytecount len = set_charptr_emchar (str, ch);
1015 return Lstream_write (stream, str, len);
1019 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1021 Bufbyte str[MAX_EMCHAR_LEN];
1022 Bytecount len = set_charptr_emchar (str, ch);
1023 Lstream_unread (stream, str, len);
1027 /************************************************************************/
1028 /* charset object */
1029 /************************************************************************/
1032 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
1034 struct Lisp_Charset *cs = XCHARSET (obj);
1036 markobj (cs->short_name);
1037 markobj (cs->long_name);
1038 markobj (cs->doc_string);
1039 markobj (cs->registry);
1040 markobj (cs->ccl_program);
1042 markobj (cs->decoding_table);
1048 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1050 struct Lisp_Charset *cs = XCHARSET (obj);
1054 error ("printing unreadable object #<charset %s 0x%x>",
1055 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1058 write_c_string ("#<charset ", printcharfun);
1059 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1060 write_c_string (" ", printcharfun);
1061 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1062 write_c_string (" ", printcharfun);
1063 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1064 write_c_string (" ", printcharfun);
1065 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1066 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
1067 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
1068 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
1069 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
1071 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1072 CHARSET_COLUMNS (cs),
1073 CHARSET_GRAPHIC (cs),
1074 CHARSET_FINAL (cs));
1075 write_c_string (buf, printcharfun);
1076 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1077 sprintf (buf, " 0x%x>", cs->header.uid);
1078 write_c_string (buf, printcharfun);
1081 static const struct lrecord_description charset_description[] = {
1082 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1084 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
1089 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1090 mark_charset, print_charset, 0, 0, 0,
1091 charset_description,
1092 struct Lisp_Charset);
1094 /* Make a new charset. */
1097 make_charset (Charset_ID id, Lisp_Object name,
1098 unsigned char type, unsigned char columns, unsigned char graphic,
1099 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1100 Lisp_Object long_name, Lisp_Object doc,
1102 Lisp_Object decoding_table,
1103 Emchar ucs_min, Emchar ucs_max,
1104 Emchar code_offset, unsigned char byte_offset)
1107 struct Lisp_Charset *cs =
1108 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1109 XSETCHARSET (obj, cs);
1111 CHARSET_ID (cs) = id;
1112 CHARSET_NAME (cs) = name;
1113 CHARSET_SHORT_NAME (cs) = short_name;
1114 CHARSET_LONG_NAME (cs) = long_name;
1115 CHARSET_DIRECTION (cs) = direction;
1116 CHARSET_TYPE (cs) = type;
1117 CHARSET_COLUMNS (cs) = columns;
1118 CHARSET_GRAPHIC (cs) = graphic;
1119 CHARSET_FINAL (cs) = final;
1120 CHARSET_DOC_STRING (cs) = doc;
1121 CHARSET_REGISTRY (cs) = reg;
1122 CHARSET_CCL_PROGRAM (cs) = Qnil;
1123 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1125 CHARSET_DECODING_TABLE(cs) = Qnil;
1126 CHARSET_UCS_MIN(cs) = ucs_min;
1127 CHARSET_UCS_MAX(cs) = ucs_max;
1128 CHARSET_CODE_OFFSET(cs) = code_offset;
1129 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1132 switch (CHARSET_TYPE (cs))
1134 case CHARSET_TYPE_94:
1135 CHARSET_DIMENSION (cs) = 1;
1136 CHARSET_CHARS (cs) = 94;
1138 case CHARSET_TYPE_96:
1139 CHARSET_DIMENSION (cs) = 1;
1140 CHARSET_CHARS (cs) = 96;
1142 case CHARSET_TYPE_94X94:
1143 CHARSET_DIMENSION (cs) = 2;
1144 CHARSET_CHARS (cs) = 94;
1146 case CHARSET_TYPE_96X96:
1147 CHARSET_DIMENSION (cs) = 2;
1148 CHARSET_CHARS (cs) = 96;
1151 case CHARSET_TYPE_128:
1152 CHARSET_DIMENSION (cs) = 1;
1153 CHARSET_CHARS (cs) = 128;
1155 case CHARSET_TYPE_128X128:
1156 CHARSET_DIMENSION (cs) = 2;
1157 CHARSET_CHARS (cs) = 128;
1159 case CHARSET_TYPE_256:
1160 CHARSET_DIMENSION (cs) = 1;
1161 CHARSET_CHARS (cs) = 256;
1163 case CHARSET_TYPE_256X256:
1164 CHARSET_DIMENSION (cs) = 2;
1165 CHARSET_CHARS (cs) = 256;
1171 if (id == LEADING_BYTE_ASCII)
1172 CHARSET_REP_BYTES (cs) = 1;
1174 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1176 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1181 /* some charsets do not have final characters. This includes
1182 ASCII, Control-1, Composite, and the two faux private
1185 if (code_offset == 0)
1187 assert (NILP (charset_by_attributes[type][final]));
1188 charset_by_attributes[type][final] = obj;
1191 assert (NILP (charset_by_attributes[type][final][direction]));
1192 charset_by_attributes[type][final][direction] = obj;
1196 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1197 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1200 /* official leading byte */
1201 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
1204 /* Some charsets are "faux" and don't have names or really exist at
1205 all except in the leading-byte table. */
1207 Fputhash (name, obj, Vcharset_hash_table);
1212 get_unallocated_leading_byte (int dimension)
1217 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1220 lb = next_allocated_leading_byte++;
1224 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1227 lb = next_allocated_1_byte_leading_byte++;
1231 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1234 lb = next_allocated_2_byte_leading_byte++;
1240 ("No more character sets free for this dimension",
1241 make_int (dimension));
1248 range_charset_code_point (Lisp_Object charset, Emchar ch)
1252 if ((XCHARSET_UCS_MIN (charset) <= ch)
1253 && (ch <= XCHARSET_UCS_MAX (charset)))
1255 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1257 if (XCHARSET_DIMENSION (charset) == 1)
1258 return list1 (make_int (d + XCHARSET_BYTE_OFFSET (charset)));
1259 else if (XCHARSET_DIMENSION (charset) == 2)
1260 return list2 (make_int (d / XCHARSET_CHARS (charset)
1261 + XCHARSET_BYTE_OFFSET (charset)),
1262 make_int (d % XCHARSET_CHARS (charset)
1263 + XCHARSET_BYTE_OFFSET (charset)));
1264 else if (XCHARSET_DIMENSION (charset) == 3)
1265 return list3 (make_int (d / (XCHARSET_CHARS (charset)
1266 * XCHARSET_CHARS (charset))
1267 + XCHARSET_BYTE_OFFSET (charset)),
1268 make_int (d / XCHARSET_CHARS (charset)
1269 % XCHARSET_CHARS (charset)
1270 + XCHARSET_BYTE_OFFSET (charset)),
1271 make_int (d % XCHARSET_CHARS (charset)
1272 + XCHARSET_BYTE_OFFSET (charset)));
1273 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1274 return list4 (make_int (d / (XCHARSET_CHARS (charset)
1275 * XCHARSET_CHARS (charset)
1276 * XCHARSET_CHARS (charset))
1277 + XCHARSET_BYTE_OFFSET (charset)),
1278 make_int (d / (XCHARSET_CHARS (charset)
1279 * XCHARSET_CHARS (charset))
1280 % XCHARSET_CHARS (charset)
1281 + XCHARSET_BYTE_OFFSET (charset)),
1282 make_int (d / XCHARSET_CHARS (charset)
1283 % XCHARSET_CHARS (charset)
1284 + XCHARSET_BYTE_OFFSET (charset)),
1285 make_int (d % XCHARSET_CHARS (charset)
1286 + XCHARSET_BYTE_OFFSET (charset)));
1288 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1290 if (XCHARSET_DIMENSION (charset) == 1)
1292 if (XCHARSET_CHARS (charset) == 94)
1294 if (((d = ch - (MIN_CHAR_94
1295 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1297 return list1 (make_int (d + 33));
1299 else if (XCHARSET_CHARS (charset) == 96)
1301 if (((d = ch - (MIN_CHAR_96
1302 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1304 return list1 (make_int (d + 32));
1309 else if (XCHARSET_DIMENSION (charset) == 2)
1311 if (XCHARSET_CHARS (charset) == 94)
1313 if (((d = ch - (MIN_CHAR_94x94
1314 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1317 return list2 ((d / 94) + 33, d % 94 + 33);
1319 else if (XCHARSET_CHARS (charset) == 96)
1321 if (((d = ch - (MIN_CHAR_96x96
1322 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1325 return list2 ((d / 96) + 32, d % 96 + 32);
1333 charset_code_point (Lisp_Object charset, Emchar ch)
1335 Lisp_Object cdef = get_char_code_table (ch, Vcharacter_attribute_table);
1337 if (!EQ (cdef, Qnil))
1339 Lisp_Object field = Fassq (charset, cdef);
1341 if (!EQ (field, Qnil))
1342 return Fcdr (field);
1344 return range_charset_code_point (charset, ch);
1347 Lisp_Object Vdefault_coded_charset_priority_list;
1351 /************************************************************************/
1352 /* Basic charset Lisp functions */
1353 /************************************************************************/
1355 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1356 Return non-nil if OBJECT is a charset.
1360 return CHARSETP (object) ? Qt : Qnil;
1363 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1364 Retrieve the charset of the given name.
1365 If CHARSET-OR-NAME is a charset object, it is simply returned.
1366 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1367 nil is returned. Otherwise the associated charset object is returned.
1371 if (CHARSETP (charset_or_name))
1372 return charset_or_name;
1374 CHECK_SYMBOL (charset_or_name);
1375 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1378 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1379 Retrieve the charset of the given name.
1380 Same as `find-charset' except an error is signalled if there is no such
1381 charset instead of returning nil.
1385 Lisp_Object charset = Ffind_charset (name);
1388 signal_simple_error ("No such charset", name);
1392 /* We store the charsets in hash tables with the names as the key and the
1393 actual charset object as the value. Occasionally we need to use them
1394 in a list format. These routines provide us with that. */
1395 struct charset_list_closure
1397 Lisp_Object *charset_list;
1401 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1402 void *charset_list_closure)
1404 /* This function can GC */
1405 struct charset_list_closure *chcl =
1406 (struct charset_list_closure*) charset_list_closure;
1407 Lisp_Object *charset_list = chcl->charset_list;
1409 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1413 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1414 Return a list of the names of all defined charsets.
1418 Lisp_Object charset_list = Qnil;
1419 struct gcpro gcpro1;
1420 struct charset_list_closure charset_list_closure;
1422 GCPRO1 (charset_list);
1423 charset_list_closure.charset_list = &charset_list;
1424 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1425 &charset_list_closure);
1428 return charset_list;
1431 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1432 Return the name of the given charset.
1436 return XCHARSET_NAME (Fget_charset (charset));
1439 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1440 Define a new character set.
1441 This function is for use with Mule support.
1442 NAME is a symbol, the name by which the character set is normally referred.
1443 DOC-STRING is a string describing the character set.
1444 PROPS is a property list, describing the specific nature of the
1445 character set. Recognized properties are:
1447 'short-name Short version of the charset name (ex: Latin-1)
1448 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1449 'registry A regular expression matching the font registry field for
1451 'dimension Number of octets used to index a character in this charset.
1452 Either 1 or 2. Defaults to 1.
1453 'columns Number of columns used to display a character in this charset.
1454 Only used in TTY mode. (Under X, the actual width of a
1455 character can be derived from the font used to display the
1456 characters.) If unspecified, defaults to the dimension
1457 (this is almost always the correct value).
1458 'chars Number of characters in each dimension (94 or 96).
1459 Defaults to 94. Note that if the dimension is 2, the
1460 character set thus described is 94x94 or 96x96.
1461 'final Final byte of ISO 2022 escape sequence. Must be
1462 supplied. Each combination of (DIMENSION, CHARS) defines a
1463 separate namespace for final bytes. Note that ISO
1464 2022 restricts the final byte to the range
1465 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1466 dimension == 2. Note also that final bytes in the range
1467 0x30 - 0x3F are reserved for user-defined (not official)
1469 'graphic 0 (use left half of font on output) or 1 (use right half
1470 of font on output). Defaults to 0. For example, for
1471 a font whose registry is ISO8859-1, the left half
1472 (octets 0x20 - 0x7F) is the `ascii' character set, while
1473 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1474 character set. With 'graphic set to 0, the octets
1475 will have their high bit cleared; with it set to 1,
1476 the octets will have their high bit set.
1477 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1479 'ccl-program A compiled CCL program used to convert a character in
1480 this charset into an index into the font. This is in
1481 addition to the 'graphic property. The CCL program
1482 is passed the octets of the character, with the high
1483 bit cleared and set depending upon whether the value
1484 of the 'graphic property is 0 or 1.
1486 (name, doc_string, props))
1488 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1489 int direction = CHARSET_LEFT_TO_RIGHT;
1491 Lisp_Object registry = Qnil;
1492 Lisp_Object charset;
1493 Lisp_Object rest, keyword, value;
1494 Lisp_Object ccl_program = Qnil;
1495 Lisp_Object short_name = Qnil, long_name = Qnil;
1496 int byte_offset = -1;
1498 CHECK_SYMBOL (name);
1499 if (!NILP (doc_string))
1500 CHECK_STRING (doc_string);
1502 charset = Ffind_charset (name);
1503 if (!NILP (charset))
1504 signal_simple_error ("Cannot redefine existing charset", name);
1506 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1508 if (EQ (keyword, Qshort_name))
1510 CHECK_STRING (value);
1514 if (EQ (keyword, Qlong_name))
1516 CHECK_STRING (value);
1520 else if (EQ (keyword, Qdimension))
1523 dimension = XINT (value);
1524 if (dimension < 1 || dimension > 2)
1525 signal_simple_error ("Invalid value for 'dimension", value);
1528 else if (EQ (keyword, Qchars))
1531 chars = XINT (value);
1532 if (chars != 94 && chars != 96)
1533 signal_simple_error ("Invalid value for 'chars", value);
1536 else if (EQ (keyword, Qcolumns))
1539 columns = XINT (value);
1540 if (columns != 1 && columns != 2)
1541 signal_simple_error ("Invalid value for 'columns", value);
1544 else if (EQ (keyword, Qgraphic))
1547 graphic = XINT (value);
1549 if (graphic < 0 || graphic > 2)
1551 if (graphic < 0 || graphic > 1)
1553 signal_simple_error ("Invalid value for 'graphic", value);
1556 else if (EQ (keyword, Qregistry))
1558 CHECK_STRING (value);
1562 else if (EQ (keyword, Qdirection))
1564 if (EQ (value, Ql2r))
1565 direction = CHARSET_LEFT_TO_RIGHT;
1566 else if (EQ (value, Qr2l))
1567 direction = CHARSET_RIGHT_TO_LEFT;
1569 signal_simple_error ("Invalid value for 'direction", value);
1572 else if (EQ (keyword, Qfinal))
1574 CHECK_CHAR_COERCE_INT (value);
1575 final = XCHAR (value);
1576 if (final < '0' || final > '~')
1577 signal_simple_error ("Invalid value for 'final", value);
1580 else if (EQ (keyword, Qccl_program))
1582 CHECK_VECTOR (value);
1583 ccl_program = value;
1587 signal_simple_error ("Unrecognized property", keyword);
1591 error ("'final must be specified");
1592 if (dimension == 2 && final > 0x5F)
1594 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1598 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1600 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1602 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1603 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1605 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1607 id = get_unallocated_leading_byte (dimension);
1609 if (NILP (doc_string))
1610 doc_string = build_string ("");
1612 if (NILP (registry))
1613 registry = build_string ("");
1615 if (NILP (short_name))
1616 XSETSTRING (short_name, XSYMBOL (name)->name);
1618 if (NILP (long_name))
1619 long_name = doc_string;
1622 columns = dimension;
1624 if (byte_offset < 0)
1628 else if (chars == 96)
1634 charset = make_charset (id, name, type, columns, graphic,
1635 final, direction, short_name, long_name,
1636 doc_string, registry,
1637 Qnil, 0, 0, 0, byte_offset);
1638 if (!NILP (ccl_program))
1639 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1643 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1645 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1646 NEW-NAME is the name of the new charset. Return the new charset.
1648 (charset, new_name))
1650 Lisp_Object new_charset = Qnil;
1651 int id, dimension, columns, graphic, final;
1652 int direction, type;
1653 Lisp_Object registry, doc_string, short_name, long_name;
1654 struct Lisp_Charset *cs;
1656 charset = Fget_charset (charset);
1657 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1658 signal_simple_error ("Charset already has reverse-direction charset",
1661 CHECK_SYMBOL (new_name);
1662 if (!NILP (Ffind_charset (new_name)))
1663 signal_simple_error ("Cannot redefine existing charset", new_name);
1665 cs = XCHARSET (charset);
1667 type = CHARSET_TYPE (cs);
1668 columns = CHARSET_COLUMNS (cs);
1669 dimension = CHARSET_DIMENSION (cs);
1670 id = get_unallocated_leading_byte (dimension);
1672 graphic = CHARSET_GRAPHIC (cs);
1673 final = CHARSET_FINAL (cs);
1674 direction = CHARSET_RIGHT_TO_LEFT;
1675 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1676 direction = CHARSET_LEFT_TO_RIGHT;
1677 doc_string = CHARSET_DOC_STRING (cs);
1678 short_name = CHARSET_SHORT_NAME (cs);
1679 long_name = CHARSET_LONG_NAME (cs);
1680 registry = CHARSET_REGISTRY (cs);
1682 new_charset = make_charset (id, new_name, type, columns,
1683 graphic, final, direction, short_name, long_name,
1684 doc_string, registry,
1686 CHARSET_DECODING_TABLE(cs),
1687 CHARSET_UCS_MIN(cs),
1688 CHARSET_UCS_MAX(cs),
1689 CHARSET_CODE_OFFSET(cs),
1690 CHARSET_BYTE_OFFSET(cs)
1696 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1697 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1702 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1703 Define symbol ALIAS as an alias for CHARSET.
1707 CHECK_SYMBOL (alias);
1708 charset = Fget_charset (charset);
1709 return Fputhash (alias, charset, Vcharset_hash_table);
1712 /* #### Reverse direction charsets not yet implemented. */
1714 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1716 Return the reverse-direction charset parallel to CHARSET, if any.
1717 This is the charset with the same properties (in particular, the same
1718 dimension, number of characters per dimension, and final byte) as
1719 CHARSET but whose characters are displayed in the opposite direction.
1723 charset = Fget_charset (charset);
1724 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1728 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1729 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1730 If DIRECTION is omitted, both directions will be checked (left-to-right
1731 will be returned if character sets exist for both directions).
1733 (dimension, chars, final, direction))
1735 int dm, ch, fi, di = -1;
1737 Lisp_Object obj = Qnil;
1739 CHECK_INT (dimension);
1740 dm = XINT (dimension);
1741 if (dm < 1 || dm > 2)
1742 signal_simple_error ("Invalid value for DIMENSION", dimension);
1746 if (ch != 94 && ch != 96)
1747 signal_simple_error ("Invalid value for CHARS", chars);
1749 CHECK_CHAR_COERCE_INT (final);
1751 if (fi < '0' || fi > '~')
1752 signal_simple_error ("Invalid value for FINAL", final);
1754 if (EQ (direction, Ql2r))
1755 di = CHARSET_LEFT_TO_RIGHT;
1756 else if (EQ (direction, Qr2l))
1757 di = CHARSET_RIGHT_TO_LEFT;
1758 else if (!NILP (direction))
1759 signal_simple_error ("Invalid value for DIRECTION", direction);
1761 if (dm == 2 && fi > 0x5F)
1763 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1766 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1768 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1772 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
1774 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
1777 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
1780 return XCHARSET_NAME (obj);
1784 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1785 Return short name of CHARSET.
1789 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1792 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1793 Return long name of CHARSET.
1797 return XCHARSET_LONG_NAME (Fget_charset (charset));
1800 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1801 Return description of CHARSET.
1805 return XCHARSET_DOC_STRING (Fget_charset (charset));
1808 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1809 Return dimension of CHARSET.
1813 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1816 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1817 Return property PROP of CHARSET.
1818 Recognized properties are those listed in `make-charset', as well as
1819 'name and 'doc-string.
1823 struct Lisp_Charset *cs;
1825 charset = Fget_charset (charset);
1826 cs = XCHARSET (charset);
1828 CHECK_SYMBOL (prop);
1829 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1830 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1831 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1832 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1833 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1834 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1835 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1836 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1837 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1838 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1839 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1840 if (EQ (prop, Qdirection))
1841 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1842 if (EQ (prop, Qreverse_direction_charset))
1844 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1848 return XCHARSET_NAME (obj);
1850 signal_simple_error ("Unrecognized charset property name", prop);
1851 return Qnil; /* not reached */
1854 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1855 Return charset identification number of CHARSET.
1859 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1862 /* #### We need to figure out which properties we really want to
1865 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1866 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1868 (charset, ccl_program))
1870 charset = Fget_charset (charset);
1871 CHECK_VECTOR (ccl_program);
1872 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1877 invalidate_charset_font_caches (Lisp_Object charset)
1879 /* Invalidate font cache entries for charset on all devices. */
1880 Lisp_Object devcons, concons, hash_table;
1881 DEVICE_LOOP_NO_BREAK (devcons, concons)
1883 struct device *d = XDEVICE (XCAR (devcons));
1884 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1885 if (!UNBOUNDP (hash_table))
1886 Fclrhash (hash_table);
1890 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1891 Set the 'registry property of CHARSET to REGISTRY.
1893 (charset, registry))
1895 charset = Fget_charset (charset);
1896 CHECK_STRING (registry);
1897 XCHARSET_REGISTRY (charset) = registry;
1898 invalidate_charset_font_caches (charset);
1899 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1904 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1905 Return mapping-table of CHARSET.
1909 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1912 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1913 Set mapping-table of CHARSET to TABLE.
1917 struct Lisp_Charset *cs;
1918 Lisp_Object old_table;
1921 charset = Fget_charset (charset);
1922 cs = XCHARSET (charset);
1924 if (EQ (table, Qnil))
1926 CHARSET_DECODING_TABLE(cs) = table;
1929 else if (VECTORP (table))
1931 if (XVECTOR_LENGTH (table) > CHARSET_CHARS (cs))
1932 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
1933 old_table = CHARSET_DECODING_TABLE(cs);
1934 CHARSET_DECODING_TABLE(cs) = table;
1937 signal_error (Qwrong_type_argument,
1938 list2 (build_translated_string ("vector-or-nil-p"),
1940 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
1942 switch (CHARSET_DIMENSION (cs))
1945 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1947 Lisp_Object c = XVECTOR_DATA(table)[i];
1952 list1 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
1956 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1958 Lisp_Object v = XVECTOR_DATA(table)[i];
1964 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
1966 CHARSET_DECODING_TABLE(cs) = old_table;
1967 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
1969 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1971 Lisp_Object c = XVECTOR_DATA(v)[j];
1974 put_char_attribute (c, charset,
1977 (i + CHARSET_BYTE_OFFSET (cs)),
1979 (j + CHARSET_BYTE_OFFSET (cs))));
1983 put_char_attribute (v, charset,
1985 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
1994 /************************************************************************/
1995 /* Lisp primitives for working with characters */
1996 /************************************************************************/
1998 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1999 Make a character from CHARSET and octets ARG1 and ARG2.
2000 ARG2 is required only for characters from two-dimensional charsets.
2001 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2002 character s with caron.
2004 (charset, arg1, arg2))
2006 struct Lisp_Charset *cs;
2008 int lowlim, highlim;
2010 charset = Fget_charset (charset);
2011 cs = XCHARSET (charset);
2013 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2014 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2016 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2018 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2019 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2022 /* It is useful (and safe, according to Olivier Galibert) to strip
2023 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
2024 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2025 Latin 2 code of the character. */
2033 if (a1 < lowlim || a1 > highlim)
2034 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2036 if (CHARSET_DIMENSION (cs) == 1)
2040 ("Charset is of dimension one; second octet must be nil", arg2);
2041 return make_char (MAKE_CHAR (charset, a1, 0));
2050 a2 = XINT (arg2) & 0x7f;
2052 if (a2 < lowlim || a2 > highlim)
2053 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2055 return make_char (MAKE_CHAR (charset, a1, a2));
2058 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2059 Return the character set of char CH.
2063 CHECK_CHAR_COERCE_INT (ch);
2065 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2068 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2069 Return list of charset and one or two position-codes of CHAR.
2073 /* This function can GC */
2074 struct gcpro gcpro1, gcpro2;
2075 Lisp_Object charset = Qnil;
2076 Lisp_Object rc = Qnil;
2079 GCPRO2 (charset, rc);
2080 CHECK_CHAR_COERCE_INT (character);
2082 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2084 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2086 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2090 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2098 #ifdef ENABLE_COMPOSITE_CHARS
2099 /************************************************************************/
2100 /* composite character functions */
2101 /************************************************************************/
2104 lookup_composite_char (Bufbyte *str, int len)
2106 Lisp_Object lispstr = make_string (str, len);
2107 Lisp_Object ch = Fgethash (lispstr,
2108 Vcomposite_char_string2char_hash_table,
2114 if (composite_char_row_next >= 128)
2115 signal_simple_error ("No more composite chars available", lispstr);
2116 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2117 composite_char_col_next);
2118 Fputhash (make_char (emch), lispstr,
2119 Vcomposite_char_char2string_hash_table);
2120 Fputhash (lispstr, make_char (emch),
2121 Vcomposite_char_string2char_hash_table);
2122 composite_char_col_next++;
2123 if (composite_char_col_next >= 128)
2125 composite_char_col_next = 32;
2126 composite_char_row_next++;
2135 composite_char_string (Emchar ch)
2137 Lisp_Object str = Fgethash (make_char (ch),
2138 Vcomposite_char_char2string_hash_table,
2140 assert (!UNBOUNDP (str));
2144 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2145 Convert a string into a single composite character.
2146 The character is the result of overstriking all the characters in
2151 CHECK_STRING (string);
2152 return make_char (lookup_composite_char (XSTRING_DATA (string),
2153 XSTRING_LENGTH (string)));
2156 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2157 Return a string of the characters comprising a composite character.
2165 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2166 signal_simple_error ("Must be composite char", ch);
2167 return composite_char_string (emch);
2169 #endif /* ENABLE_COMPOSITE_CHARS */
2172 /************************************************************************/
2173 /* initialization */
2174 /************************************************************************/
2177 syms_of_mule_charset (void)
2179 DEFSUBR (Fcharsetp);
2180 DEFSUBR (Ffind_charset);
2181 DEFSUBR (Fget_charset);
2182 DEFSUBR (Fcharset_list);
2183 DEFSUBR (Fcharset_name);
2184 DEFSUBR (Fmake_charset);
2185 DEFSUBR (Fmake_reverse_direction_charset);
2186 /* DEFSUBR (Freverse_direction_charset); */
2187 DEFSUBR (Fdefine_charset_alias);
2188 DEFSUBR (Fcharset_from_attributes);
2189 DEFSUBR (Fcharset_short_name);
2190 DEFSUBR (Fcharset_long_name);
2191 DEFSUBR (Fcharset_description);
2192 DEFSUBR (Fcharset_dimension);
2193 DEFSUBR (Fcharset_property);
2194 DEFSUBR (Fcharset_id);
2195 DEFSUBR (Fset_charset_ccl_program);
2196 DEFSUBR (Fset_charset_registry);
2198 DEFSUBR (Fchar_attribute_alist);
2199 DEFSUBR (Fget_char_attribute);
2200 DEFSUBR (Fput_char_attribute);
2201 DEFSUBR (Fdefine_char);
2202 DEFSUBR (Fcharset_mapping_table);
2203 DEFSUBR (Fset_charset_mapping_table);
2206 DEFSUBR (Fmake_char);
2207 DEFSUBR (Fchar_charset);
2208 DEFSUBR (Fsplit_char);
2210 #ifdef ENABLE_COMPOSITE_CHARS
2211 DEFSUBR (Fmake_composite_char);
2212 DEFSUBR (Fcomposite_char_string);
2215 defsymbol (&Qcharsetp, "charsetp");
2216 defsymbol (&Qregistry, "registry");
2217 defsymbol (&Qfinal, "final");
2218 defsymbol (&Qgraphic, "graphic");
2219 defsymbol (&Qdirection, "direction");
2220 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2221 defsymbol (&Qshort_name, "short-name");
2222 defsymbol (&Qlong_name, "long-name");
2224 defsymbol (&Ql2r, "l2r");
2225 defsymbol (&Qr2l, "r2l");
2227 /* Charsets, compatible with FSF 20.3
2228 Naming convention is Script-Charset[-Edition] */
2229 defsymbol (&Qascii, "ascii");
2230 defsymbol (&Qcontrol_1, "control-1");
2231 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2232 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2233 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2234 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2235 defsymbol (&Qthai_tis620, "thai-tis620");
2236 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2237 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2238 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2239 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2240 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2241 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2242 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2243 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2244 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2245 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2246 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2247 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2248 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2249 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2251 defsymbol (&Qucs, "ucs");
2252 defsymbol (&Qucs_bmp, "ucs-bmp");
2253 defsymbol (&Qlatin_viscii, "latin-viscii");
2254 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2255 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2256 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2257 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2258 defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208");
2259 defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208");
2261 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2262 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2264 defsymbol (&Qcomposite, "composite");
2268 vars_of_mule_charset (void)
2275 /* Table of charsets indexed by leading byte. */
2276 for (i = 0; i < countof (charset_by_leading_byte); i++)
2277 charset_by_leading_byte[i] = Qnil;
2280 /* Table of charsets indexed by type/final-byte. */
2281 for (i = 0; i < countof (charset_by_attributes); i++)
2282 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2283 charset_by_attributes[i][j] = Qnil;
2285 /* Table of charsets indexed by type/final-byte/direction. */
2286 for (i = 0; i < countof (charset_by_attributes); i++)
2287 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2288 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2289 charset_by_attributes[i][j][k] = Qnil;
2293 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2295 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2296 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2300 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2301 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2302 Leading-code of private TYPE9N charset of column-width 1.
2304 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2308 Vutf_2000_version = build_string("0.12 (Kashiwara)");
2309 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2310 Version number of UTF-2000.
2313 staticpro (&Vcharacter_attribute_table);
2314 Vcharacter_attribute_table = make_char_code_table (Qnil);
2316 Vdefault_coded_charset_priority_list = Qnil;
2317 DEFVAR_LISP ("default-coded-charset-priority-list",
2318 &Vdefault_coded_charset_priority_list /*
2319 Default order of preferred coded-character-sets.
2325 complex_vars_of_mule_charset (void)
2327 staticpro (&Vcharset_hash_table);
2328 Vcharset_hash_table =
2329 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2331 /* Predefined character sets. We store them into variables for
2336 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2337 CHARSET_TYPE_256X256, 1, 2, 0,
2338 CHARSET_LEFT_TO_RIGHT,
2339 build_string ("BMP"),
2340 build_string ("BMP"),
2341 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2342 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2343 Qnil, 0, 0xFFFF, 0, 0);
2345 # define MIN_CHAR_THAI 0
2346 # define MAX_CHAR_THAI 0
2347 # define MIN_CHAR_GREEK 0
2348 # define MAX_CHAR_GREEK 0
2349 # define MIN_CHAR_HEBREW 0
2350 # define MAX_CHAR_HEBREW 0
2351 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2352 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2353 # define MIN_CHAR_CYRILLIC 0
2354 # define MAX_CHAR_CYRILLIC 0
2357 make_charset (LEADING_BYTE_ASCII, Qascii,
2358 CHARSET_TYPE_94, 1, 0, 'B',
2359 CHARSET_LEFT_TO_RIGHT,
2360 build_string ("ASCII"),
2361 build_string ("ASCII)"),
2362 build_string ("ASCII (ISO646 IRV)"),
2363 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2364 Qnil, 0, 0x7F, 0, 0);
2365 Vcharset_control_1 =
2366 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2367 CHARSET_TYPE_94, 1, 1, 0,
2368 CHARSET_LEFT_TO_RIGHT,
2369 build_string ("C1"),
2370 build_string ("Control characters"),
2371 build_string ("Control characters 128-191"),
2373 Qnil, 0x80, 0x9F, 0, 0);
2374 Vcharset_latin_iso8859_1 =
2375 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2376 CHARSET_TYPE_96, 1, 1, 'A',
2377 CHARSET_LEFT_TO_RIGHT,
2378 build_string ("Latin-1"),
2379 build_string ("ISO8859-1 (Latin-1)"),
2380 build_string ("ISO8859-1 (Latin-1)"),
2381 build_string ("iso8859-1"),
2382 Qnil, 0xA0, 0xFF, 0, 32);
2383 Vcharset_latin_iso8859_2 =
2384 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2385 CHARSET_TYPE_96, 1, 1, 'B',
2386 CHARSET_LEFT_TO_RIGHT,
2387 build_string ("Latin-2"),
2388 build_string ("ISO8859-2 (Latin-2)"),
2389 build_string ("ISO8859-2 (Latin-2)"),
2390 build_string ("iso8859-2"),
2392 Vcharset_latin_iso8859_3 =
2393 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2394 CHARSET_TYPE_96, 1, 1, 'C',
2395 CHARSET_LEFT_TO_RIGHT,
2396 build_string ("Latin-3"),
2397 build_string ("ISO8859-3 (Latin-3)"),
2398 build_string ("ISO8859-3 (Latin-3)"),
2399 build_string ("iso8859-3"),
2401 Vcharset_latin_iso8859_4 =
2402 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2403 CHARSET_TYPE_96, 1, 1, 'D',
2404 CHARSET_LEFT_TO_RIGHT,
2405 build_string ("Latin-4"),
2406 build_string ("ISO8859-4 (Latin-4)"),
2407 build_string ("ISO8859-4 (Latin-4)"),
2408 build_string ("iso8859-4"),
2410 Vcharset_thai_tis620 =
2411 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2412 CHARSET_TYPE_96, 1, 1, 'T',
2413 CHARSET_LEFT_TO_RIGHT,
2414 build_string ("TIS620"),
2415 build_string ("TIS620 (Thai)"),
2416 build_string ("TIS620.2529 (Thai)"),
2417 build_string ("tis620"),
2418 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2419 Vcharset_greek_iso8859_7 =
2420 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2421 CHARSET_TYPE_96, 1, 1, 'F',
2422 CHARSET_LEFT_TO_RIGHT,
2423 build_string ("ISO8859-7"),
2424 build_string ("ISO8859-7 (Greek)"),
2425 build_string ("ISO8859-7 (Greek)"),
2426 build_string ("iso8859-7"),
2427 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2428 Vcharset_arabic_iso8859_6 =
2429 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2430 CHARSET_TYPE_96, 1, 1, 'G',
2431 CHARSET_RIGHT_TO_LEFT,
2432 build_string ("ISO8859-6"),
2433 build_string ("ISO8859-6 (Arabic)"),
2434 build_string ("ISO8859-6 (Arabic)"),
2435 build_string ("iso8859-6"),
2437 Vcharset_hebrew_iso8859_8 =
2438 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2439 CHARSET_TYPE_96, 1, 1, 'H',
2440 CHARSET_RIGHT_TO_LEFT,
2441 build_string ("ISO8859-8"),
2442 build_string ("ISO8859-8 (Hebrew)"),
2443 build_string ("ISO8859-8 (Hebrew)"),
2444 build_string ("iso8859-8"),
2445 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2446 Vcharset_katakana_jisx0201 =
2447 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2448 CHARSET_TYPE_94, 1, 1, 'I',
2449 CHARSET_LEFT_TO_RIGHT,
2450 build_string ("JISX0201 Kana"),
2451 build_string ("JISX0201.1976 (Japanese Kana)"),
2452 build_string ("JISX0201.1976 Japanese Kana"),
2453 build_string ("jisx0201\\.1976"),
2455 MIN_CHAR_HALFWIDTH_KATAKANA,
2456 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2457 Vcharset_latin_jisx0201 =
2458 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2459 CHARSET_TYPE_94, 1, 0, 'J',
2460 CHARSET_LEFT_TO_RIGHT,
2461 build_string ("JISX0201 Roman"),
2462 build_string ("JISX0201.1976 (Japanese Roman)"),
2463 build_string ("JISX0201.1976 Japanese Roman"),
2464 build_string ("jisx0201\\.1976"),
2466 Vcharset_cyrillic_iso8859_5 =
2467 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2468 CHARSET_TYPE_96, 1, 1, 'L',
2469 CHARSET_LEFT_TO_RIGHT,
2470 build_string ("ISO8859-5"),
2471 build_string ("ISO8859-5 (Cyrillic)"),
2472 build_string ("ISO8859-5 (Cyrillic)"),
2473 build_string ("iso8859-5"),
2474 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2475 Vcharset_latin_iso8859_9 =
2476 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2477 CHARSET_TYPE_96, 1, 1, 'M',
2478 CHARSET_LEFT_TO_RIGHT,
2479 build_string ("Latin-5"),
2480 build_string ("ISO8859-9 (Latin-5)"),
2481 build_string ("ISO8859-9 (Latin-5)"),
2482 build_string ("iso8859-9"),
2484 Vcharset_japanese_jisx0208_1978 =
2485 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2486 CHARSET_TYPE_94X94, 2, 0, '@',
2487 CHARSET_LEFT_TO_RIGHT,
2488 build_string ("JIS X0208:1978"),
2489 build_string ("JIS X0208:1978 (Japanese)"),
2491 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2492 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2494 Vcharset_chinese_gb2312 =
2495 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2496 CHARSET_TYPE_94X94, 2, 0, 'A',
2497 CHARSET_LEFT_TO_RIGHT,
2498 build_string ("GB2312"),
2499 build_string ("GB2312)"),
2500 build_string ("GB2312 Chinese simplified"),
2501 build_string ("gb2312"),
2503 Vcharset_japanese_jisx0208 =
2504 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2505 CHARSET_TYPE_94X94, 2, 0, 'B',
2506 CHARSET_LEFT_TO_RIGHT,
2507 build_string ("JISX0208"),
2508 build_string ("JIS X0208:1983 (Japanese)"),
2509 build_string ("JIS X0208:1983 Japanese Kanji"),
2510 build_string ("jisx0208\\.1983"),
2512 Vcharset_korean_ksc5601 =
2513 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2514 CHARSET_TYPE_94X94, 2, 0, 'C',
2515 CHARSET_LEFT_TO_RIGHT,
2516 build_string ("KSC5601"),
2517 build_string ("KSC5601 (Korean"),
2518 build_string ("KSC5601 Korean Hangul and Hanja"),
2519 build_string ("ksc5601"),
2521 Vcharset_japanese_jisx0212 =
2522 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2523 CHARSET_TYPE_94X94, 2, 0, 'D',
2524 CHARSET_LEFT_TO_RIGHT,
2525 build_string ("JISX0212"),
2526 build_string ("JISX0212 (Japanese)"),
2527 build_string ("JISX0212 Japanese Supplement"),
2528 build_string ("jisx0212"),
2531 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2532 Vcharset_chinese_cns11643_1 =
2533 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2534 CHARSET_TYPE_94X94, 2, 0, 'G',
2535 CHARSET_LEFT_TO_RIGHT,
2536 build_string ("CNS11643-1"),
2537 build_string ("CNS11643-1 (Chinese traditional)"),
2539 ("CNS 11643 Plane 1 Chinese traditional"),
2540 build_string (CHINESE_CNS_PLANE_RE("1")),
2542 Vcharset_chinese_cns11643_2 =
2543 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2544 CHARSET_TYPE_94X94, 2, 0, 'H',
2545 CHARSET_LEFT_TO_RIGHT,
2546 build_string ("CNS11643-2"),
2547 build_string ("CNS11643-2 (Chinese traditional)"),
2549 ("CNS 11643 Plane 2 Chinese traditional"),
2550 build_string (CHINESE_CNS_PLANE_RE("2")),
2553 Vcharset_latin_viscii_lower =
2554 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2555 CHARSET_TYPE_96, 1, 1, '1',
2556 CHARSET_LEFT_TO_RIGHT,
2557 build_string ("VISCII lower"),
2558 build_string ("VISCII lower (Vietnamese)"),
2559 build_string ("VISCII lower (Vietnamese)"),
2560 build_string ("MULEVISCII-LOWER"),
2562 Vcharset_latin_viscii_upper =
2563 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2564 CHARSET_TYPE_96, 1, 1, '2',
2565 CHARSET_LEFT_TO_RIGHT,
2566 build_string ("VISCII upper"),
2567 build_string ("VISCII upper (Vietnamese)"),
2568 build_string ("VISCII upper (Vietnamese)"),
2569 build_string ("MULEVISCII-UPPER"),
2571 Vcharset_latin_viscii =
2572 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2573 CHARSET_TYPE_256, 1, 2, 0,
2574 CHARSET_LEFT_TO_RIGHT,
2575 build_string ("VISCII"),
2576 build_string ("VISCII 1.1 (Vietnamese)"),
2577 build_string ("VISCII 1.1 (Vietnamese)"),
2578 build_string ("VISCII1\\.1"),
2580 Vcharset_hiragana_jisx0208 =
2581 make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
2582 CHARSET_TYPE_94X94, 2, 0, 'B',
2583 CHARSET_LEFT_TO_RIGHT,
2584 build_string ("Hiragana"),
2585 build_string ("Hiragana of JIS X0208"),
2586 build_string ("Japanese Hiragana of JIS X0208"),
2587 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2588 Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA,
2589 (0x24 - 33) * 94 + (0x21 - 33), 33);
2590 Vcharset_katakana_jisx0208 =
2591 make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208,
2592 CHARSET_TYPE_94X94, 2, 0, 'B',
2593 CHARSET_LEFT_TO_RIGHT,
2594 build_string ("Katakana"),
2595 build_string ("Katakana of JIS X0208"),
2596 build_string ("Japanese Katakana of JIS X0208"),
2597 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2598 Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA,
2599 (0x25 - 33) * 94 + (0x21 - 33), 33);
2601 Vcharset_chinese_big5_1 =
2602 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2603 CHARSET_TYPE_94X94, 2, 0, '0',
2604 CHARSET_LEFT_TO_RIGHT,
2605 build_string ("Big5"),
2606 build_string ("Big5 (Level-1)"),
2608 ("Big5 Level-1 Chinese traditional"),
2609 build_string ("big5"),
2611 Vcharset_chinese_big5_2 =
2612 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2613 CHARSET_TYPE_94X94, 2, 0, '1',
2614 CHARSET_LEFT_TO_RIGHT,
2615 build_string ("Big5"),
2616 build_string ("Big5 (Level-2)"),
2618 ("Big5 Level-2 Chinese traditional"),
2619 build_string ("big5"),
2622 #ifdef ENABLE_COMPOSITE_CHARS
2623 /* #### For simplicity, we put composite chars into a 96x96 charset.
2624 This is going to lead to problems because you can run out of
2625 room, esp. as we don't yet recycle numbers. */
2626 Vcharset_composite =
2627 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2628 CHARSET_TYPE_96X96, 2, 0, 0,
2629 CHARSET_LEFT_TO_RIGHT,
2630 build_string ("Composite"),
2631 build_string ("Composite characters"),
2632 build_string ("Composite characters"),
2635 composite_char_row_next = 32;
2636 composite_char_col_next = 32;
2638 Vcomposite_char_string2char_hash_table =
2639 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2640 Vcomposite_char_char2string_hash_table =
2641 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2642 staticpro (&Vcomposite_char_string2char_hash_table);
2643 staticpro (&Vcomposite_char_char2string_hash_table);
2644 #endif /* ENABLE_COMPOSITE_CHARS */