1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii;
63 Lisp_Object Vcharset_latin_viscii_lower;
64 Lisp_Object Vcharset_latin_viscii_upper;
65 Lisp_Object Vcharset_hiragana_jisx0208;
66 Lisp_Object Vcharset_katakana_jisx0208;
68 Lisp_Object Vcharset_chinese_big5_1;
69 Lisp_Object Vcharset_chinese_big5_2;
71 #ifdef ENABLE_COMPOSITE_CHARS
72 Lisp_Object Vcharset_composite;
74 /* Hash tables for composite chars. One maps string representing
75 composed chars to their equivalent chars; one goes the
77 Lisp_Object Vcomposite_char_char2string_hash_table;
78 Lisp_Object Vcomposite_char_string2char_hash_table;
80 static int composite_char_row_next;
81 static int composite_char_col_next;
83 #endif /* ENABLE_COMPOSITE_CHARS */
85 /* Table of charsets indexed by leading byte. */
86 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
88 /* Table of charsets indexed by type/final-byte/direction. */
90 Lisp_Object charset_by_attributes[4][128];
92 Lisp_Object charset_by_attributes[4][128][2];
96 /* Table of number of bytes in the string representation of a character
97 indexed by the first byte of that representation.
99 rep_bytes_by_first_byte(c) is more efficient than the equivalent
100 canonical computation:
102 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
104 Bytecount rep_bytes_by_first_byte[0xA0] =
105 { /* 0x00 - 0x7f are for straight ASCII */
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 /* 0x80 - 0x8f are for Dimension-1 official charsets */
116 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
118 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 /* 0x90 - 0x9d are for Dimension-2 official charsets */
121 /* 0x9e is for Dimension-1 private charsets */
122 /* 0x9f is for Dimension-2 private charsets */
123 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
130 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
132 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
135 for (i = 0; i < 256; i++)
137 markobj (cte->property[i]);
143 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
145 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
146 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
149 for (i = 0; i < 256; i++)
150 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
152 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
154 if (!char_byte_table_equal (cte1->property[i],
155 cte2->property[i], depth + 1))
162 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
168 char_byte_table_hash (Lisp_Object obj, int depth)
170 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
172 return internal_array_hash (cte->property, 256, depth);
175 static const struct lrecord_description char_byte_table_description[] = {
176 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
180 DEFINE_LRECORD_IMPLEMENTATION ("char-byte-table", char_byte_table,
181 mark_char_byte_table,
182 internal_object_printer,
183 0, char_byte_table_equal,
184 char_byte_table_hash,
185 char_byte_table_description,
186 struct Lisp_Char_Byte_Table);
189 make_char_byte_table (Lisp_Object initval)
193 struct Lisp_Char_Byte_Table *cte =
194 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
195 &lrecord_char_byte_table);
197 for (i = 0; i < 256; i++)
198 cte->property[i] = initval;
200 XSETCHAR_BYTE_TABLE (obj, cte);
205 copy_char_byte_table (Lisp_Object entry)
207 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
210 struct Lisp_Char_Byte_Table *ctenew =
211 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
212 &lrecord_char_byte_table);
214 for (i = 0; i < 256; i++)
216 Lisp_Object new = cte->property[i];
217 if (CHAR_BYTE_TABLE_P (new))
218 ctenew->property[i] = copy_char_byte_table (new);
220 ctenew->property[i] = new;
223 XSETCHAR_BYTE_TABLE (obj, ctenew);
229 mark_char_code_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
231 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
237 char_code_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
239 struct Lisp_Char_Code_Table *cte1 = XCHAR_CODE_TABLE (obj1);
240 struct Lisp_Char_Code_Table *cte2 = XCHAR_CODE_TABLE (obj2);
242 return char_byte_table_equal (cte1->table, cte2->table, depth + 1);
246 char_code_table_hash (Lisp_Object obj, int depth)
248 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (obj);
250 return char_code_table_hash (cte->table, depth + 1);
253 static const struct lrecord_description char_code_table_description[] = {
254 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Code_Table, table), 1 },
258 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_code_table,
259 mark_char_code_table,
260 internal_object_printer,
261 0, char_code_table_equal,
262 char_code_table_hash,
263 char_code_table_description,
264 struct Lisp_Char_Code_Table);
267 make_char_code_table (Lisp_Object initval)
270 struct Lisp_Char_Code_Table *cte =
271 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
272 &lrecord_char_code_table);
274 cte->table = make_char_byte_table (initval);
276 XSETCHAR_CODE_TABLE (obj, cte);
281 copy_char_code_table (Lisp_Object entry)
283 struct Lisp_Char_Code_Table *cte = XCHAR_CODE_TABLE (entry);
285 struct Lisp_Char_Code_Table *ctenew =
286 alloc_lcrecord_type (struct Lisp_Char_Code_Table,
287 &lrecord_char_code_table);
289 ctenew->table = copy_char_byte_table (cte->table);
290 XSETCHAR_CODE_TABLE (obj, ctenew);
296 get_char_code_table (Emchar ch, Lisp_Object table)
298 struct Lisp_Char_Byte_Table* cpt
299 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
300 Lisp_Object ret = cpt->property [ch >> 24];
302 if (CHAR_BYTE_TABLE_P (ret))
303 cpt = XCHAR_BYTE_TABLE (ret);
307 ret = cpt->property [(unsigned char) (ch >> 16)];
308 if (CHAR_BYTE_TABLE_P (ret))
309 cpt = XCHAR_BYTE_TABLE (ret);
313 ret = cpt->property [(unsigned char) (ch >> 8)];
314 if (CHAR_BYTE_TABLE_P (ret))
315 cpt = XCHAR_BYTE_TABLE (ret);
319 return cpt->property [(unsigned char) ch];
323 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
325 struct Lisp_Char_Byte_Table* cpt1
326 = XCHAR_BYTE_TABLE (XCHAR_CODE_TABLE (table)->table);
327 Lisp_Object ret = cpt1->property[ch >> 24];
329 if (CHAR_BYTE_TABLE_P (ret))
331 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
333 ret = cpt2->property[(unsigned char)(ch >> 16)];
334 if (CHAR_BYTE_TABLE_P (ret))
336 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
338 ret = cpt3->property[(unsigned char)(ch >> 8)];
339 if (CHAR_BYTE_TABLE_P (ret))
341 struct Lisp_Char_Byte_Table* cpt4
342 = XCHAR_BYTE_TABLE (ret);
344 cpt4->property[(unsigned char)ch] = value;
346 else if (!EQ (ret, value))
348 Lisp_Object cpt4 = make_char_byte_table (ret);
350 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
351 cpt3->property[(unsigned char)(ch >> 8)] = cpt4;
354 else if (!EQ (ret, value))
356 Lisp_Object cpt3 = make_char_byte_table (ret);
357 Lisp_Object cpt4 = make_char_byte_table (ret);
359 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
360 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)]
362 cpt2->property[(unsigned char)(ch >> 16)] = cpt3;
365 else if (!EQ (ret, value))
367 Lisp_Object cpt2 = make_char_byte_table (ret);
368 Lisp_Object cpt3 = make_char_byte_table (ret);
369 Lisp_Object cpt4 = make_char_byte_table (ret);
371 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
372 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)] = cpt4;
373 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(ch >> 16)] = cpt3;
374 cpt1->property[(unsigned char)(ch >> 24)] = cpt2;
379 Lisp_Object Vcharacter_attribute_table;
381 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
382 Return the alist of attributes of CHARACTER.
386 return get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
389 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
390 Return the value of CHARACTER's ATTRIBUTE.
392 (character, attribute))
395 = get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
401 if (!NILP (ccs = Ffind_charset (attribute)))
404 return Fcdr (Fassq (attribute, ret));
408 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
411 Emchar char_code = XCHAR (character);
413 = get_char_code_table (char_code, Vcharacter_attribute_table);
416 cell = Fassq (attribute, ret);
420 ret = Fcons (Fcons (attribute, value), ret);
422 else if (!EQ (Fcdr (cell), value))
424 Fsetcdr (cell, value);
426 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
430 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
431 Store CHARACTER's ATTRIBUTE with VALUE.
433 (character, attribute, value))
437 ccs = Ffind_charset (attribute);
441 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
446 /* ad-hoc method for `ascii' */
447 if ((XCHARSET_CHARS (ccs) == 94) &&
448 (XCHARSET_BYTE_OFFSET (ccs) != 33))
449 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
451 ccs_len = XCHARSET_CHARS (ccs);
454 signal_simple_error ("Invalid value for coded-charset",
458 rest = Fget_char_attribute (character, attribute);
465 Lisp_Object ei = Fcar (rest);
467 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
468 nv = XVECTOR_DATA(v)[i];
475 XVECTOR_DATA(v)[i] = Qnil;
476 v = XCHARSET_DECODING_TABLE (ccs);
481 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
488 Lisp_Object ei = Fcar (rest);
491 signal_simple_error ("Invalid value for coded-charset",
493 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
494 nv = XVECTOR_DATA(v)[i];
500 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
507 XVECTOR_DATA(v)[i] = character;
509 return put_char_attribute (character, attribute, value);
514 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
515 Store character's ATTRIBUTES.
519 Lisp_Object rest = attributes;
520 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
521 Lisp_Object character;
527 Lisp_Object cell = Fcar (rest);
531 signal_simple_error ("Invalid argument", attributes);
532 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
533 && XCHARSET_FINAL (ccs))
537 if (XCHARSET_DIMENSION (ccs) == 1)
539 Lisp_Object eb1 = Fcar (Fcdr (cell));
543 signal_simple_error ("Invalid argument", attributes);
545 switch (XCHARSET_CHARS (ccs))
549 + (XCHARSET_FINAL (ccs) - '0') * 94 + (b1 - 33);
553 + (XCHARSET_FINAL (ccs) - '0') * 96 + (b1 - 32);
559 else if (XCHARSET_DIMENSION (ccs) == 2)
561 Lisp_Object eb1 = Fcar (Fcdr (cell));
562 Lisp_Object eb2 = Fcar (Fcdr (Fcdr (cell)));
566 signal_simple_error ("Invalid argument", attributes);
569 signal_simple_error ("Invalid argument", attributes);
571 switch (XCHARSET_CHARS (ccs))
574 code = MIN_CHAR_94x94
575 + (XCHARSET_FINAL (ccs) - '0') * 94 * 94
576 + (b1 - 33) * 94 + (b2 - 33);
579 code = MIN_CHAR_96x96
580 + (XCHARSET_FINAL (ccs) - '0') * 96 * 96
581 + (b1 - 32) * 96 + (b2 - 32);
592 character = make_char (code);
593 goto setup_attributes;
599 else if (!INTP (code))
600 signal_simple_error ("Invalid argument", attributes);
602 character = make_char (XINT (code));
608 Lisp_Object cell = Fcar (rest);
611 signal_simple_error ("Invalid argument", attributes);
612 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
616 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
619 Lisp_Object Vutf_2000_version;
623 int leading_code_private_11;
626 Lisp_Object Qcharsetp;
628 /* Qdoc_string, Qdimension, Qchars defined in general.c */
629 Lisp_Object Qregistry, Qfinal, Qgraphic;
630 Lisp_Object Qdirection;
631 Lisp_Object Qreverse_direction_charset;
632 Lisp_Object Qleading_byte;
633 Lisp_Object Qshort_name, Qlong_name;
649 Qjapanese_jisx0208_1978,
661 Qvietnamese_viscii_lower,
662 Qvietnamese_viscii_upper,
670 Lisp_Object Ql2r, Qr2l;
672 Lisp_Object Vcharset_hash_table;
675 static Charset_ID next_allocated_leading_byte;
677 static Charset_ID next_allocated_1_byte_leading_byte;
678 static Charset_ID next_allocated_2_byte_leading_byte;
681 /* Composite characters are characters constructed by overstriking two
682 or more regular characters.
684 1) The old Mule implementation involves storing composite characters
685 in a buffer as a tag followed by all of the actual characters
686 used to make up the composite character. I think this is a bad
687 idea; it greatly complicates code that wants to handle strings
688 one character at a time because it has to deal with the possibility
689 of great big ungainly characters. It's much more reasonable to
690 simply store an index into a table of composite characters.
692 2) The current implementation only allows for 16,384 separate
693 composite characters over the lifetime of the XEmacs process.
694 This could become a potential problem if the user
695 edited lots of different files that use composite characters.
696 Due to FSF bogosity, increasing the number of allowable
697 composite characters under Mule would decrease the number
698 of possible faces that can exist. Mule already has shrunk
699 this to 2048, and further shrinkage would become uncomfortable.
700 No such problems exist in XEmacs.
702 Composite characters could be represented as 0x80 C1 C2 C3,
703 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
704 for slightly under 2^20 (one million) composite characters
705 over the XEmacs process lifetime, and you only need to
706 increase the size of a Mule character from 19 to 21 bits.
707 Or you could use 0x80 C1 C2 C3 C4, allowing for about
708 85 million (slightly over 2^26) composite characters. */
711 /************************************************************************/
712 /* Basic Emchar functions */
713 /************************************************************************/
715 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
716 string in STR. Returns the number of bytes stored.
717 Do not call this directly. Use the macro set_charptr_emchar() instead.
721 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
736 else if ( c <= 0x7ff )
738 *p++ = (c >> 6) | 0xc0;
739 *p++ = (c & 0x3f) | 0x80;
741 else if ( c <= 0xffff )
743 *p++ = (c >> 12) | 0xe0;
744 *p++ = ((c >> 6) & 0x3f) | 0x80;
745 *p++ = (c & 0x3f) | 0x80;
747 else if ( c <= 0x1fffff )
749 *p++ = (c >> 18) | 0xf0;
750 *p++ = ((c >> 12) & 0x3f) | 0x80;
751 *p++ = ((c >> 6) & 0x3f) | 0x80;
752 *p++ = (c & 0x3f) | 0x80;
754 else if ( c <= 0x3ffffff )
756 *p++ = (c >> 24) | 0xf8;
757 *p++ = ((c >> 18) & 0x3f) | 0x80;
758 *p++ = ((c >> 12) & 0x3f) | 0x80;
759 *p++ = ((c >> 6) & 0x3f) | 0x80;
760 *p++ = (c & 0x3f) | 0x80;
764 *p++ = (c >> 30) | 0xfc;
765 *p++ = ((c >> 24) & 0x3f) | 0x80;
766 *p++ = ((c >> 18) & 0x3f) | 0x80;
767 *p++ = ((c >> 12) & 0x3f) | 0x80;
768 *p++ = ((c >> 6) & 0x3f) | 0x80;
769 *p++ = (c & 0x3f) | 0x80;
772 BREAKUP_CHAR (c, charset, c1, c2);
773 lb = CHAR_LEADING_BYTE (c);
774 if (LEADING_BYTE_PRIVATE_P (lb))
775 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
777 if (EQ (charset, Vcharset_control_1))
786 /* Return the first character from a Mule-encoded string in STR,
787 assuming it's non-ASCII. Do not call this directly.
788 Use the macro charptr_emchar() instead. */
791 non_ascii_charptr_emchar (CONST Bufbyte *str)
804 else if ( b >= 0xf8 )
809 else if ( b >= 0xf0 )
814 else if ( b >= 0xe0 )
819 else if ( b >= 0xc0 )
829 for( ; len > 0; len-- )
832 ch = ( ch << 6 ) | ( b & 0x3f );
836 Bufbyte i0 = *str, i1, i2 = 0;
839 if (i0 == LEADING_BYTE_CONTROL_1)
840 return (Emchar) (*++str - 0x20);
842 if (LEADING_BYTE_PREFIX_P (i0))
847 charset = CHARSET_BY_LEADING_BYTE (i0);
848 if (XCHARSET_DIMENSION (charset) == 2)
851 return MAKE_CHAR (charset, i1, i2);
855 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
856 Do not call this directly. Use the macro valid_char_p() instead. */
860 non_ascii_valid_char_p (Emchar ch)
864 /* Must have only lowest 19 bits set */
868 f1 = CHAR_FIELD1 (ch);
869 f2 = CHAR_FIELD2 (ch);
870 f3 = CHAR_FIELD3 (ch);
876 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
877 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
878 f2 > MAX_CHAR_FIELD2_PRIVATE)
883 if (f3 != 0x20 && f3 != 0x7F)
887 NOTE: This takes advantage of the fact that
888 FIELD2_TO_OFFICIAL_LEADING_BYTE and
889 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
891 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
892 return (XCHARSET_CHARS (charset) == 96);
898 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
899 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
900 f1 > MAX_CHAR_FIELD1_PRIVATE)
902 if (f2 < 0x20 || f3 < 0x20)
905 #ifdef ENABLE_COMPOSITE_CHARS
906 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
908 if (UNBOUNDP (Fgethash (make_int (ch),
909 Vcomposite_char_char2string_hash_table,
914 #endif /* ENABLE_COMPOSITE_CHARS */
916 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
919 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
921 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
924 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
926 return (XCHARSET_CHARS (charset) == 96);
932 /************************************************************************/
933 /* Basic string functions */
934 /************************************************************************/
936 /* Copy the character pointed to by PTR into STR, assuming it's
937 non-ASCII. Do not call this directly. Use the macro
938 charptr_copy_char() instead. */
941 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
943 Bufbyte *strptr = str;
945 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
947 /* Notice fallthrough. */
949 case 6: *++strptr = *ptr++;
950 case 5: *++strptr = *ptr++;
952 case 4: *++strptr = *ptr++;
953 case 3: *++strptr = *ptr++;
954 case 2: *++strptr = *ptr;
959 return strptr + 1 - str;
963 /************************************************************************/
964 /* streams of Emchars */
965 /************************************************************************/
967 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
968 The functions below are not meant to be called directly; use
969 the macros in insdel.h. */
972 Lstream_get_emchar_1 (Lstream *stream, int ch)
974 Bufbyte str[MAX_EMCHAR_LEN];
975 Bufbyte *strptr = str;
977 str[0] = (Bufbyte) ch;
978 switch (REP_BYTES_BY_FIRST_BYTE (ch))
980 /* Notice fallthrough. */
983 ch = Lstream_getc (stream);
985 *++strptr = (Bufbyte) ch;
987 ch = Lstream_getc (stream);
989 *++strptr = (Bufbyte) ch;
992 ch = Lstream_getc (stream);
994 *++strptr = (Bufbyte) ch;
996 ch = Lstream_getc (stream);
998 *++strptr = (Bufbyte) ch;
1000 ch = Lstream_getc (stream);
1002 *++strptr = (Bufbyte) ch;
1007 return charptr_emchar (str);
1011 Lstream_fput_emchar (Lstream *stream, Emchar ch)
1013 Bufbyte str[MAX_EMCHAR_LEN];
1014 Bytecount len = set_charptr_emchar (str, ch);
1015 return Lstream_write (stream, str, len);
1019 Lstream_funget_emchar (Lstream *stream, Emchar ch)
1021 Bufbyte str[MAX_EMCHAR_LEN];
1022 Bytecount len = set_charptr_emchar (str, ch);
1023 Lstream_unread (stream, str, len);
1027 /************************************************************************/
1028 /* charset object */
1029 /************************************************************************/
1032 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
1034 struct Lisp_Charset *cs = XCHARSET (obj);
1036 markobj (cs->short_name);
1037 markobj (cs->long_name);
1038 markobj (cs->doc_string);
1039 markobj (cs->registry);
1040 markobj (cs->ccl_program);
1042 markobj (cs->decoding_table);
1048 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
1050 struct Lisp_Charset *cs = XCHARSET (obj);
1054 error ("printing unreadable object #<charset %s 0x%x>",
1055 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
1058 write_c_string ("#<charset ", printcharfun);
1059 print_internal (CHARSET_NAME (cs), printcharfun, 0);
1060 write_c_string (" ", printcharfun);
1061 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
1062 write_c_string (" ", printcharfun);
1063 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
1064 write_c_string (" ", printcharfun);
1065 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
1066 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
1067 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
1068 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
1069 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
1071 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1072 CHARSET_COLUMNS (cs),
1073 CHARSET_GRAPHIC (cs),
1074 CHARSET_FINAL (cs));
1075 write_c_string (buf, printcharfun);
1076 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1077 sprintf (buf, " 0x%x>", cs->header.uid);
1078 write_c_string (buf, printcharfun);
1081 static const struct lrecord_description charset_description[] = {
1082 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1084 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
1089 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1090 mark_charset, print_charset, 0, 0, 0,
1091 charset_description,
1092 struct Lisp_Charset);
1094 /* Make a new charset. */
1097 make_charset (Charset_ID id, Lisp_Object name,
1098 unsigned char type, unsigned char columns, unsigned char graphic,
1099 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1100 Lisp_Object long_name, Lisp_Object doc,
1102 Lisp_Object decoding_table,
1103 Emchar ucs_min, Emchar ucs_max,
1104 Emchar code_offset, unsigned char byte_offset)
1107 struct Lisp_Charset *cs =
1108 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1109 XSETCHARSET (obj, cs);
1111 CHARSET_ID (cs) = id;
1112 CHARSET_NAME (cs) = name;
1113 CHARSET_SHORT_NAME (cs) = short_name;
1114 CHARSET_LONG_NAME (cs) = long_name;
1115 CHARSET_DIRECTION (cs) = direction;
1116 CHARSET_TYPE (cs) = type;
1117 CHARSET_COLUMNS (cs) = columns;
1118 CHARSET_GRAPHIC (cs) = graphic;
1119 CHARSET_FINAL (cs) = final;
1120 CHARSET_DOC_STRING (cs) = doc;
1121 CHARSET_REGISTRY (cs) = reg;
1122 CHARSET_CCL_PROGRAM (cs) = Qnil;
1123 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1125 CHARSET_DECODING_TABLE(cs) = Qnil;
1126 CHARSET_UCS_MIN(cs) = ucs_min;
1127 CHARSET_UCS_MAX(cs) = ucs_max;
1128 CHARSET_CODE_OFFSET(cs) = code_offset;
1129 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1132 switch (CHARSET_TYPE (cs))
1134 case CHARSET_TYPE_94:
1135 CHARSET_DIMENSION (cs) = 1;
1136 CHARSET_CHARS (cs) = 94;
1138 case CHARSET_TYPE_96:
1139 CHARSET_DIMENSION (cs) = 1;
1140 CHARSET_CHARS (cs) = 96;
1142 case CHARSET_TYPE_94X94:
1143 CHARSET_DIMENSION (cs) = 2;
1144 CHARSET_CHARS (cs) = 94;
1146 case CHARSET_TYPE_96X96:
1147 CHARSET_DIMENSION (cs) = 2;
1148 CHARSET_CHARS (cs) = 96;
1151 case CHARSET_TYPE_128:
1152 CHARSET_DIMENSION (cs) = 1;
1153 CHARSET_CHARS (cs) = 128;
1155 case CHARSET_TYPE_128X128:
1156 CHARSET_DIMENSION (cs) = 2;
1157 CHARSET_CHARS (cs) = 128;
1159 case CHARSET_TYPE_256:
1160 CHARSET_DIMENSION (cs) = 1;
1161 CHARSET_CHARS (cs) = 256;
1163 case CHARSET_TYPE_256X256:
1164 CHARSET_DIMENSION (cs) = 2;
1165 CHARSET_CHARS (cs) = 256;
1171 if (id == LEADING_BYTE_ASCII)
1172 CHARSET_REP_BYTES (cs) = 1;
1174 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1176 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1181 /* some charsets do not have final characters. This includes
1182 ASCII, Control-1, Composite, and the two faux private
1185 if (code_offset == 0)
1187 assert (NILP (charset_by_attributes[type][final]));
1188 charset_by_attributes[type][final] = obj;
1191 assert (NILP (charset_by_attributes[type][final][direction]));
1192 charset_by_attributes[type][final][direction] = obj;
1196 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1197 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1200 /* official leading byte */
1201 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
1204 /* Some charsets are "faux" and don't have names or really exist at
1205 all except in the leading-byte table. */
1207 Fputhash (name, obj, Vcharset_hash_table);
1212 get_unallocated_leading_byte (int dimension)
1217 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1220 lb = next_allocated_leading_byte++;
1224 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1227 lb = next_allocated_1_byte_leading_byte++;
1231 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1234 lb = next_allocated_2_byte_leading_byte++;
1240 ("No more character sets free for this dimension",
1241 make_int (dimension));
1248 range_charset_code_point (Lisp_Object charset, Emchar ch)
1252 if ((XCHARSET_UCS_MIN (charset) <= ch)
1253 && (ch <= XCHARSET_UCS_MAX (charset)))
1255 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1257 if (XCHARSET_DIMENSION (charset) == 1)
1258 return list1 (make_int (d + XCHARSET_BYTE_OFFSET (charset)));
1259 else if (XCHARSET_DIMENSION (charset) == 2)
1260 return list2 (make_int (d / XCHARSET_CHARS (charset)
1261 + XCHARSET_BYTE_OFFSET (charset)),
1262 make_int (d % XCHARSET_CHARS (charset)
1263 + XCHARSET_BYTE_OFFSET (charset)));
1264 else if (XCHARSET_DIMENSION (charset) == 3)
1265 return list3 (make_int (d / (XCHARSET_CHARS (charset)
1266 * XCHARSET_CHARS (charset))
1267 + XCHARSET_BYTE_OFFSET (charset)),
1268 make_int (d / XCHARSET_CHARS (charset)
1269 % XCHARSET_CHARS (charset)
1270 + XCHARSET_BYTE_OFFSET (charset)),
1271 make_int (d % XCHARSET_CHARS (charset)
1272 + XCHARSET_BYTE_OFFSET (charset)));
1273 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1274 return list4 (make_int (d / (XCHARSET_CHARS (charset)
1275 * XCHARSET_CHARS (charset)
1276 * XCHARSET_CHARS (charset))
1277 + XCHARSET_BYTE_OFFSET (charset)),
1278 make_int (d / (XCHARSET_CHARS (charset)
1279 * XCHARSET_CHARS (charset))
1280 % XCHARSET_CHARS (charset)
1281 + XCHARSET_BYTE_OFFSET (charset)),
1282 make_int (d / XCHARSET_CHARS (charset)
1283 % XCHARSET_CHARS (charset)
1284 + XCHARSET_BYTE_OFFSET (charset)),
1285 make_int (d % XCHARSET_CHARS (charset)
1286 + XCHARSET_BYTE_OFFSET (charset)));
1288 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1290 if (XCHARSET_DIMENSION (charset) == 1)
1292 if (XCHARSET_CHARS (charset) == 94)
1294 if (((d = ch - (MIN_CHAR_94
1295 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1297 return list1 (make_int (d + 33));
1299 else if (XCHARSET_CHARS (charset) == 96)
1301 if (((d = ch - (MIN_CHAR_96
1302 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1304 return list1 (make_int (d + 32));
1309 else if (XCHARSET_DIMENSION (charset) == 2)
1311 if (XCHARSET_CHARS (charset) == 94)
1313 if (((d = ch - (MIN_CHAR_94x94
1314 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1317 return list2 (make_int ((d / 94) + 33),
1318 make_int (d % 94 + 33));
1320 else if (XCHARSET_CHARS (charset) == 96)
1322 if (((d = ch - (MIN_CHAR_96x96
1323 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1326 return list2 (make_int ((d / 96) + 32),
1327 make_int (d % 96 + 32));
1335 split_builtin_char (Emchar c)
1337 if (c < MIN_CHAR_OBS_94x94)
1339 if (c <= MAX_CHAR_BASIC_LATIN)
1341 return list2 (Vcharset_ascii, make_int (c));
1345 return list2 (Vcharset_control_1, make_int (c & 0x7F));
1349 return list2 (Vcharset_latin_iso8859_1, make_int (c & 0x7F));
1351 else if ((MIN_CHAR_GREEK <= c) && (c <= MAX_CHAR_GREEK))
1353 return list2 (Vcharset_greek_iso8859_7,
1354 make_int (c - MIN_CHAR_GREEK + 0x20));
1356 else if ((MIN_CHAR_CYRILLIC <= c) && (c <= MAX_CHAR_CYRILLIC))
1358 return list2 (Vcharset_cyrillic_iso8859_5,
1359 make_int (c - MIN_CHAR_CYRILLIC + 0x20));
1361 else if ((MIN_CHAR_HEBREW <= c) && (c <= MAX_CHAR_HEBREW))
1363 return list2 (Vcharset_hebrew_iso8859_8,
1364 make_int (c - MIN_CHAR_HEBREW + 0x20));
1366 else if ((MIN_CHAR_THAI <= c) && (c <= MAX_CHAR_THAI))
1368 return list2 (Vcharset_thai_tis620,
1369 make_int (c - MIN_CHAR_THAI + 0x20));
1371 else if ((MIN_CHAR_HALFWIDTH_KATAKANA <= c)
1372 && (c <= MAX_CHAR_HALFWIDTH_KATAKANA))
1374 return list2 (Vcharset_katakana_jisx0201,
1375 make_int (c - MIN_CHAR_HALFWIDTH_KATAKANA + 33));
1379 return list3 (Vcharset_ucs_bmp,
1380 make_int (c >> 8), make_int (c & 0xff));
1383 else if (c <= MAX_CHAR_OBS_94x94)
1385 return list3 (CHARSET_BY_ATTRIBUTES
1386 (CHARSET_TYPE_94X94,
1387 ((c - MIN_CHAR_OBS_94x94) / (94 * 94)) + '@',
1388 CHARSET_LEFT_TO_RIGHT),
1389 make_int ((((c - MIN_CHAR_OBS_94x94) / 94) % 94) + 33),
1390 make_int (((c - MIN_CHAR_OBS_94x94) % 94) + 33));
1392 else if (c <= MAX_CHAR_94)
1394 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94,
1395 ((c - MIN_CHAR_94) / 94) + '0',
1396 CHARSET_LEFT_TO_RIGHT),
1397 make_int (((c - MIN_CHAR_94) % 94) + 33));
1399 else if (c <= MAX_CHAR_96)
1401 return list2 (CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_96,
1402 ((c - MIN_CHAR_96) / 96) + '0',
1403 CHARSET_LEFT_TO_RIGHT),
1404 make_int (((c - MIN_CHAR_96) % 96) + 32));
1406 else if (c <= MAX_CHAR_94x94)
1408 return list3 (CHARSET_BY_ATTRIBUTES
1409 (CHARSET_TYPE_94X94,
1410 ((c - MIN_CHAR_94x94) / (94 * 94)) + '0',
1411 CHARSET_LEFT_TO_RIGHT),
1412 make_int ((((c - MIN_CHAR_94x94) / 94) % 94) + 33),
1413 make_int (((c - MIN_CHAR_94x94) % 94) + 33));
1415 else if (c <= MAX_CHAR_96x96)
1417 return list3 (CHARSET_BY_ATTRIBUTES
1418 (CHARSET_TYPE_96X96,
1419 ((c - MIN_CHAR_96x96) / (96 * 96)) + '0',
1420 CHARSET_LEFT_TO_RIGHT),
1421 make_int ((((c - MIN_CHAR_96x96) / 96) % 96) + 32),
1422 make_int (((c - MIN_CHAR_96x96) % 96) + 32));
1431 charset_code_point (Lisp_Object charset, Emchar ch)
1433 Lisp_Object cdef = get_char_code_table (ch, Vcharacter_attribute_table);
1435 if (!EQ (cdef, Qnil))
1437 Lisp_Object field = Fassq (charset, cdef);
1439 if (!EQ (field, Qnil))
1440 return Fcdr (field);
1442 return range_charset_code_point (charset, ch);
1445 Lisp_Object Vdefault_coded_charset_priority_list;
1449 /************************************************************************/
1450 /* Basic charset Lisp functions */
1451 /************************************************************************/
1453 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1454 Return non-nil if OBJECT is a charset.
1458 return CHARSETP (object) ? Qt : Qnil;
1461 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1462 Retrieve the charset of the given name.
1463 If CHARSET-OR-NAME is a charset object, it is simply returned.
1464 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1465 nil is returned. Otherwise the associated charset object is returned.
1469 if (CHARSETP (charset_or_name))
1470 return charset_or_name;
1472 CHECK_SYMBOL (charset_or_name);
1473 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1476 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1477 Retrieve the charset of the given name.
1478 Same as `find-charset' except an error is signalled if there is no such
1479 charset instead of returning nil.
1483 Lisp_Object charset = Ffind_charset (name);
1486 signal_simple_error ("No such charset", name);
1490 /* We store the charsets in hash tables with the names as the key and the
1491 actual charset object as the value. Occasionally we need to use them
1492 in a list format. These routines provide us with that. */
1493 struct charset_list_closure
1495 Lisp_Object *charset_list;
1499 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1500 void *charset_list_closure)
1502 /* This function can GC */
1503 struct charset_list_closure *chcl =
1504 (struct charset_list_closure*) charset_list_closure;
1505 Lisp_Object *charset_list = chcl->charset_list;
1507 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1511 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1512 Return a list of the names of all defined charsets.
1516 Lisp_Object charset_list = Qnil;
1517 struct gcpro gcpro1;
1518 struct charset_list_closure charset_list_closure;
1520 GCPRO1 (charset_list);
1521 charset_list_closure.charset_list = &charset_list;
1522 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1523 &charset_list_closure);
1526 return charset_list;
1529 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1530 Return the name of the given charset.
1534 return XCHARSET_NAME (Fget_charset (charset));
1537 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1538 Define a new character set.
1539 This function is for use with Mule support.
1540 NAME is a symbol, the name by which the character set is normally referred.
1541 DOC-STRING is a string describing the character set.
1542 PROPS is a property list, describing the specific nature of the
1543 character set. Recognized properties are:
1545 'short-name Short version of the charset name (ex: Latin-1)
1546 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1547 'registry A regular expression matching the font registry field for
1549 'dimension Number of octets used to index a character in this charset.
1550 Either 1 or 2. Defaults to 1.
1551 'columns Number of columns used to display a character in this charset.
1552 Only used in TTY mode. (Under X, the actual width of a
1553 character can be derived from the font used to display the
1554 characters.) If unspecified, defaults to the dimension
1555 (this is almost always the correct value).
1556 'chars Number of characters in each dimension (94 or 96).
1557 Defaults to 94. Note that if the dimension is 2, the
1558 character set thus described is 94x94 or 96x96.
1559 'final Final byte of ISO 2022 escape sequence. Must be
1560 supplied. Each combination of (DIMENSION, CHARS) defines a
1561 separate namespace for final bytes. Note that ISO
1562 2022 restricts the final byte to the range
1563 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1564 dimension == 2. Note also that final bytes in the range
1565 0x30 - 0x3F are reserved for user-defined (not official)
1567 'graphic 0 (use left half of font on output) or 1 (use right half
1568 of font on output). Defaults to 0. For example, for
1569 a font whose registry is ISO8859-1, the left half
1570 (octets 0x20 - 0x7F) is the `ascii' character set, while
1571 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1572 character set. With 'graphic set to 0, the octets
1573 will have their high bit cleared; with it set to 1,
1574 the octets will have their high bit set.
1575 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1577 'ccl-program A compiled CCL program used to convert a character in
1578 this charset into an index into the font. This is in
1579 addition to the 'graphic property. The CCL program
1580 is passed the octets of the character, with the high
1581 bit cleared and set depending upon whether the value
1582 of the 'graphic property is 0 or 1.
1584 (name, doc_string, props))
1586 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1587 int direction = CHARSET_LEFT_TO_RIGHT;
1589 Lisp_Object registry = Qnil;
1590 Lisp_Object charset;
1591 Lisp_Object rest, keyword, value;
1592 Lisp_Object ccl_program = Qnil;
1593 Lisp_Object short_name = Qnil, long_name = Qnil;
1594 int byte_offset = -1;
1596 CHECK_SYMBOL (name);
1597 if (!NILP (doc_string))
1598 CHECK_STRING (doc_string);
1600 charset = Ffind_charset (name);
1601 if (!NILP (charset))
1602 signal_simple_error ("Cannot redefine existing charset", name);
1604 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1606 if (EQ (keyword, Qshort_name))
1608 CHECK_STRING (value);
1612 if (EQ (keyword, Qlong_name))
1614 CHECK_STRING (value);
1618 else if (EQ (keyword, Qdimension))
1621 dimension = XINT (value);
1622 if (dimension < 1 || dimension > 2)
1623 signal_simple_error ("Invalid value for 'dimension", value);
1626 else if (EQ (keyword, Qchars))
1629 chars = XINT (value);
1630 if (chars != 94 && chars != 96)
1631 signal_simple_error ("Invalid value for 'chars", value);
1634 else if (EQ (keyword, Qcolumns))
1637 columns = XINT (value);
1638 if (columns != 1 && columns != 2)
1639 signal_simple_error ("Invalid value for 'columns", value);
1642 else if (EQ (keyword, Qgraphic))
1645 graphic = XINT (value);
1647 if (graphic < 0 || graphic > 2)
1649 if (graphic < 0 || graphic > 1)
1651 signal_simple_error ("Invalid value for 'graphic", value);
1654 else if (EQ (keyword, Qregistry))
1656 CHECK_STRING (value);
1660 else if (EQ (keyword, Qdirection))
1662 if (EQ (value, Ql2r))
1663 direction = CHARSET_LEFT_TO_RIGHT;
1664 else if (EQ (value, Qr2l))
1665 direction = CHARSET_RIGHT_TO_LEFT;
1667 signal_simple_error ("Invalid value for 'direction", value);
1670 else if (EQ (keyword, Qfinal))
1672 CHECK_CHAR_COERCE_INT (value);
1673 final = XCHAR (value);
1674 if (final < '0' || final > '~')
1675 signal_simple_error ("Invalid value for 'final", value);
1678 else if (EQ (keyword, Qccl_program))
1680 CHECK_VECTOR (value);
1681 ccl_program = value;
1685 signal_simple_error ("Unrecognized property", keyword);
1689 error ("'final must be specified");
1690 if (dimension == 2 && final > 0x5F)
1692 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1696 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1698 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1700 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1701 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1703 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1705 id = get_unallocated_leading_byte (dimension);
1707 if (NILP (doc_string))
1708 doc_string = build_string ("");
1710 if (NILP (registry))
1711 registry = build_string ("");
1713 if (NILP (short_name))
1714 XSETSTRING (short_name, XSYMBOL (name)->name);
1716 if (NILP (long_name))
1717 long_name = doc_string;
1720 columns = dimension;
1722 if (byte_offset < 0)
1726 else if (chars == 96)
1732 charset = make_charset (id, name, type, columns, graphic,
1733 final, direction, short_name, long_name,
1734 doc_string, registry,
1735 Qnil, 0, 0, 0, byte_offset);
1736 if (!NILP (ccl_program))
1737 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1741 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1743 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1744 NEW-NAME is the name of the new charset. Return the new charset.
1746 (charset, new_name))
1748 Lisp_Object new_charset = Qnil;
1749 int id, dimension, columns, graphic, final;
1750 int direction, type;
1751 Lisp_Object registry, doc_string, short_name, long_name;
1752 struct Lisp_Charset *cs;
1754 charset = Fget_charset (charset);
1755 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1756 signal_simple_error ("Charset already has reverse-direction charset",
1759 CHECK_SYMBOL (new_name);
1760 if (!NILP (Ffind_charset (new_name)))
1761 signal_simple_error ("Cannot redefine existing charset", new_name);
1763 cs = XCHARSET (charset);
1765 type = CHARSET_TYPE (cs);
1766 columns = CHARSET_COLUMNS (cs);
1767 dimension = CHARSET_DIMENSION (cs);
1768 id = get_unallocated_leading_byte (dimension);
1770 graphic = CHARSET_GRAPHIC (cs);
1771 final = CHARSET_FINAL (cs);
1772 direction = CHARSET_RIGHT_TO_LEFT;
1773 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1774 direction = CHARSET_LEFT_TO_RIGHT;
1775 doc_string = CHARSET_DOC_STRING (cs);
1776 short_name = CHARSET_SHORT_NAME (cs);
1777 long_name = CHARSET_LONG_NAME (cs);
1778 registry = CHARSET_REGISTRY (cs);
1780 new_charset = make_charset (id, new_name, type, columns,
1781 graphic, final, direction, short_name, long_name,
1782 doc_string, registry,
1784 CHARSET_DECODING_TABLE(cs),
1785 CHARSET_UCS_MIN(cs),
1786 CHARSET_UCS_MAX(cs),
1787 CHARSET_CODE_OFFSET(cs),
1788 CHARSET_BYTE_OFFSET(cs)
1794 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1795 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1800 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1801 Define symbol ALIAS as an alias for CHARSET.
1805 CHECK_SYMBOL (alias);
1806 charset = Fget_charset (charset);
1807 return Fputhash (alias, charset, Vcharset_hash_table);
1810 /* #### Reverse direction charsets not yet implemented. */
1812 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1814 Return the reverse-direction charset parallel to CHARSET, if any.
1815 This is the charset with the same properties (in particular, the same
1816 dimension, number of characters per dimension, and final byte) as
1817 CHARSET but whose characters are displayed in the opposite direction.
1821 charset = Fget_charset (charset);
1822 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1826 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1827 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1828 If DIRECTION is omitted, both directions will be checked (left-to-right
1829 will be returned if character sets exist for both directions).
1831 (dimension, chars, final, direction))
1833 int dm, ch, fi, di = -1;
1835 Lisp_Object obj = Qnil;
1837 CHECK_INT (dimension);
1838 dm = XINT (dimension);
1839 if (dm < 1 || dm > 2)
1840 signal_simple_error ("Invalid value for DIMENSION", dimension);
1844 if (ch != 94 && ch != 96)
1845 signal_simple_error ("Invalid value for CHARS", chars);
1847 CHECK_CHAR_COERCE_INT (final);
1849 if (fi < '0' || fi > '~')
1850 signal_simple_error ("Invalid value for FINAL", final);
1852 if (EQ (direction, Ql2r))
1853 di = CHARSET_LEFT_TO_RIGHT;
1854 else if (EQ (direction, Qr2l))
1855 di = CHARSET_RIGHT_TO_LEFT;
1856 else if (!NILP (direction))
1857 signal_simple_error ("Invalid value for DIRECTION", direction);
1859 if (dm == 2 && fi > 0x5F)
1861 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1864 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1866 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1870 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
1872 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
1875 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
1878 return XCHARSET_NAME (obj);
1882 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1883 Return short name of CHARSET.
1887 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1890 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1891 Return long name of CHARSET.
1895 return XCHARSET_LONG_NAME (Fget_charset (charset));
1898 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1899 Return description of CHARSET.
1903 return XCHARSET_DOC_STRING (Fget_charset (charset));
1906 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1907 Return dimension of CHARSET.
1911 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1914 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1915 Return property PROP of CHARSET.
1916 Recognized properties are those listed in `make-charset', as well as
1917 'name and 'doc-string.
1921 struct Lisp_Charset *cs;
1923 charset = Fget_charset (charset);
1924 cs = XCHARSET (charset);
1926 CHECK_SYMBOL (prop);
1927 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1928 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1929 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1930 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1931 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1932 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1933 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1934 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1935 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1936 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1937 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1938 if (EQ (prop, Qdirection))
1939 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1940 if (EQ (prop, Qreverse_direction_charset))
1942 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1946 return XCHARSET_NAME (obj);
1948 signal_simple_error ("Unrecognized charset property name", prop);
1949 return Qnil; /* not reached */
1952 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1953 Return charset identification number of CHARSET.
1957 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1960 /* #### We need to figure out which properties we really want to
1963 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1964 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1966 (charset, ccl_program))
1968 charset = Fget_charset (charset);
1969 CHECK_VECTOR (ccl_program);
1970 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1975 invalidate_charset_font_caches (Lisp_Object charset)
1977 /* Invalidate font cache entries for charset on all devices. */
1978 Lisp_Object devcons, concons, hash_table;
1979 DEVICE_LOOP_NO_BREAK (devcons, concons)
1981 struct device *d = XDEVICE (XCAR (devcons));
1982 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1983 if (!UNBOUNDP (hash_table))
1984 Fclrhash (hash_table);
1988 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1989 Set the 'registry property of CHARSET to REGISTRY.
1991 (charset, registry))
1993 charset = Fget_charset (charset);
1994 CHECK_STRING (registry);
1995 XCHARSET_REGISTRY (charset) = registry;
1996 invalidate_charset_font_caches (charset);
1997 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
2002 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
2003 Return mapping-table of CHARSET.
2007 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
2010 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
2011 Set mapping-table of CHARSET to TABLE.
2015 struct Lisp_Charset *cs;
2016 Lisp_Object old_table;
2019 charset = Fget_charset (charset);
2020 cs = XCHARSET (charset);
2022 if (EQ (table, Qnil))
2024 CHARSET_DECODING_TABLE(cs) = table;
2027 else if (VECTORP (table))
2029 if (XVECTOR_LENGTH (table) > CHARSET_CHARS (cs))
2030 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
2031 old_table = CHARSET_DECODING_TABLE(cs);
2032 CHARSET_DECODING_TABLE(cs) = table;
2035 signal_error (Qwrong_type_argument,
2036 list2 (build_translated_string ("vector-or-nil-p"),
2038 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
2040 switch (CHARSET_DIMENSION (cs))
2043 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2045 Lisp_Object c = XVECTOR_DATA(table)[i];
2050 list1 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2054 for (i = 0; i < XVECTOR_LENGTH (table); i++)
2056 Lisp_Object v = XVECTOR_DATA(table)[i];
2062 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
2064 CHARSET_DECODING_TABLE(cs) = old_table;
2065 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
2067 for (j = 0; j < XVECTOR_LENGTH (v); j++)
2069 Lisp_Object c = XVECTOR_DATA(v)[j];
2072 put_char_attribute (c, charset,
2075 (i + CHARSET_BYTE_OFFSET (cs)),
2077 (j + CHARSET_BYTE_OFFSET (cs))));
2081 put_char_attribute (v, charset,
2083 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
2092 /************************************************************************/
2093 /* Lisp primitives for working with characters */
2094 /************************************************************************/
2096 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
2097 Make a character from CHARSET and octets ARG1 and ARG2.
2098 ARG2 is required only for characters from two-dimensional charsets.
2099 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
2100 character s with caron.
2102 (charset, arg1, arg2))
2104 struct Lisp_Charset *cs;
2106 int lowlim, highlim;
2108 charset = Fget_charset (charset);
2109 cs = XCHARSET (charset);
2111 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
2112 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
2114 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
2116 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
2117 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
2120 /* It is useful (and safe, according to Olivier Galibert) to strip
2121 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
2122 write (make-char 'latin-iso8859-2 CODE) where code is the actual
2123 Latin 2 code of the character. */
2131 if (a1 < lowlim || a1 > highlim)
2132 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
2134 if (CHARSET_DIMENSION (cs) == 1)
2138 ("Charset is of dimension one; second octet must be nil", arg2);
2139 return make_char (MAKE_CHAR (charset, a1, 0));
2148 a2 = XINT (arg2) & 0x7f;
2150 if (a2 < lowlim || a2 > highlim)
2151 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
2153 return make_char (MAKE_CHAR (charset, a1, a2));
2156 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
2157 Return the character set of char CH.
2161 CHECK_CHAR_COERCE_INT (ch);
2163 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2166 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2167 Return list of charset and one or two position-codes of CHAR.
2171 /* This function can GC */
2172 struct gcpro gcpro1, gcpro2;
2173 Lisp_Object charset = Qnil;
2174 Lisp_Object rc = Qnil;
2177 GCPRO2 (charset, rc);
2178 CHECK_CHAR_COERCE_INT (character);
2180 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2182 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2184 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2188 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2196 #ifdef ENABLE_COMPOSITE_CHARS
2197 /************************************************************************/
2198 /* composite character functions */
2199 /************************************************************************/
2202 lookup_composite_char (Bufbyte *str, int len)
2204 Lisp_Object lispstr = make_string (str, len);
2205 Lisp_Object ch = Fgethash (lispstr,
2206 Vcomposite_char_string2char_hash_table,
2212 if (composite_char_row_next >= 128)
2213 signal_simple_error ("No more composite chars available", lispstr);
2214 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2215 composite_char_col_next);
2216 Fputhash (make_char (emch), lispstr,
2217 Vcomposite_char_char2string_hash_table);
2218 Fputhash (lispstr, make_char (emch),
2219 Vcomposite_char_string2char_hash_table);
2220 composite_char_col_next++;
2221 if (composite_char_col_next >= 128)
2223 composite_char_col_next = 32;
2224 composite_char_row_next++;
2233 composite_char_string (Emchar ch)
2235 Lisp_Object str = Fgethash (make_char (ch),
2236 Vcomposite_char_char2string_hash_table,
2238 assert (!UNBOUNDP (str));
2242 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2243 Convert a string into a single composite character.
2244 The character is the result of overstriking all the characters in
2249 CHECK_STRING (string);
2250 return make_char (lookup_composite_char (XSTRING_DATA (string),
2251 XSTRING_LENGTH (string)));
2254 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2255 Return a string of the characters comprising a composite character.
2263 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2264 signal_simple_error ("Must be composite char", ch);
2265 return composite_char_string (emch);
2267 #endif /* ENABLE_COMPOSITE_CHARS */
2270 /************************************************************************/
2271 /* initialization */
2272 /************************************************************************/
2275 syms_of_mule_charset (void)
2277 DEFSUBR (Fcharsetp);
2278 DEFSUBR (Ffind_charset);
2279 DEFSUBR (Fget_charset);
2280 DEFSUBR (Fcharset_list);
2281 DEFSUBR (Fcharset_name);
2282 DEFSUBR (Fmake_charset);
2283 DEFSUBR (Fmake_reverse_direction_charset);
2284 /* DEFSUBR (Freverse_direction_charset); */
2285 DEFSUBR (Fdefine_charset_alias);
2286 DEFSUBR (Fcharset_from_attributes);
2287 DEFSUBR (Fcharset_short_name);
2288 DEFSUBR (Fcharset_long_name);
2289 DEFSUBR (Fcharset_description);
2290 DEFSUBR (Fcharset_dimension);
2291 DEFSUBR (Fcharset_property);
2292 DEFSUBR (Fcharset_id);
2293 DEFSUBR (Fset_charset_ccl_program);
2294 DEFSUBR (Fset_charset_registry);
2296 DEFSUBR (Fchar_attribute_alist);
2297 DEFSUBR (Fget_char_attribute);
2298 DEFSUBR (Fput_char_attribute);
2299 DEFSUBR (Fdefine_char);
2300 DEFSUBR (Fcharset_mapping_table);
2301 DEFSUBR (Fset_charset_mapping_table);
2304 DEFSUBR (Fmake_char);
2305 DEFSUBR (Fchar_charset);
2306 DEFSUBR (Fsplit_char);
2308 #ifdef ENABLE_COMPOSITE_CHARS
2309 DEFSUBR (Fmake_composite_char);
2310 DEFSUBR (Fcomposite_char_string);
2313 defsymbol (&Qcharsetp, "charsetp");
2314 defsymbol (&Qregistry, "registry");
2315 defsymbol (&Qfinal, "final");
2316 defsymbol (&Qgraphic, "graphic");
2317 defsymbol (&Qdirection, "direction");
2318 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2319 defsymbol (&Qshort_name, "short-name");
2320 defsymbol (&Qlong_name, "long-name");
2322 defsymbol (&Ql2r, "l2r");
2323 defsymbol (&Qr2l, "r2l");
2325 /* Charsets, compatible with FSF 20.3
2326 Naming convention is Script-Charset[-Edition] */
2327 defsymbol (&Qascii, "ascii");
2328 defsymbol (&Qcontrol_1, "control-1");
2329 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2330 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2331 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2332 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2333 defsymbol (&Qthai_tis620, "thai-tis620");
2334 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2335 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2336 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2337 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2338 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2339 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2340 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2341 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2342 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2343 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2344 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2345 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2346 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2347 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2349 defsymbol (&Qucs, "ucs");
2350 defsymbol (&Qucs_bmp, "ucs-bmp");
2351 defsymbol (&Qlatin_viscii, "latin-viscii");
2352 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2353 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2354 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2355 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2356 defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208");
2357 defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208");
2359 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2360 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2362 defsymbol (&Qcomposite, "composite");
2366 vars_of_mule_charset (void)
2373 /* Table of charsets indexed by leading byte. */
2374 for (i = 0; i < countof (charset_by_leading_byte); i++)
2375 charset_by_leading_byte[i] = Qnil;
2378 /* Table of charsets indexed by type/final-byte. */
2379 for (i = 0; i < countof (charset_by_attributes); i++)
2380 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2381 charset_by_attributes[i][j] = Qnil;
2383 /* Table of charsets indexed by type/final-byte/direction. */
2384 for (i = 0; i < countof (charset_by_attributes); i++)
2385 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2386 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2387 charset_by_attributes[i][j][k] = Qnil;
2391 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2393 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2394 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2398 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2399 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2400 Leading-code of private TYPE9N charset of column-width 1.
2402 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2406 Vutf_2000_version = build_string("0.12 (Kashiwara)");
2407 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2408 Version number of UTF-2000.
2411 staticpro (&Vcharacter_attribute_table);
2412 Vcharacter_attribute_table = make_char_code_table (Qnil);
2414 Vdefault_coded_charset_priority_list = Qnil;
2415 DEFVAR_LISP ("default-coded-charset-priority-list",
2416 &Vdefault_coded_charset_priority_list /*
2417 Default order of preferred coded-character-sets.
2423 complex_vars_of_mule_charset (void)
2425 staticpro (&Vcharset_hash_table);
2426 Vcharset_hash_table =
2427 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2429 /* Predefined character sets. We store them into variables for
2434 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2435 CHARSET_TYPE_256X256, 1, 2, 0,
2436 CHARSET_LEFT_TO_RIGHT,
2437 build_string ("BMP"),
2438 build_string ("BMP"),
2439 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2440 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2441 Qnil, 0, 0xFFFF, 0, 0);
2443 # define MIN_CHAR_THAI 0
2444 # define MAX_CHAR_THAI 0
2445 # define MIN_CHAR_GREEK 0
2446 # define MAX_CHAR_GREEK 0
2447 # define MIN_CHAR_HEBREW 0
2448 # define MAX_CHAR_HEBREW 0
2449 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2450 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2451 # define MIN_CHAR_CYRILLIC 0
2452 # define MAX_CHAR_CYRILLIC 0
2455 make_charset (LEADING_BYTE_ASCII, Qascii,
2456 CHARSET_TYPE_94, 1, 0, 'B',
2457 CHARSET_LEFT_TO_RIGHT,
2458 build_string ("ASCII"),
2459 build_string ("ASCII)"),
2460 build_string ("ASCII (ISO646 IRV)"),
2461 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2462 Qnil, 0, 0x7F, 0, 0);
2463 Vcharset_control_1 =
2464 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2465 CHARSET_TYPE_94, 1, 1, 0,
2466 CHARSET_LEFT_TO_RIGHT,
2467 build_string ("C1"),
2468 build_string ("Control characters"),
2469 build_string ("Control characters 128-191"),
2471 Qnil, 0x80, 0x9F, 0, 0);
2472 Vcharset_latin_iso8859_1 =
2473 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2474 CHARSET_TYPE_96, 1, 1, 'A',
2475 CHARSET_LEFT_TO_RIGHT,
2476 build_string ("Latin-1"),
2477 build_string ("ISO8859-1 (Latin-1)"),
2478 build_string ("ISO8859-1 (Latin-1)"),
2479 build_string ("iso8859-1"),
2480 Qnil, 0xA0, 0xFF, 0, 32);
2481 Vcharset_latin_iso8859_2 =
2482 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2483 CHARSET_TYPE_96, 1, 1, 'B',
2484 CHARSET_LEFT_TO_RIGHT,
2485 build_string ("Latin-2"),
2486 build_string ("ISO8859-2 (Latin-2)"),
2487 build_string ("ISO8859-2 (Latin-2)"),
2488 build_string ("iso8859-2"),
2490 Vcharset_latin_iso8859_3 =
2491 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2492 CHARSET_TYPE_96, 1, 1, 'C',
2493 CHARSET_LEFT_TO_RIGHT,
2494 build_string ("Latin-3"),
2495 build_string ("ISO8859-3 (Latin-3)"),
2496 build_string ("ISO8859-3 (Latin-3)"),
2497 build_string ("iso8859-3"),
2499 Vcharset_latin_iso8859_4 =
2500 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2501 CHARSET_TYPE_96, 1, 1, 'D',
2502 CHARSET_LEFT_TO_RIGHT,
2503 build_string ("Latin-4"),
2504 build_string ("ISO8859-4 (Latin-4)"),
2505 build_string ("ISO8859-4 (Latin-4)"),
2506 build_string ("iso8859-4"),
2508 Vcharset_thai_tis620 =
2509 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2510 CHARSET_TYPE_96, 1, 1, 'T',
2511 CHARSET_LEFT_TO_RIGHT,
2512 build_string ("TIS620"),
2513 build_string ("TIS620 (Thai)"),
2514 build_string ("TIS620.2529 (Thai)"),
2515 build_string ("tis620"),
2516 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2517 Vcharset_greek_iso8859_7 =
2518 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2519 CHARSET_TYPE_96, 1, 1, 'F',
2520 CHARSET_LEFT_TO_RIGHT,
2521 build_string ("ISO8859-7"),
2522 build_string ("ISO8859-7 (Greek)"),
2523 build_string ("ISO8859-7 (Greek)"),
2524 build_string ("iso8859-7"),
2525 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2526 Vcharset_arabic_iso8859_6 =
2527 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2528 CHARSET_TYPE_96, 1, 1, 'G',
2529 CHARSET_RIGHT_TO_LEFT,
2530 build_string ("ISO8859-6"),
2531 build_string ("ISO8859-6 (Arabic)"),
2532 build_string ("ISO8859-6 (Arabic)"),
2533 build_string ("iso8859-6"),
2535 Vcharset_hebrew_iso8859_8 =
2536 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2537 CHARSET_TYPE_96, 1, 1, 'H',
2538 CHARSET_RIGHT_TO_LEFT,
2539 build_string ("ISO8859-8"),
2540 build_string ("ISO8859-8 (Hebrew)"),
2541 build_string ("ISO8859-8 (Hebrew)"),
2542 build_string ("iso8859-8"),
2543 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2544 Vcharset_katakana_jisx0201 =
2545 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2546 CHARSET_TYPE_94, 1, 1, 'I',
2547 CHARSET_LEFT_TO_RIGHT,
2548 build_string ("JISX0201 Kana"),
2549 build_string ("JISX0201.1976 (Japanese Kana)"),
2550 build_string ("JISX0201.1976 Japanese Kana"),
2551 build_string ("jisx0201\\.1976"),
2553 MIN_CHAR_HALFWIDTH_KATAKANA,
2554 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2555 Vcharset_latin_jisx0201 =
2556 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2557 CHARSET_TYPE_94, 1, 0, 'J',
2558 CHARSET_LEFT_TO_RIGHT,
2559 build_string ("JISX0201 Roman"),
2560 build_string ("JISX0201.1976 (Japanese Roman)"),
2561 build_string ("JISX0201.1976 Japanese Roman"),
2562 build_string ("jisx0201\\.1976"),
2564 Vcharset_cyrillic_iso8859_5 =
2565 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2566 CHARSET_TYPE_96, 1, 1, 'L',
2567 CHARSET_LEFT_TO_RIGHT,
2568 build_string ("ISO8859-5"),
2569 build_string ("ISO8859-5 (Cyrillic)"),
2570 build_string ("ISO8859-5 (Cyrillic)"),
2571 build_string ("iso8859-5"),
2572 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2573 Vcharset_latin_iso8859_9 =
2574 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2575 CHARSET_TYPE_96, 1, 1, 'M',
2576 CHARSET_LEFT_TO_RIGHT,
2577 build_string ("Latin-5"),
2578 build_string ("ISO8859-9 (Latin-5)"),
2579 build_string ("ISO8859-9 (Latin-5)"),
2580 build_string ("iso8859-9"),
2582 Vcharset_japanese_jisx0208_1978 =
2583 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2584 CHARSET_TYPE_94X94, 2, 0, '@',
2585 CHARSET_LEFT_TO_RIGHT,
2586 build_string ("JIS X0208:1978"),
2587 build_string ("JIS X0208:1978 (Japanese)"),
2589 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2590 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2592 Vcharset_chinese_gb2312 =
2593 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2594 CHARSET_TYPE_94X94, 2, 0, 'A',
2595 CHARSET_LEFT_TO_RIGHT,
2596 build_string ("GB2312"),
2597 build_string ("GB2312)"),
2598 build_string ("GB2312 Chinese simplified"),
2599 build_string ("gb2312"),
2601 Vcharset_japanese_jisx0208 =
2602 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2603 CHARSET_TYPE_94X94, 2, 0, 'B',
2604 CHARSET_LEFT_TO_RIGHT,
2605 build_string ("JISX0208"),
2606 build_string ("JIS X0208:1983 (Japanese)"),
2607 build_string ("JIS X0208:1983 Japanese Kanji"),
2608 build_string ("jisx0208\\.1983"),
2610 Vcharset_korean_ksc5601 =
2611 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2612 CHARSET_TYPE_94X94, 2, 0, 'C',
2613 CHARSET_LEFT_TO_RIGHT,
2614 build_string ("KSC5601"),
2615 build_string ("KSC5601 (Korean"),
2616 build_string ("KSC5601 Korean Hangul and Hanja"),
2617 build_string ("ksc5601"),
2619 Vcharset_japanese_jisx0212 =
2620 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2621 CHARSET_TYPE_94X94, 2, 0, 'D',
2622 CHARSET_LEFT_TO_RIGHT,
2623 build_string ("JISX0212"),
2624 build_string ("JISX0212 (Japanese)"),
2625 build_string ("JISX0212 Japanese Supplement"),
2626 build_string ("jisx0212"),
2629 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2630 Vcharset_chinese_cns11643_1 =
2631 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2632 CHARSET_TYPE_94X94, 2, 0, 'G',
2633 CHARSET_LEFT_TO_RIGHT,
2634 build_string ("CNS11643-1"),
2635 build_string ("CNS11643-1 (Chinese traditional)"),
2637 ("CNS 11643 Plane 1 Chinese traditional"),
2638 build_string (CHINESE_CNS_PLANE_RE("1")),
2640 Vcharset_chinese_cns11643_2 =
2641 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2642 CHARSET_TYPE_94X94, 2, 0, 'H',
2643 CHARSET_LEFT_TO_RIGHT,
2644 build_string ("CNS11643-2"),
2645 build_string ("CNS11643-2 (Chinese traditional)"),
2647 ("CNS 11643 Plane 2 Chinese traditional"),
2648 build_string (CHINESE_CNS_PLANE_RE("2")),
2651 Vcharset_latin_viscii_lower =
2652 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2653 CHARSET_TYPE_96, 1, 1, '1',
2654 CHARSET_LEFT_TO_RIGHT,
2655 build_string ("VISCII lower"),
2656 build_string ("VISCII lower (Vietnamese)"),
2657 build_string ("VISCII lower (Vietnamese)"),
2658 build_string ("MULEVISCII-LOWER"),
2660 Vcharset_latin_viscii_upper =
2661 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2662 CHARSET_TYPE_96, 1, 1, '2',
2663 CHARSET_LEFT_TO_RIGHT,
2664 build_string ("VISCII upper"),
2665 build_string ("VISCII upper (Vietnamese)"),
2666 build_string ("VISCII upper (Vietnamese)"),
2667 build_string ("MULEVISCII-UPPER"),
2669 Vcharset_latin_viscii =
2670 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2671 CHARSET_TYPE_256, 1, 2, 0,
2672 CHARSET_LEFT_TO_RIGHT,
2673 build_string ("VISCII"),
2674 build_string ("VISCII 1.1 (Vietnamese)"),
2675 build_string ("VISCII 1.1 (Vietnamese)"),
2676 build_string ("VISCII1\\.1"),
2678 Vcharset_hiragana_jisx0208 =
2679 make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
2680 CHARSET_TYPE_94X94, 2, 0, 'B',
2681 CHARSET_LEFT_TO_RIGHT,
2682 build_string ("Hiragana"),
2683 build_string ("Hiragana of JIS X0208"),
2684 build_string ("Japanese Hiragana of JIS X0208"),
2685 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2686 Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA,
2687 (0x24 - 33) * 94 + (0x21 - 33), 33);
2688 Vcharset_katakana_jisx0208 =
2689 make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208,
2690 CHARSET_TYPE_94X94, 2, 0, 'B',
2691 CHARSET_LEFT_TO_RIGHT,
2692 build_string ("Katakana"),
2693 build_string ("Katakana of JIS X0208"),
2694 build_string ("Japanese Katakana of JIS X0208"),
2695 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2696 Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA,
2697 (0x25 - 33) * 94 + (0x21 - 33), 33);
2699 Vcharset_chinese_big5_1 =
2700 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2701 CHARSET_TYPE_94X94, 2, 0, '0',
2702 CHARSET_LEFT_TO_RIGHT,
2703 build_string ("Big5"),
2704 build_string ("Big5 (Level-1)"),
2706 ("Big5 Level-1 Chinese traditional"),
2707 build_string ("big5"),
2709 Vcharset_chinese_big5_2 =
2710 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2711 CHARSET_TYPE_94X94, 2, 0, '1',
2712 CHARSET_LEFT_TO_RIGHT,
2713 build_string ("Big5"),
2714 build_string ("Big5 (Level-2)"),
2716 ("Big5 Level-2 Chinese traditional"),
2717 build_string ("big5"),
2720 #ifdef ENABLE_COMPOSITE_CHARS
2721 /* #### For simplicity, we put composite chars into a 96x96 charset.
2722 This is going to lead to problems because you can run out of
2723 room, esp. as we don't yet recycle numbers. */
2724 Vcharset_composite =
2725 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2726 CHARSET_TYPE_96X96, 2, 0, 0,
2727 CHARSET_LEFT_TO_RIGHT,
2728 build_string ("Composite"),
2729 build_string ("Composite characters"),
2730 build_string ("Composite characters"),
2733 composite_char_row_next = 32;
2734 composite_char_col_next = 32;
2736 Vcomposite_char_string2char_hash_table =
2737 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2738 Vcomposite_char_char2string_hash_table =
2739 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2740 staticpro (&Vcomposite_char_string2char_hash_table);
2741 staticpro (&Vcomposite_char_char2string_hash_table);
2742 #endif /* ENABLE_COMPOSITE_CHARS */