1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii;
63 Lisp_Object Vcharset_latin_viscii_lower;
64 Lisp_Object Vcharset_latin_viscii_upper;
65 Lisp_Object Vcharset_hiragana_jisx0208;
66 Lisp_Object Vcharset_katakana_jisx0208;
68 Lisp_Object Vcharset_chinese_big5_1;
69 Lisp_Object Vcharset_chinese_big5_2;
71 #ifdef ENABLE_COMPOSITE_CHARS
72 Lisp_Object Vcharset_composite;
74 /* Hash tables for composite chars. One maps string representing
75 composed chars to their equivalent chars; one goes the
77 Lisp_Object Vcomposite_char_char2string_hash_table;
78 Lisp_Object Vcomposite_char_string2char_hash_table;
80 static int composite_char_row_next;
81 static int composite_char_col_next;
83 #endif /* ENABLE_COMPOSITE_CHARS */
85 /* Table of charsets indexed by leading byte. */
86 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
88 /* Table of charsets indexed by type/final-byte/direction. */
90 Lisp_Object charset_by_attributes[4][128];
92 Lisp_Object charset_by_attributes[4][128][2];
96 /* Table of number of bytes in the string representation of a character
97 indexed by the first byte of that representation.
99 rep_bytes_by_first_byte(c) is more efficient than the equivalent
100 canonical computation:
102 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
104 Bytecount rep_bytes_by_first_byte[0xA0] =
105 { /* 0x00 - 0x7f are for straight ASCII */
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 /* 0x80 - 0x8f are for Dimension-1 official charsets */
116 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
118 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 /* 0x90 - 0x9d are for Dimension-2 official charsets */
121 /* 0x9e is for Dimension-1 private charsets */
122 /* 0x9f is for Dimension-2 private charsets */
123 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
129 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
131 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
134 for (i = 0; i < 256; i++)
136 markobj (cte->property[i]);
142 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
144 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
145 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
148 for (i = 0; i < 256; i++)
149 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
151 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
153 if (!char_byte_table_equal (cte1->property[i],
154 cte2->property[i], depth + 1))
161 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
167 char_byte_table_hash (Lisp_Object obj, int depth)
169 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
171 return internal_array_hash (cte->property, 256, depth);
174 static const struct lrecord_description char_byte_table_description[] = {
175 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
179 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_byte_table,
180 mark_char_byte_table,
181 internal_object_printer,
182 0, char_byte_table_equal,
183 char_byte_table_hash,
184 char_byte_table_description,
185 struct Lisp_Char_Byte_Table);
189 make_char_byte_table (Lisp_Object initval)
193 struct Lisp_Char_Byte_Table *cte =
194 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
195 &lrecord_char_byte_table);
197 for (i = 0; i < 256; i++)
198 cte->property[i] = initval;
200 XSETCHAR_BYTE_TABLE (obj, cte);
205 copy_char_byte_table (Lisp_Object entry)
207 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
210 struct Lisp_Char_Byte_Table *ctenew =
211 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
212 &lrecord_char_byte_table);
214 for (i = 0; i < 256; i++)
216 Lisp_Object new = cte->property[i];
217 if (CHAR_BYTE_TABLE_P (new))
218 ctenew->property[i] = copy_char_byte_table (new);
220 ctenew->property[i] = new;
223 XSETCHAR_BYTE_TABLE (obj, ctenew);
227 #define make_char_code_table(initval) make_char_byte_table(initval)
230 get_char_code_table (Emchar ch, Lisp_Object table)
232 struct Lisp_Char_Byte_Table* cpt = XCHAR_BYTE_TABLE (table);
233 Lisp_Object ret = cpt->property [ch >> 24];
235 if (CHAR_BYTE_TABLE_P (ret))
236 cpt = XCHAR_BYTE_TABLE (ret);
240 ret = cpt->property [(unsigned char) (ch >> 16)];
241 if (CHAR_BYTE_TABLE_P (ret))
242 cpt = XCHAR_BYTE_TABLE (ret);
246 ret = cpt->property [(unsigned char) (ch >> 8)];
247 if (CHAR_BYTE_TABLE_P (ret))
248 cpt = XCHAR_BYTE_TABLE (ret);
252 return cpt->property [(unsigned char) ch];
256 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
258 struct Lisp_Char_Byte_Table* cpt1 = XCHAR_BYTE_TABLE (table);
259 Lisp_Object ret = cpt1->property[ch >> 24];
261 if (CHAR_BYTE_TABLE_P (ret))
263 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
265 ret = cpt2->property[(unsigned char)(ch >> 16)];
266 if (CHAR_BYTE_TABLE_P (ret))
268 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
270 ret = cpt3->property[(unsigned char)(ch >> 8)];
271 if (CHAR_BYTE_TABLE_P (ret))
273 struct Lisp_Char_Byte_Table* cpt4
274 = XCHAR_BYTE_TABLE (ret);
276 cpt4->property[(unsigned char)ch] = value;
278 else if (!EQ (ret, value))
280 Lisp_Object cpt4 = make_char_byte_table (ret);
282 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
283 cpt3->property[(unsigned char)(ch >> 8)] = cpt4;
286 else if (!EQ (ret, value))
288 Lisp_Object cpt3 = make_char_byte_table (ret);
289 Lisp_Object cpt4 = make_char_byte_table (ret);
291 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
292 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)]
294 cpt2->property[(unsigned char)(ch >> 16)] = cpt3;
297 else if (!EQ (ret, value))
299 Lisp_Object cpt2 = make_char_byte_table (ret);
300 Lisp_Object cpt3 = make_char_byte_table (ret);
301 Lisp_Object cpt4 = make_char_byte_table (ret);
303 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
304 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)] = cpt4;
305 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(ch >> 16)] = cpt3;
306 cpt1->property[(unsigned char)(ch >> 24)] = cpt2;
311 Lisp_Object Vcharacter_attribute_table;
313 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
314 Return the alist of attributes of CHARACTER.
318 return get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
321 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
322 Return the value of CHARACTER's ATTRIBUTE.
324 (character, attribute))
327 = get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
333 if (!NILP (ccs = Ffind_charset (attribute)))
336 return Fcdr (Fassq (attribute, ret));
340 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
343 Emchar char_code = XCHAR (character);
345 = get_char_code_table (char_code, Vcharacter_attribute_table);
348 cell = Fassq (attribute, ret);
352 ret = Fcons (Fcons (attribute, value), ret);
354 else if (!EQ (Fcdr (cell), value))
356 Fsetcdr (cell, value);
358 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
362 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
363 Store CHARACTER's ATTRIBUTE with VALUE.
365 (character, attribute, value))
369 ccs = Ffind_charset (attribute);
373 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
378 /* ad-hoc method for `ascii' */
379 if ((XCHARSET_CHARS (ccs) == 94) &&
380 (XCHARSET_BYTE_OFFSET (ccs) != 33))
381 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
383 ccs_len = XCHARSET_CHARS (ccs);
386 signal_simple_error ("Invalid value for coded-charset",
390 rest = Fget_char_attribute (character, attribute);
397 Lisp_Object ei = Fcar (rest);
399 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
400 nv = XVECTOR_DATA(v)[i];
407 XVECTOR_DATA(v)[i] = Qnil;
408 v = XCHARSET_DECODING_TABLE (ccs);
413 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
420 Lisp_Object ei = Fcar (rest);
423 signal_simple_error ("Invalid value for coded-charset",
425 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
426 nv = XVECTOR_DATA(v)[i];
432 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
439 XVECTOR_DATA(v)[i] = character;
441 return put_char_attribute (character, attribute, value);
446 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
447 Store character's ATTRIBUTES.
451 Lisp_Object rest = attributes;
452 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
453 Lisp_Object character;
459 Lisp_Object cell = Fcar (rest);
463 signal_simple_error ("Invalid argument", attributes);
464 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
465 && XCHARSET_FINAL (ccs))
469 if (XCHARSET_DIMENSION (ccs) == 1)
471 Lisp_Object eb1 = Fcar (Fcdr (cell));
475 signal_simple_error ("Invalid argument", attributes);
477 switch (XCHARSET_CHARS (ccs))
481 + (XCHARSET_FINAL (ccs) - '0') * 94 + (b1 - 33);
485 + (XCHARSET_FINAL (ccs) - '0') * 96 + (b1 - 32);
491 else if (XCHARSET_DIMENSION (ccs) == 2)
493 Lisp_Object eb1 = Fcar (Fcdr (cell));
494 Lisp_Object eb2 = Fcar (Fcdr (Fcdr (cell)));
498 signal_simple_error ("Invalid argument", attributes);
501 signal_simple_error ("Invalid argument", attributes);
503 switch (XCHARSET_CHARS (ccs))
506 code = MIN_CHAR_94x94
507 + (XCHARSET_FINAL (ccs) - '0') * 94 * 94
508 + (b1 - 33) * 94 + (b2 - 33);
511 code = MIN_CHAR_96x96
512 + (XCHARSET_FINAL (ccs) - '0') * 96 * 96
513 + (b1 - 32) * 96 + (b2 - 32);
524 character = make_char (code);
531 else if (!INTP (code))
532 signal_simple_error ("Invalid argument", attributes);
534 character = make_char (XINT (code));
539 Lisp_Object cell = Fcar (rest);
542 signal_simple_error ("Invalid argument", attributes);
543 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
547 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
550 Lisp_Object Vutf_2000_version;
554 int leading_code_private_11;
557 Lisp_Object Qcharsetp;
559 /* Qdoc_string, Qdimension, Qchars defined in general.c */
560 Lisp_Object Qregistry, Qfinal, Qgraphic;
561 Lisp_Object Qdirection;
562 Lisp_Object Qreverse_direction_charset;
563 Lisp_Object Qleading_byte;
564 Lisp_Object Qshort_name, Qlong_name;
580 Qjapanese_jisx0208_1978,
592 Qvietnamese_viscii_lower,
593 Qvietnamese_viscii_upper,
601 Lisp_Object Ql2r, Qr2l;
603 Lisp_Object Vcharset_hash_table;
606 static Charset_ID next_allocated_leading_byte;
608 static Charset_ID next_allocated_1_byte_leading_byte;
609 static Charset_ID next_allocated_2_byte_leading_byte;
612 /* Composite characters are characters constructed by overstriking two
613 or more regular characters.
615 1) The old Mule implementation involves storing composite characters
616 in a buffer as a tag followed by all of the actual characters
617 used to make up the composite character. I think this is a bad
618 idea; it greatly complicates code that wants to handle strings
619 one character at a time because it has to deal with the possibility
620 of great big ungainly characters. It's much more reasonable to
621 simply store an index into a table of composite characters.
623 2) The current implementation only allows for 16,384 separate
624 composite characters over the lifetime of the XEmacs process.
625 This could become a potential problem if the user
626 edited lots of different files that use composite characters.
627 Due to FSF bogosity, increasing the number of allowable
628 composite characters under Mule would decrease the number
629 of possible faces that can exist. Mule already has shrunk
630 this to 2048, and further shrinkage would become uncomfortable.
631 No such problems exist in XEmacs.
633 Composite characters could be represented as 0x80 C1 C2 C3,
634 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
635 for slightly under 2^20 (one million) composite characters
636 over the XEmacs process lifetime, and you only need to
637 increase the size of a Mule character from 19 to 21 bits.
638 Or you could use 0x80 C1 C2 C3 C4, allowing for about
639 85 million (slightly over 2^26) composite characters. */
642 /************************************************************************/
643 /* Basic Emchar functions */
644 /************************************************************************/
646 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
647 string in STR. Returns the number of bytes stored.
648 Do not call this directly. Use the macro set_charptr_emchar() instead.
652 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
667 else if ( c <= 0x7ff )
669 *p++ = (c >> 6) | 0xc0;
670 *p++ = (c & 0x3f) | 0x80;
672 else if ( c <= 0xffff )
674 *p++ = (c >> 12) | 0xe0;
675 *p++ = ((c >> 6) & 0x3f) | 0x80;
676 *p++ = (c & 0x3f) | 0x80;
678 else if ( c <= 0x1fffff )
680 *p++ = (c >> 18) | 0xf0;
681 *p++ = ((c >> 12) & 0x3f) | 0x80;
682 *p++ = ((c >> 6) & 0x3f) | 0x80;
683 *p++ = (c & 0x3f) | 0x80;
685 else if ( c <= 0x3ffffff )
687 *p++ = (c >> 24) | 0xf8;
688 *p++ = ((c >> 18) & 0x3f) | 0x80;
689 *p++ = ((c >> 12) & 0x3f) | 0x80;
690 *p++ = ((c >> 6) & 0x3f) | 0x80;
691 *p++ = (c & 0x3f) | 0x80;
695 *p++ = (c >> 30) | 0xfc;
696 *p++ = ((c >> 24) & 0x3f) | 0x80;
697 *p++ = ((c >> 18) & 0x3f) | 0x80;
698 *p++ = ((c >> 12) & 0x3f) | 0x80;
699 *p++ = ((c >> 6) & 0x3f) | 0x80;
700 *p++ = (c & 0x3f) | 0x80;
703 BREAKUP_CHAR (c, charset, c1, c2);
704 lb = CHAR_LEADING_BYTE (c);
705 if (LEADING_BYTE_PRIVATE_P (lb))
706 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
708 if (EQ (charset, Vcharset_control_1))
717 /* Return the first character from a Mule-encoded string in STR,
718 assuming it's non-ASCII. Do not call this directly.
719 Use the macro charptr_emchar() instead. */
722 non_ascii_charptr_emchar (CONST Bufbyte *str)
735 else if ( b >= 0xf8 )
740 else if ( b >= 0xf0 )
745 else if ( b >= 0xe0 )
750 else if ( b >= 0xc0 )
760 for( ; len > 0; len-- )
763 ch = ( ch << 6 ) | ( b & 0x3f );
767 Bufbyte i0 = *str, i1, i2 = 0;
770 if (i0 == LEADING_BYTE_CONTROL_1)
771 return (Emchar) (*++str - 0x20);
773 if (LEADING_BYTE_PREFIX_P (i0))
778 charset = CHARSET_BY_LEADING_BYTE (i0);
779 if (XCHARSET_DIMENSION (charset) == 2)
782 return MAKE_CHAR (charset, i1, i2);
786 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
787 Do not call this directly. Use the macro valid_char_p() instead. */
791 non_ascii_valid_char_p (Emchar ch)
795 /* Must have only lowest 19 bits set */
799 f1 = CHAR_FIELD1 (ch);
800 f2 = CHAR_FIELD2 (ch);
801 f3 = CHAR_FIELD3 (ch);
807 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
808 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
809 f2 > MAX_CHAR_FIELD2_PRIVATE)
814 if (f3 != 0x20 && f3 != 0x7F)
818 NOTE: This takes advantage of the fact that
819 FIELD2_TO_OFFICIAL_LEADING_BYTE and
820 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
822 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
823 return (XCHARSET_CHARS (charset) == 96);
829 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
830 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
831 f1 > MAX_CHAR_FIELD1_PRIVATE)
833 if (f2 < 0x20 || f3 < 0x20)
836 #ifdef ENABLE_COMPOSITE_CHARS
837 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
839 if (UNBOUNDP (Fgethash (make_int (ch),
840 Vcomposite_char_char2string_hash_table,
845 #endif /* ENABLE_COMPOSITE_CHARS */
847 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
850 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
852 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
855 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
857 return (XCHARSET_CHARS (charset) == 96);
863 /************************************************************************/
864 /* Basic string functions */
865 /************************************************************************/
867 /* Copy the character pointed to by PTR into STR, assuming it's
868 non-ASCII. Do not call this directly. Use the macro
869 charptr_copy_char() instead. */
872 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
874 Bufbyte *strptr = str;
876 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
878 /* Notice fallthrough. */
880 case 6: *++strptr = *ptr++;
881 case 5: *++strptr = *ptr++;
883 case 4: *++strptr = *ptr++;
884 case 3: *++strptr = *ptr++;
885 case 2: *++strptr = *ptr;
890 return strptr + 1 - str;
894 /************************************************************************/
895 /* streams of Emchars */
896 /************************************************************************/
898 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
899 The functions below are not meant to be called directly; use
900 the macros in insdel.h. */
903 Lstream_get_emchar_1 (Lstream *stream, int ch)
905 Bufbyte str[MAX_EMCHAR_LEN];
906 Bufbyte *strptr = str;
908 str[0] = (Bufbyte) ch;
909 switch (REP_BYTES_BY_FIRST_BYTE (ch))
911 /* Notice fallthrough. */
914 ch = Lstream_getc (stream);
916 *++strptr = (Bufbyte) ch;
918 ch = Lstream_getc (stream);
920 *++strptr = (Bufbyte) ch;
923 ch = Lstream_getc (stream);
925 *++strptr = (Bufbyte) ch;
927 ch = Lstream_getc (stream);
929 *++strptr = (Bufbyte) ch;
931 ch = Lstream_getc (stream);
933 *++strptr = (Bufbyte) ch;
938 return charptr_emchar (str);
942 Lstream_fput_emchar (Lstream *stream, Emchar ch)
944 Bufbyte str[MAX_EMCHAR_LEN];
945 Bytecount len = set_charptr_emchar (str, ch);
946 return Lstream_write (stream, str, len);
950 Lstream_funget_emchar (Lstream *stream, Emchar ch)
952 Bufbyte str[MAX_EMCHAR_LEN];
953 Bytecount len = set_charptr_emchar (str, ch);
954 Lstream_unread (stream, str, len);
958 /************************************************************************/
960 /************************************************************************/
963 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
965 struct Lisp_Charset *cs = XCHARSET (obj);
967 markobj (cs->short_name);
968 markobj (cs->long_name);
969 markobj (cs->doc_string);
970 markobj (cs->registry);
971 markobj (cs->ccl_program);
973 markobj (cs->decoding_table);
979 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
981 struct Lisp_Charset *cs = XCHARSET (obj);
985 error ("printing unreadable object #<charset %s 0x%x>",
986 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
989 write_c_string ("#<charset ", printcharfun);
990 print_internal (CHARSET_NAME (cs), printcharfun, 0);
991 write_c_string (" ", printcharfun);
992 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
993 write_c_string (" ", printcharfun);
994 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
995 write_c_string (" ", printcharfun);
996 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
997 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
998 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
999 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
1000 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
1002 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1003 CHARSET_COLUMNS (cs),
1004 CHARSET_GRAPHIC (cs),
1005 CHARSET_FINAL (cs));
1006 write_c_string (buf, printcharfun);
1007 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1008 sprintf (buf, " 0x%x>", cs->header.uid);
1009 write_c_string (buf, printcharfun);
1012 static const struct lrecord_description charset_description[] = {
1013 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1015 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
1020 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1021 mark_charset, print_charset, 0, 0, 0,
1022 charset_description,
1023 struct Lisp_Charset);
1025 /* Make a new charset. */
1028 make_charset (Charset_ID id, Lisp_Object name,
1029 unsigned char type, unsigned char columns, unsigned char graphic,
1030 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1031 Lisp_Object long_name, Lisp_Object doc,
1033 Lisp_Object decoding_table,
1034 Emchar ucs_min, Emchar ucs_max,
1035 Emchar code_offset, unsigned char byte_offset)
1038 struct Lisp_Charset *cs =
1039 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1040 XSETCHARSET (obj, cs);
1042 CHARSET_ID (cs) = id;
1043 CHARSET_NAME (cs) = name;
1044 CHARSET_SHORT_NAME (cs) = short_name;
1045 CHARSET_LONG_NAME (cs) = long_name;
1046 CHARSET_DIRECTION (cs) = direction;
1047 CHARSET_TYPE (cs) = type;
1048 CHARSET_COLUMNS (cs) = columns;
1049 CHARSET_GRAPHIC (cs) = graphic;
1050 CHARSET_FINAL (cs) = final;
1051 CHARSET_DOC_STRING (cs) = doc;
1052 CHARSET_REGISTRY (cs) = reg;
1053 CHARSET_CCL_PROGRAM (cs) = Qnil;
1054 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1056 CHARSET_DECODING_TABLE(cs) = Qnil;
1057 CHARSET_UCS_MIN(cs) = ucs_min;
1058 CHARSET_UCS_MAX(cs) = ucs_max;
1059 CHARSET_CODE_OFFSET(cs) = code_offset;
1060 CHARSET_BYTE_OFFSET(cs) = byte_offset;
1063 switch (CHARSET_TYPE (cs))
1065 case CHARSET_TYPE_94:
1066 CHARSET_DIMENSION (cs) = 1;
1067 CHARSET_CHARS (cs) = 94;
1069 case CHARSET_TYPE_96:
1070 CHARSET_DIMENSION (cs) = 1;
1071 CHARSET_CHARS (cs) = 96;
1073 case CHARSET_TYPE_94X94:
1074 CHARSET_DIMENSION (cs) = 2;
1075 CHARSET_CHARS (cs) = 94;
1077 case CHARSET_TYPE_96X96:
1078 CHARSET_DIMENSION (cs) = 2;
1079 CHARSET_CHARS (cs) = 96;
1082 case CHARSET_TYPE_128:
1083 CHARSET_DIMENSION (cs) = 1;
1084 CHARSET_CHARS (cs) = 128;
1086 case CHARSET_TYPE_128X128:
1087 CHARSET_DIMENSION (cs) = 2;
1088 CHARSET_CHARS (cs) = 128;
1090 case CHARSET_TYPE_256:
1091 CHARSET_DIMENSION (cs) = 1;
1092 CHARSET_CHARS (cs) = 256;
1094 case CHARSET_TYPE_256X256:
1095 CHARSET_DIMENSION (cs) = 2;
1096 CHARSET_CHARS (cs) = 256;
1102 if (id == LEADING_BYTE_ASCII)
1103 CHARSET_REP_BYTES (cs) = 1;
1105 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1107 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1112 /* some charsets do not have final characters. This includes
1113 ASCII, Control-1, Composite, and the two faux private
1116 if (code_offset == 0)
1118 assert (NILP (charset_by_attributes[type][final]));
1119 charset_by_attributes[type][final] = obj;
1122 assert (NILP (charset_by_attributes[type][final][direction]));
1123 charset_by_attributes[type][final][direction] = obj;
1127 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1128 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1131 /* official leading byte */
1132 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
1135 /* Some charsets are "faux" and don't have names or really exist at
1136 all except in the leading-byte table. */
1138 Fputhash (name, obj, Vcharset_hash_table);
1143 get_unallocated_leading_byte (int dimension)
1148 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1151 lb = next_allocated_leading_byte++;
1155 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1158 lb = next_allocated_1_byte_leading_byte++;
1162 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1165 lb = next_allocated_2_byte_leading_byte++;
1171 ("No more character sets free for this dimension",
1172 make_int (dimension));
1179 range_charset_code_point (Lisp_Object charset, Emchar ch)
1183 if ((XCHARSET_UCS_MIN (charset) <= ch)
1184 && (ch <= XCHARSET_UCS_MAX (charset)))
1186 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1188 if (XCHARSET_DIMENSION (charset) == 1)
1189 return list1 (make_int (d + XCHARSET_BYTE_OFFSET (charset)));
1190 else if (XCHARSET_DIMENSION (charset) == 2)
1191 return list2 (make_int (d / XCHARSET_CHARS (charset)
1192 + XCHARSET_BYTE_OFFSET (charset)),
1193 make_int (d % XCHARSET_CHARS (charset)
1194 + XCHARSET_BYTE_OFFSET (charset)));
1195 else if (XCHARSET_DIMENSION (charset) == 3)
1196 return list3 (make_int (d / (XCHARSET_CHARS (charset)
1197 * XCHARSET_CHARS (charset))
1198 + XCHARSET_BYTE_OFFSET (charset)),
1199 make_int (d / XCHARSET_CHARS (charset)
1200 % XCHARSET_CHARS (charset)
1201 + XCHARSET_BYTE_OFFSET (charset)),
1202 make_int (d % XCHARSET_CHARS (charset)
1203 + XCHARSET_BYTE_OFFSET (charset)));
1204 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1205 return list4 (make_int (d / (XCHARSET_CHARS (charset)
1206 * XCHARSET_CHARS (charset)
1207 * XCHARSET_CHARS (charset))
1208 + XCHARSET_BYTE_OFFSET (charset)),
1209 make_int (d / (XCHARSET_CHARS (charset)
1210 * XCHARSET_CHARS (charset))
1211 % XCHARSET_CHARS (charset)
1212 + XCHARSET_BYTE_OFFSET (charset)),
1213 make_int (d / XCHARSET_CHARS (charset)
1214 % XCHARSET_CHARS (charset)
1215 + XCHARSET_BYTE_OFFSET (charset)),
1216 make_int (d % XCHARSET_CHARS (charset)
1217 + XCHARSET_BYTE_OFFSET (charset)));
1219 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1221 if (XCHARSET_DIMENSION (charset) == 1)
1223 if (XCHARSET_CHARS (charset) == 94)
1225 if (((d = ch - (MIN_CHAR_94
1226 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1228 return list1 (make_int (d + 33));
1230 else if (XCHARSET_CHARS (charset) == 96)
1232 if (((d = ch - (MIN_CHAR_96
1233 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1235 return list1 (make_int (d + 32));
1240 else if (XCHARSET_DIMENSION (charset) == 2)
1242 if (XCHARSET_CHARS (charset) == 94)
1244 if (((d = ch - (MIN_CHAR_94x94
1245 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
1248 return list2 ((d / 94) + 33, d % 94 + 33);
1250 else if (XCHARSET_CHARS (charset) == 96)
1252 if (((d = ch - (MIN_CHAR_96x96
1253 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1256 return list2 ((d / 96) + 32, d % 96 + 32);
1264 charset_code_point (Lisp_Object charset, Emchar ch)
1266 Lisp_Object cdef = get_char_code_table (ch, Vcharacter_attribute_table);
1268 if (!EQ (cdef, Qnil))
1270 Lisp_Object field = Fassq (charset, cdef);
1272 if (!EQ (field, Qnil))
1273 return Fcdr (field);
1275 return range_charset_code_point (charset, ch);
1278 Lisp_Object Vdefault_coded_charset_priority_list;
1282 /************************************************************************/
1283 /* Basic charset Lisp functions */
1284 /************************************************************************/
1286 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1287 Return non-nil if OBJECT is a charset.
1291 return CHARSETP (object) ? Qt : Qnil;
1294 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1295 Retrieve the charset of the given name.
1296 If CHARSET-OR-NAME is a charset object, it is simply returned.
1297 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1298 nil is returned. Otherwise the associated charset object is returned.
1302 if (CHARSETP (charset_or_name))
1303 return charset_or_name;
1305 CHECK_SYMBOL (charset_or_name);
1306 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
1309 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1310 Retrieve the charset of the given name.
1311 Same as `find-charset' except an error is signalled if there is no such
1312 charset instead of returning nil.
1316 Lisp_Object charset = Ffind_charset (name);
1319 signal_simple_error ("No such charset", name);
1323 /* We store the charsets in hash tables with the names as the key and the
1324 actual charset object as the value. Occasionally we need to use them
1325 in a list format. These routines provide us with that. */
1326 struct charset_list_closure
1328 Lisp_Object *charset_list;
1332 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1333 void *charset_list_closure)
1335 /* This function can GC */
1336 struct charset_list_closure *chcl =
1337 (struct charset_list_closure*) charset_list_closure;
1338 Lisp_Object *charset_list = chcl->charset_list;
1340 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
1344 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1345 Return a list of the names of all defined charsets.
1349 Lisp_Object charset_list = Qnil;
1350 struct gcpro gcpro1;
1351 struct charset_list_closure charset_list_closure;
1353 GCPRO1 (charset_list);
1354 charset_list_closure.charset_list = &charset_list;
1355 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1356 &charset_list_closure);
1359 return charset_list;
1362 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1363 Return the name of the given charset.
1367 return XCHARSET_NAME (Fget_charset (charset));
1370 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1371 Define a new character set.
1372 This function is for use with Mule support.
1373 NAME is a symbol, the name by which the character set is normally referred.
1374 DOC-STRING is a string describing the character set.
1375 PROPS is a property list, describing the specific nature of the
1376 character set. Recognized properties are:
1378 'short-name Short version of the charset name (ex: Latin-1)
1379 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1380 'registry A regular expression matching the font registry field for
1382 'dimension Number of octets used to index a character in this charset.
1383 Either 1 or 2. Defaults to 1.
1384 'columns Number of columns used to display a character in this charset.
1385 Only used in TTY mode. (Under X, the actual width of a
1386 character can be derived from the font used to display the
1387 characters.) If unspecified, defaults to the dimension
1388 (this is almost always the correct value).
1389 'chars Number of characters in each dimension (94 or 96).
1390 Defaults to 94. Note that if the dimension is 2, the
1391 character set thus described is 94x94 or 96x96.
1392 'final Final byte of ISO 2022 escape sequence. Must be
1393 supplied. Each combination of (DIMENSION, CHARS) defines a
1394 separate namespace for final bytes. Note that ISO
1395 2022 restricts the final byte to the range
1396 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1397 dimension == 2. Note also that final bytes in the range
1398 0x30 - 0x3F are reserved for user-defined (not official)
1400 'graphic 0 (use left half of font on output) or 1 (use right half
1401 of font on output). Defaults to 0. For example, for
1402 a font whose registry is ISO8859-1, the left half
1403 (octets 0x20 - 0x7F) is the `ascii' character set, while
1404 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1405 character set. With 'graphic set to 0, the octets
1406 will have their high bit cleared; with it set to 1,
1407 the octets will have their high bit set.
1408 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1410 'ccl-program A compiled CCL program used to convert a character in
1411 this charset into an index into the font. This is in
1412 addition to the 'graphic property. The CCL program
1413 is passed the octets of the character, with the high
1414 bit cleared and set depending upon whether the value
1415 of the 'graphic property is 0 or 1.
1417 (name, doc_string, props))
1419 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1420 int direction = CHARSET_LEFT_TO_RIGHT;
1422 Lisp_Object registry = Qnil;
1423 Lisp_Object charset;
1424 Lisp_Object rest, keyword, value;
1425 Lisp_Object ccl_program = Qnil;
1426 Lisp_Object short_name = Qnil, long_name = Qnil;
1427 unsigned char byte_offset = 0;
1429 CHECK_SYMBOL (name);
1430 if (!NILP (doc_string))
1431 CHECK_STRING (doc_string);
1433 charset = Ffind_charset (name);
1434 if (!NILP (charset))
1435 signal_simple_error ("Cannot redefine existing charset", name);
1437 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1439 if (EQ (keyword, Qshort_name))
1441 CHECK_STRING (value);
1445 if (EQ (keyword, Qlong_name))
1447 CHECK_STRING (value);
1451 else if (EQ (keyword, Qdimension))
1454 dimension = XINT (value);
1455 if (dimension < 1 || dimension > 2)
1456 signal_simple_error ("Invalid value for 'dimension", value);
1459 else if (EQ (keyword, Qchars))
1462 chars = XINT (value);
1463 if (chars != 94 && chars != 96)
1464 signal_simple_error ("Invalid value for 'chars", value);
1467 else if (EQ (keyword, Qcolumns))
1470 columns = XINT (value);
1471 if (columns != 1 && columns != 2)
1472 signal_simple_error ("Invalid value for 'columns", value);
1475 else if (EQ (keyword, Qgraphic))
1478 graphic = XINT (value);
1480 if (graphic < 0 || graphic > 2)
1482 if (graphic < 0 || graphic > 1)
1484 signal_simple_error ("Invalid value for 'graphic", value);
1487 else if (EQ (keyword, Qregistry))
1489 CHECK_STRING (value);
1493 else if (EQ (keyword, Qdirection))
1495 if (EQ (value, Ql2r))
1496 direction = CHARSET_LEFT_TO_RIGHT;
1497 else if (EQ (value, Qr2l))
1498 direction = CHARSET_RIGHT_TO_LEFT;
1500 signal_simple_error ("Invalid value for 'direction", value);
1503 else if (EQ (keyword, Qfinal))
1505 CHECK_CHAR_COERCE_INT (value);
1506 final = XCHAR (value);
1507 if (final < '0' || final > '~')
1508 signal_simple_error ("Invalid value for 'final", value);
1511 else if (EQ (keyword, Qccl_program))
1513 CHECK_VECTOR (value);
1514 ccl_program = value;
1518 signal_simple_error ("Unrecognized property", keyword);
1522 error ("'final must be specified");
1523 if (dimension == 2 && final > 0x5F)
1525 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1529 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1531 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1533 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1534 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1536 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1538 id = get_unallocated_leading_byte (dimension);
1540 if (NILP (doc_string))
1541 doc_string = build_string ("");
1543 if (NILP (registry))
1544 registry = build_string ("");
1546 if (NILP (short_name))
1547 XSETSTRING (short_name, XSYMBOL (name)->name);
1549 if (NILP (long_name))
1550 long_name = doc_string;
1553 columns = dimension;
1554 charset = make_charset (id, name, type, columns, graphic,
1555 final, direction, short_name, long_name,
1556 doc_string, registry,
1557 Qnil, 0, 0, 0, byte_offset);
1558 if (!NILP (ccl_program))
1559 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1563 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1565 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1566 NEW-NAME is the name of the new charset. Return the new charset.
1568 (charset, new_name))
1570 Lisp_Object new_charset = Qnil;
1571 int id, dimension, columns, graphic, final;
1572 int direction, type;
1573 Lisp_Object registry, doc_string, short_name, long_name;
1574 struct Lisp_Charset *cs;
1576 charset = Fget_charset (charset);
1577 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1578 signal_simple_error ("Charset already has reverse-direction charset",
1581 CHECK_SYMBOL (new_name);
1582 if (!NILP (Ffind_charset (new_name)))
1583 signal_simple_error ("Cannot redefine existing charset", new_name);
1585 cs = XCHARSET (charset);
1587 type = CHARSET_TYPE (cs);
1588 columns = CHARSET_COLUMNS (cs);
1589 dimension = CHARSET_DIMENSION (cs);
1590 id = get_unallocated_leading_byte (dimension);
1592 graphic = CHARSET_GRAPHIC (cs);
1593 final = CHARSET_FINAL (cs);
1594 direction = CHARSET_RIGHT_TO_LEFT;
1595 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1596 direction = CHARSET_LEFT_TO_RIGHT;
1597 doc_string = CHARSET_DOC_STRING (cs);
1598 short_name = CHARSET_SHORT_NAME (cs);
1599 long_name = CHARSET_LONG_NAME (cs);
1600 registry = CHARSET_REGISTRY (cs);
1602 new_charset = make_charset (id, new_name, type, columns,
1603 graphic, final, direction, short_name, long_name,
1604 doc_string, registry,
1606 CHARSET_DECODING_TABLE(cs),
1607 CHARSET_UCS_MIN(cs),
1608 CHARSET_UCS_MAX(cs),
1609 CHARSET_CODE_OFFSET(cs),
1610 CHARSET_BYTE_OFFSET(cs)
1616 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1617 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1622 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1623 Define symbol ALIAS as an alias for CHARSET.
1627 CHECK_SYMBOL (alias);
1628 charset = Fget_charset (charset);
1629 return Fputhash (alias, charset, Vcharset_hash_table);
1632 /* #### Reverse direction charsets not yet implemented. */
1634 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1636 Return the reverse-direction charset parallel to CHARSET, if any.
1637 This is the charset with the same properties (in particular, the same
1638 dimension, number of characters per dimension, and final byte) as
1639 CHARSET but whose characters are displayed in the opposite direction.
1643 charset = Fget_charset (charset);
1644 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1648 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1649 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1650 If DIRECTION is omitted, both directions will be checked (left-to-right
1651 will be returned if character sets exist for both directions).
1653 (dimension, chars, final, direction))
1655 int dm, ch, fi, di = -1;
1657 Lisp_Object obj = Qnil;
1659 CHECK_INT (dimension);
1660 dm = XINT (dimension);
1661 if (dm < 1 || dm > 2)
1662 signal_simple_error ("Invalid value for DIMENSION", dimension);
1666 if (ch != 94 && ch != 96)
1667 signal_simple_error ("Invalid value for CHARS", chars);
1669 CHECK_CHAR_COERCE_INT (final);
1671 if (fi < '0' || fi > '~')
1672 signal_simple_error ("Invalid value for FINAL", final);
1674 if (EQ (direction, Ql2r))
1675 di = CHARSET_LEFT_TO_RIGHT;
1676 else if (EQ (direction, Qr2l))
1677 di = CHARSET_RIGHT_TO_LEFT;
1678 else if (!NILP (direction))
1679 signal_simple_error ("Invalid value for DIRECTION", direction);
1681 if (dm == 2 && fi > 0x5F)
1683 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1686 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1688 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1692 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
1694 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
1697 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
1700 return XCHARSET_NAME (obj);
1704 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1705 Return short name of CHARSET.
1709 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1712 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1713 Return long name of CHARSET.
1717 return XCHARSET_LONG_NAME (Fget_charset (charset));
1720 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1721 Return description of CHARSET.
1725 return XCHARSET_DOC_STRING (Fget_charset (charset));
1728 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1729 Return dimension of CHARSET.
1733 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1736 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1737 Return property PROP of CHARSET.
1738 Recognized properties are those listed in `make-charset', as well as
1739 'name and 'doc-string.
1743 struct Lisp_Charset *cs;
1745 charset = Fget_charset (charset);
1746 cs = XCHARSET (charset);
1748 CHECK_SYMBOL (prop);
1749 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1750 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1751 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1752 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1753 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1754 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1755 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1756 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1757 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1758 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1759 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1760 if (EQ (prop, Qdirection))
1761 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1762 if (EQ (prop, Qreverse_direction_charset))
1764 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1768 return XCHARSET_NAME (obj);
1770 signal_simple_error ("Unrecognized charset property name", prop);
1771 return Qnil; /* not reached */
1774 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1775 Return charset identification number of CHARSET.
1779 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1782 /* #### We need to figure out which properties we really want to
1785 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1786 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1788 (charset, ccl_program))
1790 charset = Fget_charset (charset);
1791 CHECK_VECTOR (ccl_program);
1792 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1797 invalidate_charset_font_caches (Lisp_Object charset)
1799 /* Invalidate font cache entries for charset on all devices. */
1800 Lisp_Object devcons, concons, hash_table;
1801 DEVICE_LOOP_NO_BREAK (devcons, concons)
1803 struct device *d = XDEVICE (XCAR (devcons));
1804 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1805 if (!UNBOUNDP (hash_table))
1806 Fclrhash (hash_table);
1810 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1811 Set the 'registry property of CHARSET to REGISTRY.
1813 (charset, registry))
1815 charset = Fget_charset (charset);
1816 CHECK_STRING (registry);
1817 XCHARSET_REGISTRY (charset) = registry;
1818 invalidate_charset_font_caches (charset);
1819 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1824 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1825 Return mapping-table of CHARSET.
1829 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1832 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1833 Set mapping-table of CHARSET to TABLE.
1837 struct Lisp_Charset *cs;
1838 Lisp_Object old_table;
1841 charset = Fget_charset (charset);
1842 cs = XCHARSET (charset);
1844 if (EQ (table, Qnil))
1846 CHARSET_DECODING_TABLE(cs) = table;
1849 else if (VECTORP (table))
1851 if (XVECTOR_LENGTH (table) > CHARSET_CHARS (cs))
1852 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
1853 old_table = CHARSET_DECODING_TABLE(cs);
1854 CHARSET_DECODING_TABLE(cs) = table;
1857 signal_error (Qwrong_type_argument,
1858 list2 (build_translated_string ("vector-or-nil-p"),
1860 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
1862 switch (CHARSET_DIMENSION (cs))
1865 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1867 Lisp_Object c = XVECTOR_DATA(table)[i];
1872 list1 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
1876 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1878 Lisp_Object v = XVECTOR_DATA(table)[i];
1884 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
1886 CHARSET_DECODING_TABLE(cs) = old_table;
1887 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
1889 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1891 Lisp_Object c = XVECTOR_DATA(v)[j];
1894 put_char_attribute (c, charset,
1897 (i + CHARSET_BYTE_OFFSET (cs)),
1899 (j + CHARSET_BYTE_OFFSET (cs))));
1903 put_char_attribute (v, charset,
1905 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
1914 /************************************************************************/
1915 /* Lisp primitives for working with characters */
1916 /************************************************************************/
1918 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1919 Make a character from CHARSET and octets ARG1 and ARG2.
1920 ARG2 is required only for characters from two-dimensional charsets.
1921 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1922 character s with caron.
1924 (charset, arg1, arg2))
1926 struct Lisp_Charset *cs;
1928 int lowlim, highlim;
1930 charset = Fget_charset (charset);
1931 cs = XCHARSET (charset);
1933 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1934 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1936 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
1938 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1939 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1942 /* It is useful (and safe, according to Olivier Galibert) to strip
1943 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
1944 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1945 Latin 2 code of the character. */
1953 if (a1 < lowlim || a1 > highlim)
1954 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1956 if (CHARSET_DIMENSION (cs) == 1)
1960 ("Charset is of dimension one; second octet must be nil", arg2);
1961 return make_char (MAKE_CHAR (charset, a1, 0));
1970 a2 = XINT (arg2) & 0x7f;
1972 if (a2 < lowlim || a2 > highlim)
1973 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1975 return make_char (MAKE_CHAR (charset, a1, a2));
1978 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1979 Return the character set of char CH.
1983 CHECK_CHAR_COERCE_INT (ch);
1985 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
1988 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1989 Return list of charset and one or two position-codes of CHAR.
1993 /* This function can GC */
1994 struct gcpro gcpro1, gcpro2;
1995 Lisp_Object charset = Qnil;
1996 Lisp_Object rc = Qnil;
1999 GCPRO2 (charset, rc);
2000 CHECK_CHAR_COERCE_INT (character);
2002 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2004 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2006 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2010 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2018 #ifdef ENABLE_COMPOSITE_CHARS
2019 /************************************************************************/
2020 /* composite character functions */
2021 /************************************************************************/
2024 lookup_composite_char (Bufbyte *str, int len)
2026 Lisp_Object lispstr = make_string (str, len);
2027 Lisp_Object ch = Fgethash (lispstr,
2028 Vcomposite_char_string2char_hash_table,
2034 if (composite_char_row_next >= 128)
2035 signal_simple_error ("No more composite chars available", lispstr);
2036 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2037 composite_char_col_next);
2038 Fputhash (make_char (emch), lispstr,
2039 Vcomposite_char_char2string_hash_table);
2040 Fputhash (lispstr, make_char (emch),
2041 Vcomposite_char_string2char_hash_table);
2042 composite_char_col_next++;
2043 if (composite_char_col_next >= 128)
2045 composite_char_col_next = 32;
2046 composite_char_row_next++;
2055 composite_char_string (Emchar ch)
2057 Lisp_Object str = Fgethash (make_char (ch),
2058 Vcomposite_char_char2string_hash_table,
2060 assert (!UNBOUNDP (str));
2064 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2065 Convert a string into a single composite character.
2066 The character is the result of overstriking all the characters in
2071 CHECK_STRING (string);
2072 return make_char (lookup_composite_char (XSTRING_DATA (string),
2073 XSTRING_LENGTH (string)));
2076 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2077 Return a string of the characters comprising a composite character.
2085 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2086 signal_simple_error ("Must be composite char", ch);
2087 return composite_char_string (emch);
2089 #endif /* ENABLE_COMPOSITE_CHARS */
2092 /************************************************************************/
2093 /* initialization */
2094 /************************************************************************/
2097 syms_of_mule_charset (void)
2099 DEFSUBR (Fcharsetp);
2100 DEFSUBR (Ffind_charset);
2101 DEFSUBR (Fget_charset);
2102 DEFSUBR (Fcharset_list);
2103 DEFSUBR (Fcharset_name);
2104 DEFSUBR (Fmake_charset);
2105 DEFSUBR (Fmake_reverse_direction_charset);
2106 /* DEFSUBR (Freverse_direction_charset); */
2107 DEFSUBR (Fdefine_charset_alias);
2108 DEFSUBR (Fcharset_from_attributes);
2109 DEFSUBR (Fcharset_short_name);
2110 DEFSUBR (Fcharset_long_name);
2111 DEFSUBR (Fcharset_description);
2112 DEFSUBR (Fcharset_dimension);
2113 DEFSUBR (Fcharset_property);
2114 DEFSUBR (Fcharset_id);
2115 DEFSUBR (Fset_charset_ccl_program);
2116 DEFSUBR (Fset_charset_registry);
2118 DEFSUBR (Fchar_attribute_alist);
2119 DEFSUBR (Fget_char_attribute);
2120 DEFSUBR (Fput_char_attribute);
2121 DEFSUBR (Fdefine_char);
2122 DEFSUBR (Fcharset_mapping_table);
2123 DEFSUBR (Fset_charset_mapping_table);
2126 DEFSUBR (Fmake_char);
2127 DEFSUBR (Fchar_charset);
2128 DEFSUBR (Fsplit_char);
2130 #ifdef ENABLE_COMPOSITE_CHARS
2131 DEFSUBR (Fmake_composite_char);
2132 DEFSUBR (Fcomposite_char_string);
2135 defsymbol (&Qcharsetp, "charsetp");
2136 defsymbol (&Qregistry, "registry");
2137 defsymbol (&Qfinal, "final");
2138 defsymbol (&Qgraphic, "graphic");
2139 defsymbol (&Qdirection, "direction");
2140 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2141 defsymbol (&Qshort_name, "short-name");
2142 defsymbol (&Qlong_name, "long-name");
2144 defsymbol (&Ql2r, "l2r");
2145 defsymbol (&Qr2l, "r2l");
2147 /* Charsets, compatible with FSF 20.3
2148 Naming convention is Script-Charset[-Edition] */
2149 defsymbol (&Qascii, "ascii");
2150 defsymbol (&Qcontrol_1, "control-1");
2151 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2152 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2153 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2154 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2155 defsymbol (&Qthai_tis620, "thai-tis620");
2156 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2157 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2158 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2159 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2160 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2161 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2162 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2163 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2164 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2165 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2166 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2167 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2168 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2169 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2171 defsymbol (&Qucs, "ucs");
2172 defsymbol (&Qucs_bmp, "ucs-bmp");
2173 defsymbol (&Qlatin_viscii, "latin-viscii");
2174 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2175 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2176 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2177 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2178 defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208");
2179 defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208");
2181 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2182 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2184 defsymbol (&Qcomposite, "composite");
2188 vars_of_mule_charset (void)
2195 /* Table of charsets indexed by leading byte. */
2196 for (i = 0; i < countof (charset_by_leading_byte); i++)
2197 charset_by_leading_byte[i] = Qnil;
2200 /* Table of charsets indexed by type/final-byte. */
2201 for (i = 0; i < countof (charset_by_attributes); i++)
2202 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2203 charset_by_attributes[i][j] = Qnil;
2205 /* Table of charsets indexed by type/final-byte/direction. */
2206 for (i = 0; i < countof (charset_by_attributes); i++)
2207 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2208 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2209 charset_by_attributes[i][j][k] = Qnil;
2213 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2215 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2216 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2220 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2221 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2222 Leading-code of private TYPE9N charset of column-width 1.
2224 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2228 Vutf_2000_version = build_string("0.11 (Shiki)");
2229 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2230 Version number of UTF-2000.
2233 staticpro (&Vcharacter_attribute_table);
2234 Vcharacter_attribute_table = make_char_code_table (Qnil);
2236 Vdefault_coded_charset_priority_list = Qnil;
2237 DEFVAR_LISP ("default-coded-charset-priority-list",
2238 &Vdefault_coded_charset_priority_list /*
2239 Default order of preferred coded-character-sets.
2245 complex_vars_of_mule_charset (void)
2247 staticpro (&Vcharset_hash_table);
2248 Vcharset_hash_table =
2249 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2251 /* Predefined character sets. We store them into variables for
2256 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2257 CHARSET_TYPE_256X256, 1, 2, 0,
2258 CHARSET_LEFT_TO_RIGHT,
2259 build_string ("BMP"),
2260 build_string ("BMP"),
2261 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2262 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2263 Qnil, 0, 0xFFFF, 0, 0);
2265 # define MIN_CHAR_THAI 0
2266 # define MAX_CHAR_THAI 0
2267 # define MIN_CHAR_GREEK 0
2268 # define MAX_CHAR_GREEK 0
2269 # define MIN_CHAR_HEBREW 0
2270 # define MAX_CHAR_HEBREW 0
2271 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2272 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2273 # define MIN_CHAR_CYRILLIC 0
2274 # define MAX_CHAR_CYRILLIC 0
2277 make_charset (LEADING_BYTE_ASCII, Qascii,
2278 CHARSET_TYPE_94, 1, 0, 'B',
2279 CHARSET_LEFT_TO_RIGHT,
2280 build_string ("ASCII"),
2281 build_string ("ASCII)"),
2282 build_string ("ASCII (ISO646 IRV)"),
2283 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2284 Qnil, 0, 0x7F, 0, 0);
2285 Vcharset_control_1 =
2286 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2287 CHARSET_TYPE_94, 1, 1, 0,
2288 CHARSET_LEFT_TO_RIGHT,
2289 build_string ("C1"),
2290 build_string ("Control characters"),
2291 build_string ("Control characters 128-191"),
2293 Qnil, 0x80, 0x9F, 0, 0);
2294 Vcharset_latin_iso8859_1 =
2295 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2296 CHARSET_TYPE_96, 1, 1, 'A',
2297 CHARSET_LEFT_TO_RIGHT,
2298 build_string ("Latin-1"),
2299 build_string ("ISO8859-1 (Latin-1)"),
2300 build_string ("ISO8859-1 (Latin-1)"),
2301 build_string ("iso8859-1"),
2302 Qnil, 0xA0, 0xFF, 0, 32);
2303 Vcharset_latin_iso8859_2 =
2304 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2305 CHARSET_TYPE_96, 1, 1, 'B',
2306 CHARSET_LEFT_TO_RIGHT,
2307 build_string ("Latin-2"),
2308 build_string ("ISO8859-2 (Latin-2)"),
2309 build_string ("ISO8859-2 (Latin-2)"),
2310 build_string ("iso8859-2"),
2312 Vcharset_latin_iso8859_3 =
2313 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2314 CHARSET_TYPE_96, 1, 1, 'C',
2315 CHARSET_LEFT_TO_RIGHT,
2316 build_string ("Latin-3"),
2317 build_string ("ISO8859-3 (Latin-3)"),
2318 build_string ("ISO8859-3 (Latin-3)"),
2319 build_string ("iso8859-3"),
2321 Vcharset_latin_iso8859_4 =
2322 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2323 CHARSET_TYPE_96, 1, 1, 'D',
2324 CHARSET_LEFT_TO_RIGHT,
2325 build_string ("Latin-4"),
2326 build_string ("ISO8859-4 (Latin-4)"),
2327 build_string ("ISO8859-4 (Latin-4)"),
2328 build_string ("iso8859-4"),
2330 Vcharset_thai_tis620 =
2331 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2332 CHARSET_TYPE_96, 1, 1, 'T',
2333 CHARSET_LEFT_TO_RIGHT,
2334 build_string ("TIS620"),
2335 build_string ("TIS620 (Thai)"),
2336 build_string ("TIS620.2529 (Thai)"),
2337 build_string ("tis620"),
2338 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2339 Vcharset_greek_iso8859_7 =
2340 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2341 CHARSET_TYPE_96, 1, 1, 'F',
2342 CHARSET_LEFT_TO_RIGHT,
2343 build_string ("ISO8859-7"),
2344 build_string ("ISO8859-7 (Greek)"),
2345 build_string ("ISO8859-7 (Greek)"),
2346 build_string ("iso8859-7"),
2347 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2348 Vcharset_arabic_iso8859_6 =
2349 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2350 CHARSET_TYPE_96, 1, 1, 'G',
2351 CHARSET_RIGHT_TO_LEFT,
2352 build_string ("ISO8859-6"),
2353 build_string ("ISO8859-6 (Arabic)"),
2354 build_string ("ISO8859-6 (Arabic)"),
2355 build_string ("iso8859-6"),
2357 Vcharset_hebrew_iso8859_8 =
2358 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2359 CHARSET_TYPE_96, 1, 1, 'H',
2360 CHARSET_RIGHT_TO_LEFT,
2361 build_string ("ISO8859-8"),
2362 build_string ("ISO8859-8 (Hebrew)"),
2363 build_string ("ISO8859-8 (Hebrew)"),
2364 build_string ("iso8859-8"),
2365 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2366 Vcharset_katakana_jisx0201 =
2367 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2368 CHARSET_TYPE_94, 1, 1, 'I',
2369 CHARSET_LEFT_TO_RIGHT,
2370 build_string ("JISX0201 Kana"),
2371 build_string ("JISX0201.1976 (Japanese Kana)"),
2372 build_string ("JISX0201.1976 Japanese Kana"),
2373 build_string ("jisx0201\\.1976"),
2375 MIN_CHAR_HALFWIDTH_KATAKANA,
2376 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2377 Vcharset_latin_jisx0201 =
2378 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2379 CHARSET_TYPE_94, 1, 0, 'J',
2380 CHARSET_LEFT_TO_RIGHT,
2381 build_string ("JISX0201 Roman"),
2382 build_string ("JISX0201.1976 (Japanese Roman)"),
2383 build_string ("JISX0201.1976 Japanese Roman"),
2384 build_string ("jisx0201\\.1976"),
2386 Vcharset_cyrillic_iso8859_5 =
2387 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2388 CHARSET_TYPE_96, 1, 1, 'L',
2389 CHARSET_LEFT_TO_RIGHT,
2390 build_string ("ISO8859-5"),
2391 build_string ("ISO8859-5 (Cyrillic)"),
2392 build_string ("ISO8859-5 (Cyrillic)"),
2393 build_string ("iso8859-5"),
2394 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2395 Vcharset_latin_iso8859_9 =
2396 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2397 CHARSET_TYPE_96, 1, 1, 'M',
2398 CHARSET_LEFT_TO_RIGHT,
2399 build_string ("Latin-5"),
2400 build_string ("ISO8859-9 (Latin-5)"),
2401 build_string ("ISO8859-9 (Latin-5)"),
2402 build_string ("iso8859-9"),
2404 Vcharset_japanese_jisx0208_1978 =
2405 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2406 CHARSET_TYPE_94X94, 2, 0, '@',
2407 CHARSET_LEFT_TO_RIGHT,
2408 build_string ("JIS X0208:1978"),
2409 build_string ("JIS X0208:1978 (Japanese)"),
2411 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2412 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2414 Vcharset_chinese_gb2312 =
2415 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2416 CHARSET_TYPE_94X94, 2, 0, 'A',
2417 CHARSET_LEFT_TO_RIGHT,
2418 build_string ("GB2312"),
2419 build_string ("GB2312)"),
2420 build_string ("GB2312 Chinese simplified"),
2421 build_string ("gb2312"),
2423 Vcharset_japanese_jisx0208 =
2424 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2425 CHARSET_TYPE_94X94, 2, 0, 'B',
2426 CHARSET_LEFT_TO_RIGHT,
2427 build_string ("JISX0208"),
2428 build_string ("JIS X0208:1983 (Japanese)"),
2429 build_string ("JIS X0208:1983 Japanese Kanji"),
2430 build_string ("jisx0208\\.1983"),
2432 Vcharset_korean_ksc5601 =
2433 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2434 CHARSET_TYPE_94X94, 2, 0, 'C',
2435 CHARSET_LEFT_TO_RIGHT,
2436 build_string ("KSC5601"),
2437 build_string ("KSC5601 (Korean"),
2438 build_string ("KSC5601 Korean Hangul and Hanja"),
2439 build_string ("ksc5601"),
2441 Vcharset_japanese_jisx0212 =
2442 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2443 CHARSET_TYPE_94X94, 2, 0, 'D',
2444 CHARSET_LEFT_TO_RIGHT,
2445 build_string ("JISX0212"),
2446 build_string ("JISX0212 (Japanese)"),
2447 build_string ("JISX0212 Japanese Supplement"),
2448 build_string ("jisx0212"),
2451 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2452 Vcharset_chinese_cns11643_1 =
2453 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2454 CHARSET_TYPE_94X94, 2, 0, 'G',
2455 CHARSET_LEFT_TO_RIGHT,
2456 build_string ("CNS11643-1"),
2457 build_string ("CNS11643-1 (Chinese traditional)"),
2459 ("CNS 11643 Plane 1 Chinese traditional"),
2460 build_string (CHINESE_CNS_PLANE_RE("1")),
2462 Vcharset_chinese_cns11643_2 =
2463 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2464 CHARSET_TYPE_94X94, 2, 0, 'H',
2465 CHARSET_LEFT_TO_RIGHT,
2466 build_string ("CNS11643-2"),
2467 build_string ("CNS11643-2 (Chinese traditional)"),
2469 ("CNS 11643 Plane 2 Chinese traditional"),
2470 build_string (CHINESE_CNS_PLANE_RE("2")),
2473 Vcharset_latin_viscii_lower =
2474 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2475 CHARSET_TYPE_96, 1, 1, '1',
2476 CHARSET_LEFT_TO_RIGHT,
2477 build_string ("VISCII lower"),
2478 build_string ("VISCII lower (Vietnamese)"),
2479 build_string ("VISCII lower (Vietnamese)"),
2480 build_string ("MULEVISCII-LOWER"),
2482 Vcharset_latin_viscii_upper =
2483 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2484 CHARSET_TYPE_96, 1, 1, '2',
2485 CHARSET_LEFT_TO_RIGHT,
2486 build_string ("VISCII upper"),
2487 build_string ("VISCII upper (Vietnamese)"),
2488 build_string ("VISCII upper (Vietnamese)"),
2489 build_string ("MULEVISCII-UPPER"),
2491 Vcharset_latin_viscii =
2492 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2493 CHARSET_TYPE_256, 1, 2, 0,
2494 CHARSET_LEFT_TO_RIGHT,
2495 build_string ("VISCII"),
2496 build_string ("VISCII 1.1 (Vietnamese)"),
2497 build_string ("VISCII 1.1 (Vietnamese)"),
2498 build_string ("VISCII1\\.1"),
2500 Vcharset_hiragana_jisx0208 =
2501 make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
2502 CHARSET_TYPE_94X94, 2, 0, 'B',
2503 CHARSET_LEFT_TO_RIGHT,
2504 build_string ("Hiragana"),
2505 build_string ("Hiragana of JIS X0208"),
2506 build_string ("Japanese Hiragana of JIS X0208"),
2507 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2508 Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA,
2509 (0x24 - 33) * 94 + (0x21 - 33), 33);
2510 Vcharset_katakana_jisx0208 =
2511 make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208,
2512 CHARSET_TYPE_94X94, 2, 0, 'B',
2513 CHARSET_LEFT_TO_RIGHT,
2514 build_string ("Katakana"),
2515 build_string ("Katakana of JIS X0208"),
2516 build_string ("Japanese Katakana of JIS X0208"),
2517 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2518 Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA,
2519 (0x25 - 33) * 94 + (0x21 - 33), 33);
2521 Vcharset_chinese_big5_1 =
2522 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2523 CHARSET_TYPE_94X94, 2, 0, '0',
2524 CHARSET_LEFT_TO_RIGHT,
2525 build_string ("Big5"),
2526 build_string ("Big5 (Level-1)"),
2528 ("Big5 Level-1 Chinese traditional"),
2529 build_string ("big5"),
2531 Vcharset_chinese_big5_2 =
2532 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2533 CHARSET_TYPE_94X94, 2, 0, '1',
2534 CHARSET_LEFT_TO_RIGHT,
2535 build_string ("Big5"),
2536 build_string ("Big5 (Level-2)"),
2538 ("Big5 Level-2 Chinese traditional"),
2539 build_string ("big5"),
2542 #ifdef ENABLE_COMPOSITE_CHARS
2543 /* #### For simplicity, we put composite chars into a 96x96 charset.
2544 This is going to lead to problems because you can run out of
2545 room, esp. as we don't yet recycle numbers. */
2546 Vcharset_composite =
2547 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2548 CHARSET_TYPE_96X96, 2, 0, 0,
2549 CHARSET_LEFT_TO_RIGHT,
2550 build_string ("Composite"),
2551 build_string ("Composite characters"),
2552 build_string ("Composite characters"),
2555 composite_char_row_next = 32;
2556 composite_char_col_next = 32;
2558 Vcomposite_char_string2char_hash_table =
2559 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2560 Vcomposite_char_char2string_hash_table =
2561 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2562 staticpro (&Vcomposite_char_string2char_hash_table);
2563 staticpro (&Vcomposite_char_char2string_hash_table);
2564 #endif /* ENABLE_COMPOSITE_CHARS */