1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any later version.
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
/* One file-scope Lisp_Object per built-in charset.  None is `static',
   so all have external linkage.  The code that initialises them is not
   in this part of the file. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii;
63 Lisp_Object Vcharset_latin_viscii_lower;
64 Lisp_Object Vcharset_latin_viscii_upper;
65 Lisp_Object Vcharset_hiragana_jisx0208;
66 Lisp_Object Vcharset_katakana_jisx0208;
68 Lisp_Object Vcharset_chinese_big5_1;
69 Lisp_Object Vcharset_chinese_big5_2;
/* The composite charset exists only when ENABLE_COMPOSITE_CHARS is
   defined. */
71 #ifdef ENABLE_COMPOSITE_CHARS
72 Lisp_Object Vcharset_composite;
74 /* Hash tables for composite chars. One maps strings representing
75 composed chars to their equivalent chars; one goes the other way. */
/* Composite-character bookkeeping.  Within this part of the file the
   char2string table is consulted by non_ascii_valid_char_p, which looks
   up make_int (ch) in it with Fgethash.  No use of the string2char table
   or of the two static counters is visible here. */
77 Lisp_Object Vcomposite_char_char2string_hash_table;
78 Lisp_Object Vcomposite_char_string2char_hash_table;
80 static int composite_char_row_next;
81 static int composite_char_col_next;
83 #endif /* ENABLE_COMPOSITE_CHARS */
85 /* Table of charsets indexed by leading byte. */
/* make_charset stores each new charset here at index
   (id - MIN_LEADING_BYTE). */
86 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
88 /* Table of charsets indexed by type/final-byte/direction. */
/* NOTE(review): two declarations of charset_by_attributes with different
   shapes follow.  make_charset likewise contains one store indexed
   [type][final] and one indexed [type][final][direction], so these are
   presumably alternatives selected by a preprocessor conditional whose
   directives are absent from this copy -- confirm against the full
   file. */
90 Lisp_Object charset_by_attributes[4][128];
92 Lisp_Object charset_by_attributes[4][128][2];
96 /* Table of number of bytes in the string representation of a character
97 indexed by the first byte of that representation.
99 rep_bytes_by_first_byte(c) is more efficient than the equivalent
100 canonical computation:
102 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
/* The array is writable on purpose: make_charset assigns
   rep_bytes_by_first_byte[id] when it registers a charset. */
104 Bytecount rep_bytes_by_first_byte[0xA0] =
105 { /* 0x00 - 0x7f are for straight ASCII */
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 /* 0x80 - 0x8f are for Dimension-1 official charsets */
/* NOTE(review): two rows follow under this single heading, giving
   11 rows of 16 = 176 initialisers for a 0xA0 (160) element array.
   The rows differ only in their last entry (3 versus 2) and are
   presumably alternatives under a conditional that is absent from this
   copy.  The closing of the initialiser is not visible either. */
116 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
118 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 /* 0x90 - 0x9d are for Dimension-2 official charsets */
121 /* 0x9e is for Dimension-1 private charsets */
122 /* 0x9f is for Dimension-2 private charsets */
123 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Mark function handed to DEFINE_LRECORD_IMPLEMENTATION for
   char-byte-tables: calls MARKOBJ on each of the 256 `property' slots
   of OBJ.
   NOTE(review): the return type, braces, the declaration of `i' and any
   return statement are not present in this copy of the file. */
129 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
131 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
134 for (i = 0; i < 256; i++)
136 markobj (cte->property[i]);
/* Equality function handed to DEFINE_LRECORD_IMPLEMENTATION for
   char-byte-tables.  Walks the 256 `property' slots of both tables in
   parallel.  When both slots hold nested char-byte-tables they are
   compared by a recursive call with DEPTH + 1; internal_equal, also
   with DEPTH + 1, is applied to slot pairs as well.
   NOTE(review): the return type, braces, `else' arms and return
   statements are not present in this copy, so exactly which slot pairs
   reach internal_equal, and what is returned, cannot be confirmed
   here. */
142 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
144 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
145 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
148 for (i = 0; i < 256; i++)
149 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
151 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
153 if (!char_byte_table_equal (cte1->property[i],
154 cte2->property[i], depth + 1))
161 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
/* Hash function handed to DEFINE_LRECORD_IMPLEMENTATION for
   char-byte-tables: hashes the 256-element `property' array with
   internal_array_hash, forwarding DEPTH unchanged.
   NOTE(review): the return type and braces are not present in this
   copy. */
167 char_byte_table_hash (Lisp_Object obj, int depth)
169 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
171 return internal_array_hash (cte->property, 256, depth);
/* lrecord description for struct Lisp_Char_Byte_Table: a single
   XD_LISP_OBJECT entry at the offset of `property' with a count of 256,
   the same slot count the functions in this file iterate over.
   NOTE(review): any terminating entry and the closing of the
   initialiser are not present in this copy. */
174 static const struct lrecord_description char_byte_table_description[] = {
175 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
/* Defines the lrecord implementation for struct Lisp_Char_Byte_Table,
   passing mark_char_byte_table, internal_object_printer,
   char_byte_table_equal, char_byte_table_hash and
   char_byte_table_description.
   NOTE(review): the name string is "char-code-table" while every C
   identifier says char_byte_table -- confirm the mismatch is
   intended. */
179 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_byte_table,
180 mark_char_byte_table,
181 internal_object_printer,
182 0, char_byte_table_equal,
183 char_byte_table_hash,
184 char_byte_table_description,
185 struct Lisp_Char_Byte_Table);
/* Allocate a new char-byte-table whose 256 `property' slots all hold
   INITVAL, and wrap it in the Lisp_Object `obj' with
   XSETCHAR_BYTE_TABLE.
   NOTE(review): the return type, braces, the declarations of `obj' and
   `i', and the return statement are not present in this copy. */
189 make_char_byte_table (Lisp_Object initval)
193 struct Lisp_Char_Byte_Table *cte =
194 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
195 &lrecord_char_byte_table);
197 for (i = 0; i < 256; i++)
198 cte->property[i] = initval;
200 XSETCHAR_BYTE_TABLE (obj, cte);
/* Copy the char-byte-table ENTRY into a newly allocated table.  A slot
   that itself holds a char-byte-table is copied by a recursive call;
   the other assignment stores the slot value unchanged, so such values
   are shared between the original and the copy.
   NOTE(review): the return type, braces, the declarations of `obj' and
   `i', the `else' that presumably separates the two assignments to
   ctenew->property[i], and the return statement are not present in this
   copy.  The local is named `new', which is only legal because this is
   C and not C++. */
205 copy_char_byte_table (Lisp_Object entry)
207 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
210 struct Lisp_Char_Byte_Table *ctenew =
211 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
212 &lrecord_char_byte_table);
214 for (i = 0; i < 256; i++)
216 Lisp_Object new = cte->property[i];
217 if (CHAR_BYTE_TABLE_P (new))
218 ctenew->property[i] = copy_char_byte_table (new);
220 ctenew->property[i] = new;
223 XSETCHAR_BYTE_TABLE (obj, ctenew);
/* Alias: make_char_code_table (INITVAL) expands to
   make_char_byte_table (INITVAL). */
227 #define make_char_code_table(initval) make_char_byte_table(initval)
/* Look up the value stored for character CH in TABLE.
   TABLE is walked as a tree of up to four levels, indexed by the bytes
   of CH from the most significant (CH >> 24) down to the least
   significant.  At each of the first three levels a slot holding a
   nested char-byte-table makes the walk descend into it; the last
   visible statement returns the slot selected by the low byte.
   NOTE(review): the return type, braces and `else' arms are not present
   in this copy; presumably a non-table slot value is returned as soon
   as it is found -- confirm against the full file. */
230 get_char_code_table (Emchar ch, Lisp_Object table)
232 struct Lisp_Char_Byte_Table* cpt = XCHAR_BYTE_TABLE (table);
/* NOTE(review): this first index is `ch >> 24' without the
   (unsigned char) cast used at the lower levels.  If Emchar is a signed
   type and CH is negative the index would be negative -- confirm that
   callers never pass such a value, or add the cast. */
233 Lisp_Object ret = cpt->property [ch >> 24];
235 if (CHAR_BYTE_TABLE_P (ret))
236 cpt = XCHAR_BYTE_TABLE (ret);
240 ret = cpt->property [(unsigned char) (ch >> 16)];
241 if (CHAR_BYTE_TABLE_P (ret))
242 cpt = XCHAR_BYTE_TABLE (ret);
246 ret = cpt->property [(unsigned char) (ch >> 8)];
247 if (CHAR_BYTE_TABLE_P (ret))
248 cpt = XCHAR_BYTE_TABLE (ret);
252 return cpt->property [(unsigned char) ch];
/* Store VALUE for character CH in TABLE.
   The walk uses the same four byte-indexed levels as
   get_char_code_table.  Where a slot already holds a nested table the
   walk descends, and at the last level VALUE is written into the slot
   for the low byte of CH.  Where a slot holds a non-table value that is
   not EQ to VALUE, the missing lower-level tables are created with
   make_char_byte_table, each filled with that old slot value; VALUE is
   written into the lowest new table and the new tables are linked
   upward into the slot.  No store is visible for the case where the
   slot value is already EQ to VALUE.
   NOTE(review): the return type and braces are not present in this
   copy. */
256 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
258 struct Lisp_Char_Byte_Table* cpt1 = XCHAR_BYTE_TABLE (table);
/* NOTE(review): the read below indexes with `ch >> 24' uncast, while
   the matching store at the end of this function casts the same
   expression to unsigned char -- the two should agree. */
259 Lisp_Object ret = cpt1->property[ch >> 24];
261 if (CHAR_BYTE_TABLE_P (ret))
263 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
265 ret = cpt2->property[(unsigned char)(ch >> 16)];
266 if (CHAR_BYTE_TABLE_P (ret))
268 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
270 ret = cpt3->property[(unsigned char)(ch >> 8)];
271 if (CHAR_BYTE_TABLE_P (ret))
273 struct Lisp_Char_Byte_Table* cpt4
274 = XCHAR_BYTE_TABLE (ret);
276 cpt4->property[(unsigned char)ch] = value;
/* Third-level slot holds a plain value: create the fourth level. */
278 else if (!EQ (ret, value))
280 Lisp_Object cpt4 = make_char_byte_table (ret);
282 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
283 cpt3->property[(unsigned char)(ch >> 8)] = cpt4;
/* Second-level slot holds a plain value: create levels three and four. */
286 else if (!EQ (ret, value))
288 Lisp_Object cpt3 = make_char_byte_table (ret);
289 Lisp_Object cpt4 = make_char_byte_table (ret);
291 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
/* NOTE(review): the right-hand side of the next assignment is on a line
   that is absent from this copy; by analogy with the other branches it
   is presumably cpt4. */
292 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)]
294 cpt2->property[(unsigned char)(ch >> 16)] = cpt3;
/* Top-level slot holds a plain value: create levels two, three and four. */
297 else if (!EQ (ret, value))
299 Lisp_Object cpt2 = make_char_byte_table (ret);
300 Lisp_Object cpt3 = make_char_byte_table (ret);
301 Lisp_Object cpt4 = make_char_byte_table (ret);
303 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
304 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)] = cpt4;
305 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(ch >> 16)] = cpt3;
306 cpt1->property[(unsigned char)(ch >> 24)] = cpt2;
/* Char-code table holding per-character attribute data.  In this file
   it is read with get_char_code_table and written with
   put_char_code_table, and the stored values are searched with Fassq,
   i.e. treated as alists. */
311 Lisp_Object Vcharacter_attribute_table;
/* Lisp primitive `char-attribute-alist'.  Returns whatever
   get_char_code_table yields for XCHAR (character) in
   Vcharacter_attribute_table.
   NOTE(review): the end of the doc string, the argument list and the
   braces are not present in this copy; no type check of CHARACTER is
   visible. */
313 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
314 Return the alist of attributes of CHARACTER.
318 return get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
/* Lisp primitive `get-char-attribute'.  Fetches CHARACTER's entry from
   Vcharacter_attribute_table and returns the cdr of the Fassq match for
   ATTRIBUTE in it.  Ffind_charset is applied to ATTRIBUTE first and its
   result is kept in `ccs'.
   NOTE(review): the end of the doc string, several declarations, the
   statement controlled by the Ffind_charset test and the braces are not
   present in this copy, so what happens when ATTRIBUTE names a charset
   cannot be confirmed here. */
321 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
322 Return the value of CHARACTER's ATTRIBUTE.
324 (character, attribute))
327 = get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
333 if (!NILP (ccs = Ffind_charset (attribute)))
336 return Fcdr (Fassq (attribute, ret));
/* Record ATTRIBUTE = VALUE for CHARACTER in Vcharacter_attribute_table.
   The character's current entry is fetched and searched with Fassq.
   One visible statement conses a new (ATTRIBUTE . VALUE) cell onto the
   front of the entry; another replaces the cdr of the existing cell
   when it is not already EQ to VALUE.  The resulting entry is written
   back with put_char_code_table.
   NOTE(review): the return type, the rest of the parameter list (which
   must declare `value'), the declarations of `ret' and `cell', the test
   that selects the Fcons arm, the braces and the return statement are
   not present in this copy. */
340 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
343 Emchar char_code = XCHAR (character);
345 = get_char_code_table (char_code, Vcharacter_attribute_table);
348 cell = Fassq (attribute, ret);
352 ret = Fcons (Fcons (attribute, value), ret);
354 else if (!EQ (Fcdr (cell), value))
356 Fsetcdr (cell, value);
358 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
/* Lisp primitive `put-char-attribute'.  ATTRIBUTE is resolved with
   Ffind_charset into `ccs'.  The visible statements that use `ccs'
   maintain that charset's decoding table (XCHARSET_DECODING_TABLE):
     - ccs_len is 128 - XCHARSET_BYTE_OFFSET for a 94-char set whose
       byte offset is not 33, and XCHARSET_CHARS in the other visible
       assignment;
     - the value previously stored for CHARACTER (Fget_char_attribute)
       is used to find an old table slot, which is set to Qnil;
     - vectors of ccs_len elements are created with make_vector;
     - CHARACTER is stored at index (byte - XCHARSET_BYTE_OFFSET).
   The last visible statement delegates to put_char_attribute and
   returns its result.
   NOTE(review): most conditions, loops, declarations and braces are not
   present in this copy, so the exact control flow and the conditions
   guarding the two "Invalid value for coded-charset" errors cannot be
   confirmed.  No bounds check of `i' against the vector length is
   visible before the XVECTOR_DATA accesses -- verify in the full
   file. */
362 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
363 Store CHARACTER's ATTRIBUTE with VALUE.
365 (character, attribute, value))
369 ccs = Ffind_charset (attribute);
373 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
378 /* ad-hoc method for `ascii' */
379 if ((XCHARSET_CHARS (ccs) == 94) &&
380 (XCHARSET_BYTE_OFFSET (ccs) != 33))
381 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
383 ccs_len = XCHARSET_CHARS (ccs);
386 signal_simple_error ("Invalid value for coded-charset",
390 rest = Fget_char_attribute (character, attribute);
397 Lisp_Object ei = Fcar (rest);
399 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
400 nv = XVECTOR_DATA(v)[i];
407 XVECTOR_DATA(v)[i] = Qnil;
408 v = XCHARSET_DECODING_TABLE (ccs);
413 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
420 Lisp_Object ei = Fcar (rest);
423 signal_simple_error ("Invalid value for coded-charset",
425 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
426 nv = XVECTOR_DATA(v)[i];
432 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
439 XVECTOR_DATA(v)[i] = character;
441 return put_char_attribute (character, attribute, value);
/* Lisp primitive `define-char'.  ATTRIBUTES is searched with Fassq and
   walked cell by cell, i.e. it is treated as an alist.
   Choosing the character, as far as the visible lines show:
     - `code' starts as the cdr of the Qucs entry of ATTRIBUTES; one
       visible path signals "Invalid argument" when it is not an integer
       and otherwise builds the character with make_char (XINT (code));
     - another path scans the cells for one whose car names a charset
       with a non-zero XCHARSET_FINAL and derives a code from one byte
       (dimension 1) or two bytes (dimension 2), using a base such as
       MIN_CHAR_94x94 or MIN_CHAR_96x96 plus an offset computed from the
       final byte and the position bytes.
   Afterwards each cell's car and cdr are passed to Fput_char_attribute,
   and get_char_code_table is called for the new character.
   NOTE(review): the end of the doc string, the argument list, loop
   headers, `case' labels, declarations of b1, b2 and ccs, the
   conditions guarding the errors, the braces and the
   `setup_attributes' label are not present in this copy.  Some visible
   statements assign a C integer expression to `code', which is declared
   Lisp_Object and later passed to make_char directly, while another
   path uses XINT (code) -- confirm the intended representation in the
   full file. */
446 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
447 Store character's ATTRIBUTES.
451 Lisp_Object rest = attributes;
452 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
453 Lisp_Object character;
459 Lisp_Object cell = Fcar (rest);
463 signal_simple_error ("Invalid argument", attributes);
464 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
465 && XCHARSET_FINAL (ccs))
469 if (XCHARSET_DIMENSION (ccs) == 1)
471 Lisp_Object eb1 = Fcar (Fcdr (cell));
475 signal_simple_error ("Invalid argument", attributes);
477 switch (XCHARSET_CHARS (ccs))
481 + (XCHARSET_FINAL (ccs) - '0') * 94 + (b1 - 33);
485 + (XCHARSET_FINAL (ccs) - '0') * 96 + (b1 - 32);
491 else if (XCHARSET_DIMENSION (ccs) == 2)
493 Lisp_Object eb1 = Fcar (Fcdr (cell));
494 Lisp_Object eb2 = Fcar (Fcdr (Fcdr (cell)));
498 signal_simple_error ("Invalid argument", attributes);
501 signal_simple_error ("Invalid argument", attributes);
503 switch (XCHARSET_CHARS (ccs))
506 code = MIN_CHAR_94x94
507 + (XCHARSET_FINAL (ccs) - '0') * 94 * 94
508 + (b1 - 33) * 94 + (b2 - 33);
511 code = MIN_CHAR_96x96
512 + (XCHARSET_FINAL (ccs) - '0') * 96 * 96
513 + (b1 - 32) * 96 + (b2 - 32);
524 character = make_char (code);
525 goto setup_attributes;
531 else if (!INTP (code))
532 signal_simple_error ("Invalid argument", attributes);
534 character = make_char (XINT (code));
540 Lisp_Object cell = Fcar (rest);
543 signal_simple_error ("Invalid argument", attributes);
544 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
548 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
/* File-scope variables used by the charset code: Lisp_Object variables
   and symbols, one int, and the leading-byte allocation counters
   consumed by get_unallocated_leading_byte. */
551 Lisp_Object Vutf_2000_version;
555 int leading_code_private_11;
558 Lisp_Object Qcharsetp;
560 /* Qdoc_string, Qdimension, Qchars defined in general.c */
561 Lisp_Object Qregistry, Qfinal, Qgraphic;
562 Lisp_Object Qdirection;
563 Lisp_Object Qreverse_direction_charset;
564 Lisp_Object Qleading_byte;
565 Lisp_Object Qshort_name, Qlong_name;
/* NOTE(review): the next three lines are fragments of a longer
   comma-separated declaration whose start, remaining names and end are
   not present in this copy. */
581 Qjapanese_jisx0208_1978,
593 Qvietnamese_viscii_lower,
594 Qvietnamese_viscii_upper,
602 Lisp_Object Ql2r, Qr2l;
/* Hash table from charset name to charset object: make_charset adds
   entries with Fputhash and Ffind_charset reads them with Fgethash. */
604 Lisp_Object Vcharset_hash_table;
/* Next leading bytes to hand out.  get_unallocated_leading_byte compares
   them against MAX_LEADING_BYTE_PRIVATE, MAX_LEADING_BYTE_PRIVATE_1 and
   MAX_LEADING_BYTE_PRIVATE_2 respectively and post-increments them. */
607 static Charset_ID next_allocated_leading_byte;
609 static Charset_ID next_allocated_1_byte_leading_byte;
610 static Charset_ID next_allocated_2_byte_leading_byte;
613 /* Composite characters are characters constructed by overstriking two
614 or more regular characters.
616 1) The old Mule implementation involves storing composite characters
617 in a buffer as a tag followed by all of the actual characters
618 used to make up the composite character. I think this is a bad
619 idea; it greatly complicates code that wants to handle strings
620 one character at a time because it has to deal with the possibility
621 of great big ungainly characters. It's much more reasonable to
622 simply store an index into a table of composite characters.
624 2) The current implementation only allows for 16,384 separate
625 composite characters over the lifetime of the XEmacs process.
626 This could become a potential problem if the user
627 edited lots of different files that use composite characters.
628 Due to FSF bogosity, increasing the number of allowable
629 composite characters under Mule would decrease the number
630 of possible faces that can exist. Mule already has shrunk
631 this to 2048, and further shrinkage would become uncomfortable.
632 No such problems exist in XEmacs.
634 Composite characters could be represented as 0x80 C1 C2 C3,
635 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
636 for slightly under 2^20 (one million) composite characters
637 over the XEmacs process lifetime, and you only need to
638 increase the size of a Mule character from 19 to 21 bits.
639 Or you could use 0x80 C1 C2 C3 C4, allowing for about
640 85 million (slightly over 2^26) composite characters. */
643 /************************************************************************/
644 /* Basic Emchar functions */
645 /************************************************************************/
647 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
648 string in STR. Returns the number of bytes stored.
649 Do not call this directly. Use the macro set_charptr_emchar() instead. */
/* Encode character C at STR.
   NOTE(review): the return type, braces, local declarations, the first
   branch of the `if' chain, the return statement and any preprocessor
   conditionals are not present in this copy.  Two encoders are visible
   back to back and are presumably alternative builds:
     1. a UTF-8 style encoder that picks a length from the magnitude of
        C and emits a lead byte (0xc0, 0xe0, 0xf0, 0xf8 or 0xfc, OR-ed
        with the top bits) followed by bytes carrying six bits each,
        tagged with 0x80;
     2. a leading-byte encoder built on BREAKUP_CHAR and
        CHAR_LEADING_BYTE, which emits PRIVATE_LEADING_BYTE_PREFIX first
        for private leading bytes. */
653 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
/* Two bytes: C up to 0x7ff. */
668 else if ( c <= 0x7ff )
670 *p++ = (c >> 6) | 0xc0;
671 *p++ = (c & 0x3f) | 0x80;
/* Three bytes: C up to 0xffff. */
673 else if ( c <= 0xffff )
675 *p++ = (c >> 12) | 0xe0;
676 *p++ = ((c >> 6) & 0x3f) | 0x80;
677 *p++ = (c & 0x3f) | 0x80;
/* Four bytes: C up to 0x1fffff. */
679 else if ( c <= 0x1fffff )
681 *p++ = (c >> 18) | 0xf0;
682 *p++ = ((c >> 12) & 0x3f) | 0x80;
683 *p++ = ((c >> 6) & 0x3f) | 0x80;
684 *p++ = (c & 0x3f) | 0x80;
/* Five bytes: C up to 0x3ffffff. */
686 else if ( c <= 0x3ffffff )
688 *p++ = (c >> 24) | 0xf8;
689 *p++ = ((c >> 18) & 0x3f) | 0x80;
690 *p++ = ((c >> 12) & 0x3f) | 0x80;
691 *p++ = ((c >> 6) & 0x3f) | 0x80;
692 *p++ = (c & 0x3f) | 0x80;
/* Six bytes: everything larger. */
696 *p++ = (c >> 30) | 0xfc;
697 *p++ = ((c >> 24) & 0x3f) | 0x80;
698 *p++ = ((c >> 18) & 0x3f) | 0x80;
699 *p++ = ((c >> 12) & 0x3f) | 0x80;
700 *p++ = ((c >> 6) & 0x3f) | 0x80;
701 *p++ = (c & 0x3f) | 0x80;
/* Leading-byte encoder starts here. */
704 BREAKUP_CHAR (c, charset, c1, c2);
705 lb = CHAR_LEADING_BYTE (c);
706 if (LEADING_BYTE_PRIVATE_P (lb))
707 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
709 if (EQ (charset, Vcharset_control_1))
718 /* Return the first character from a Mule-encoded string in STR,
719 assuming it's non-ASCII. Do not call this directly.
720 Use the macro charptr_emchar() instead. */
/* Decode the character that starts at STR.
   NOTE(review): the return type, braces, declarations, the bodies of
   the `if' chain and any preprocessor conditionals are not present in
   this copy.  Two decoders are visible and are presumably alternative
   builds:
     1. a UTF-8 style decoder that classifies the first byte `b' by the
        thresholds 0xf8, 0xf0, 0xe0 and 0xc0, then loops `len' times,
        shifting `ch' left by six and OR-ing in the low six bits of `b';
     2. a leading-byte decoder: LEADING_BYTE_CONTROL_1 yields the next
        byte minus 0x20; otherwise the charset is found with
        CHARSET_BY_LEADING_BYTE and the result is built with MAKE_CHAR
        from one or, for dimension-2 charsets, two position bytes. */
723 non_ascii_charptr_emchar (CONST Bufbyte *str)
736 else if ( b >= 0xf8 )
741 else if ( b >= 0xf0 )
746 else if ( b >= 0xe0 )
751 else if ( b >= 0xc0 )
761 for( ; len > 0; len-- )
764 ch = ( ch << 6 ) | ( b & 0x3f );
768 Bufbyte i0 = *str, i1, i2 = 0;
771 if (i0 == LEADING_BYTE_CONTROL_1)
772 return (Emchar) (*++str - 0x20);
774 if (LEADING_BYTE_PREFIX_P (i0))
779 charset = CHARSET_BY_LEADING_BYTE (i0);
780 if (XCHARSET_DIMENSION (charset) == 2)
783 return MAKE_CHAR (charset, i1, i2);
787 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
788 Do not call this directly. Use the macro valid_char_p() instead. */
/* Validity test for a non-ASCII Emchar CH, based on its three fields
   (CHAR_FIELD1, CHAR_FIELD2, CHAR_FIELD3).
   Visible checks:
     - field 2 (and, in the later part, field 1) must fall in the
       official or the private range;
     - when a position field is 0x20 or 0x7F the charset is looked up by
       leading byte and the result is whether XCHARSET_CHARS is 96;
     - with ENABLE_COMPOSITE_CHARS, composite characters are checked
       against Vcomposite_char_char2string_hash_table.
   NOTE(review): the return type, braces, declarations, most return
   statements, the test that separates the field-1-is-zero case from the
   other case, and the delimiters of the "NOTE: This takes advantage"
   comment are not present in this copy. */
792 non_ascii_valid_char_p (Emchar ch)
796 /* Must have only lowest 19 bits set */
800 f1 = CHAR_FIELD1 (ch);
801 f2 = CHAR_FIELD2 (ch);
802 f3 = CHAR_FIELD3 (ch);
808 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
809 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
810 f2 > MAX_CHAR_FIELD2_PRIVATE)
815 if (f3 != 0x20 && f3 != 0x7F)
819 NOTE: This takes advantage of the fact that
820 FIELD2_TO_OFFICIAL_LEADING_BYTE and
821 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
823 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
824 return (XCHARSET_CHARS (charset) == 96);
830 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
831 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
832 f1 > MAX_CHAR_FIELD1_PRIVATE)
834 if (f2 < 0x20 || f3 < 0x20)
837 #ifdef ENABLE_COMPOSITE_CHARS
838 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
840 if (UNBOUNDP (Fgethash (make_int (ch),
841 Vcomposite_char_char2string_hash_table,
846 #endif /* ENABLE_COMPOSITE_CHARS */
848 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
851 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
853 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
856 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
858 return (XCHARSET_CHARS (charset) == 96);
864 /************************************************************************/
865 /* Basic string functions */
866 /************************************************************************/
868 /* Copy the character pointed to by PTR into STR, assuming it's
869 non-ASCII. Do not call this directly. Use the macro
870 charptr_copy_char() instead. */
/* Copy one multi-byte character from PTR to STR and return the number
   of bytes written (strptr + 1 - str).  The switch on the
   representation length falls through from the longest case down,
   copying one trailing byte per case; cases 2 to 6 are visible.
   NOTE(review): the return type, braces and any conditional around
   cases 6 and 5 are not present in this copy.  The switch reads *strptr
   (that is, str[0]), but no visible line stores the first byte there
   beforehand; presumably a short `*strptr = *ptr++;' line is missing --
   confirm against the full file. */
873 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
875 Bufbyte *strptr = str;
877 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
879 /* Notice fallthrough. */
881 case 6: *++strptr = *ptr++;
882 case 5: *++strptr = *ptr++;
884 case 4: *++strptr = *ptr++;
885 case 3: *++strptr = *ptr++;
/* The last byte is copied without advancing PTR. */
886 case 2: *++strptr = *ptr;
891 return strptr + 1 - str;
895 /************************************************************************/
896 /* streams of Emchars */
897 /************************************************************************/
899 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
900 The functions below are not meant to be called directly; use
901 the macros in insdel.h. */
/* Finish reading one character from STREAM when its first byte CH has
   already been read.  CH is stored in str[0], the representation length
   is taken from REP_BYTES_BY_FIRST_BYTE (ch), the remaining bytes are
   pulled with Lstream_getc and appended, and the assembled bytes are
   decoded with charptr_emchar.
   NOTE(review): the return type, braces, `case' labels and every line
   between a Lstream_getc call and the following store are not present
   in this copy, so no handling of a failed read is visible -- confirm
   in the full file. */
904 Lstream_get_emchar_1 (Lstream *stream, int ch)
906 Bufbyte str[MAX_EMCHAR_LEN];
907 Bufbyte *strptr = str;
909 str[0] = (Bufbyte) ch;
910 switch (REP_BYTES_BY_FIRST_BYTE (ch))
912 /* Notice fallthrough. */
915 ch = Lstream_getc (stream);
917 *++strptr = (Bufbyte) ch;
919 ch = Lstream_getc (stream);
921 *++strptr = (Bufbyte) ch;
924 ch = Lstream_getc (stream);
926 *++strptr = (Bufbyte) ch;
928 ch = Lstream_getc (stream);
930 *++strptr = (Bufbyte) ch;
932 ch = Lstream_getc (stream);
934 *++strptr = (Bufbyte) ch;
939 return charptr_emchar (str);
/* Write character CH to STREAM: encode it into a local buffer of
   MAX_EMCHAR_LEN bytes with set_charptr_emchar, then pass the encoded
   bytes to Lstream_write and return its result.
   NOTE(review): the return type and braces are not present in this
   copy. */
943 Lstream_fput_emchar (Lstream *stream, Emchar ch)
945 Bufbyte str[MAX_EMCHAR_LEN];
946 Bytecount len = set_charptr_emchar (str, ch);
947 return Lstream_write (stream, str, len);
/* Push character CH back onto STREAM: encode it into a local buffer of
   MAX_EMCHAR_LEN bytes with set_charptr_emchar and hand the encoded
   bytes to Lstream_unread.
   NOTE(review): the return type and braces are not present in this
   copy. */
951 Lstream_funget_emchar (Lstream *stream, Emchar ch)
953 Bufbyte str[MAX_EMCHAR_LEN];
954 Bytecount len = set_charptr_emchar (str, ch);
955 Lstream_unread (stream, str, len);
959 /************************************************************************/
961 /************************************************************************/
/* Mark function handed to DEFINE_LRECORD_IMPLEMENTATION for charsets:
   calls MARKOBJ on the short_name, long_name, doc_string, registry,
   ccl_program and decoding_table members of the charset OBJ.
   NOTE(review): the return type, braces, the return statement and any
   conditional around the decoding_table line are not present in this
   copy. */
964 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
966 struct Lisp_Charset *cs = XCHARSET (obj);
968 markobj (cs->short_name);
969 markobj (cs->long_name);
970 markobj (cs->doc_string);
971 markobj (cs->registry);
972 markobj (cs->ccl_program);
974 markobj (cs->decoding_table);
/* Print function handed to DEFINE_LRECORD_IMPLEMENTATION for charsets.
   Emits "#<charset NAME SHORT-NAME LONG-NAME DOC-STRING", then the
   type, direction, column count, graphic value and final byte
   formatted with sprintf, then "reg=" with the registry, and finally
   the header uid in hex followed by ">".  One visible path instead
   signals an error that the object is unreadable.
   NOTE(review): the return type, braces, the declaration (and hence the
   size) of `buf', the condition guarding the error call and the last
   arm of the type conditional are not present in this copy.  sprintf
   writes into `buf' with no length limit, so the buffer must be large
   enough for the longest expansion -- verify in the full file. */
980 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
982 struct Lisp_Charset *cs = XCHARSET (obj);
986 error ("printing unreadable object #<charset %s 0x%x>",
987 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
990 write_c_string ("#<charset ", printcharfun);
991 print_internal (CHARSET_NAME (cs), printcharfun, 0);
992 write_c_string (" ", printcharfun);
993 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
994 write_c_string (" ", printcharfun);
995 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
996 write_c_string (" ", printcharfun);
997 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
998 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
999 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
1000 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
1001 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
1003 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1004 CHARSET_COLUMNS (cs),
1005 CHARSET_GRAPHIC (cs),
1006 CHARSET_FINAL (cs));
1007 write_c_string (buf, printcharfun);
1008 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
1009 sprintf (buf, " 0x%x>", cs->header.uid);
1010 write_c_string (buf, printcharfun);
/* lrecord description for struct Lisp_Charset.  Two XD_LISP_OBJECT
   entries are visible: a run of 7 starting at `name' and a run of 2
   starting at `decoding_table'.
   NOTE(review): any conditional around the second entry, the
   terminating entry and the closing of the initialiser are not present
   in this copy.  The counts depend on the member order of
   struct Lisp_Charset, which is declared elsewhere -- verify against
   that declaration. */
1013 static const struct lrecord_description charset_description[] = {
1014 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1016 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
/* Defines the lrecord implementation for struct Lisp_Charset under the
   name "charset", passing mark_charset, print_charset and
   charset_description; the three remaining function slots are 0. */
1021 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1022 mark_charset, print_charset, 0, 0, 0,
1023 charset_description,
1024 struct Lisp_Charset);
1026 /* Make a new charset. */
/* Allocate a charset object, fill in its fields from the arguments and
   register it.
   Steps visible below:
     - allocate the lcrecord and wrap it in `obj';
     - copy ID, NAME, the names, DIRECTION, TYPE, COLUMNS, GRAPHIC,
       FINAL, DOC and the registry into the object; the CCL program and
       the reverse-direction charset start out as Qnil;
     - derive CHARSET_DIMENSION and CHARSET_CHARS from the type;
     - set CHARSET_REP_BYTES to 1 for LEADING_BYTE_ASCII, or to the
       dimension plus 1 or plus 2 in the other visible assignments;
     - enter the object in charset_by_attributes,
       charset_by_leading_byte and rep_bytes_by_first_byte, and store it
       under NAME in Vcharset_hash_table.
   NOTE(review): the return type, braces, `break' statements, several
   conditions, the declaration of the `reg' parameter, the return
   statement and all preprocessor conditionals are not present in this
   copy.  The DECODING_TABLE parameter is not used by any visible line;
   the decoding-table slot is set to Qnil instead -- confirm that this
   is intended. */
1029 make_charset (Charset_ID id, Lisp_Object name,
1030 unsigned char type, unsigned char columns, unsigned char graphic,
1031 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1032 Lisp_Object long_name, Lisp_Object doc,
1034 Lisp_Object decoding_table,
1035 Emchar ucs_min, Emchar ucs_max,
1036 Emchar code_offset, unsigned char byte_offset)
1039 struct Lisp_Charset *cs =
1040 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1041 XSETCHARSET (obj, cs);
1043 CHARSET_ID (cs) = id;
1044 CHARSET_NAME (cs) = name;
1045 CHARSET_SHORT_NAME (cs) = short_name;
1046 CHARSET_LONG_NAME (cs) = long_name;
1047 CHARSET_DIRECTION (cs) = direction;
1048 CHARSET_TYPE (cs) = type;
1049 CHARSET_COLUMNS (cs) = columns;
1050 CHARSET_GRAPHIC (cs) = graphic;
1051 CHARSET_FINAL (cs) = final;
1052 CHARSET_DOC_STRING (cs) = doc;
1053 CHARSET_REGISTRY (cs) = reg;
1054 CHARSET_CCL_PROGRAM (cs) = Qnil;
1055 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
1057 CHARSET_DECODING_TABLE(cs) = Qnil;
1058 CHARSET_UCS_MIN(cs) = ucs_min;
1059 CHARSET_UCS_MAX(cs) = ucs_max;
1060 CHARSET_CODE_OFFSET(cs) = code_offset;
1061 CHARSET_BYTE_OFFSET(cs) = byte_offset;
/* Dimension and characters-per-dimension follow from the type. */
1064 switch (CHARSET_TYPE (cs))
1066 case CHARSET_TYPE_94:
1067 CHARSET_DIMENSION (cs) = 1;
1068 CHARSET_CHARS (cs) = 94;
1070 case CHARSET_TYPE_96:
1071 CHARSET_DIMENSION (cs) = 1;
1072 CHARSET_CHARS (cs) = 96;
1074 case CHARSET_TYPE_94X94:
1075 CHARSET_DIMENSION (cs) = 2;
1076 CHARSET_CHARS (cs) = 94;
1078 case CHARSET_TYPE_96X96:
1079 CHARSET_DIMENSION (cs) = 2;
1080 CHARSET_CHARS (cs) = 96;
1083 case CHARSET_TYPE_128:
1084 CHARSET_DIMENSION (cs) = 1;
1085 CHARSET_CHARS (cs) = 128;
1087 case CHARSET_TYPE_128X128:
1088 CHARSET_DIMENSION (cs) = 2;
1089 CHARSET_CHARS (cs) = 128;
1091 case CHARSET_TYPE_256:
1092 CHARSET_DIMENSION (cs) = 1;
1093 CHARSET_CHARS (cs) = 256;
1095 case CHARSET_TYPE_256X256:
1096 CHARSET_DIMENSION (cs) = 2;
1097 CHARSET_CHARS (cs) = 256;
1103 if (id == LEADING_BYTE_ASCII)
1104 CHARSET_REP_BYTES (cs) = 1;
1106 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1108 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
/* NOTE(review): the comment that follows has no terminator in this
   copy, so as written it runs on to the end of the next comment and
   swallows the statements in between. */
1113 /* some charsets do not have final characters. This includes
1114 ASCII, Control-1, Composite, and the two faux private
1117 if (code_offset == 0)
1119 assert (NILP (charset_by_attributes[type][final]));
1120 charset_by_attributes[type][final] = obj;
1123 assert (NILP (charset_by_attributes[type][final][direction]));
1124 charset_by_attributes[type][final][direction] = obj;
1128 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1129 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1132 /* official leading byte */
1133 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
1136 /* Some charsets are "faux" and don't have names or really exist at
1137 all except in the leading-byte table. */
1139 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next free leading byte for a charset of the given
   DIMENSION.  Three counters are visible: a single one checked against
   MAX_LEADING_BYTE_PRIVATE, and a pair checked against
   MAX_LEADING_BYTE_PRIVATE_1 and MAX_LEADING_BYTE_PRIVATE_2.  Each is
   post-incremented into `lb' when a byte is taken.  An error carrying
   DIMENSION is signalled with the message "No more character sets free
   for this dimension".
   NOTE(review): the return type, braces, the declaration of `lb', the
   statements run when a limit is exceeded, the test on DIMENSION, the
   name of the error function, the return statement and the
   preprocessor conditionals that presumably select between the single
   counter and the pair are not present in this copy. */
1144 get_unallocated_leading_byte (int dimension)
1149 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1152 lb = next_allocated_leading_byte++;
1156 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1159 lb = next_allocated_1_byte_leading_byte++;
1163 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1166 lb = next_allocated_2_byte_leading_byte++;
1172 ("No more character sets free for this dimension",
1173 make_int (dimension));
/* Compute the code point of character CH in CHARSET from the charset's
   numeric ranges, as a list of one to four position bytes.
   Two strategies are visible:
     - if CH lies within [XCHARSET_UCS_MIN, XCHARSET_UCS_MAX], the
       offset d = CH - UCS_MIN + CODE_OFFSET is split into base
       XCHARSET_CHARS digits, one per dimension, and
       XCHARSET_BYTE_OFFSET is added to each digit;
     - otherwise, if XCHARSET_CODE_OFFSET is 0, d is measured from
       MIN_CHAR_94, MIN_CHAR_96, MIN_CHAR_94x94 or MIN_CHAR_96x96 plus a
       per-charset offset derived from the final byte, and 33 (94-char
       sets) or 32 (96-char sets) is added to each digit.
   NOTE(review): the return type, braces, the declaration of `d', the
   upper-bound halves of the range tests and every fall-back return are
   not present in this copy. */
1180 range_charset_code_point (Lisp_Object charset, Emchar ch)
1184 if ((XCHARSET_UCS_MIN (charset) <= ch)
1185 && (ch <= XCHARSET_UCS_MAX (charset)))
1187 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1189 if (XCHARSET_DIMENSION (charset) == 1)
1190 return list1 (make_int (d + XCHARSET_BYTE_OFFSET (charset)));
1191 else if (XCHARSET_DIMENSION (charset) == 2)
1192 return list2 (make_int (d / XCHARSET_CHARS (charset)
1193 + XCHARSET_BYTE_OFFSET (charset)),
1194 make_int (d % XCHARSET_CHARS (charset)
1195 + XCHARSET_BYTE_OFFSET (charset)));
1196 else if (XCHARSET_DIMENSION (charset) == 3)
1197 return list3 (make_int (d / (XCHARSET_CHARS (charset)
1198 * XCHARSET_CHARS (charset))
1199 + XCHARSET_BYTE_OFFSET (charset)),
1200 make_int (d / XCHARSET_CHARS (charset)
1201 % XCHARSET_CHARS (charset)
1202 + XCHARSET_BYTE_OFFSET (charset)),
1203 make_int (d % XCHARSET_CHARS (charset)
1204 + XCHARSET_BYTE_OFFSET (charset)));
1205 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1206 return list4 (make_int (d / (XCHARSET_CHARS (charset)
1207 * XCHARSET_CHARS (charset)
1208 * XCHARSET_CHARS (charset))
1209 + XCHARSET_BYTE_OFFSET (charset)),
1210 make_int (d / (XCHARSET_CHARS (charset)
1211 * XCHARSET_CHARS (charset))
1212 % XCHARSET_CHARS (charset)
1213 + XCHARSET_BYTE_OFFSET (charset)),
1214 make_int (d / XCHARSET_CHARS (charset)
1215 % XCHARSET_CHARS (charset)
1216 + XCHARSET_BYTE_OFFSET (charset)),
1217 make_int (d % XCHARSET_CHARS (charset)
1218 + XCHARSET_BYTE_OFFSET (charset)));
1220 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1222 if (XCHARSET_DIMENSION (charset) == 1)
1224 if (XCHARSET_CHARS (charset) == 94)
1226 if (((d = ch - (MIN_CHAR_94
1227 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1229 return list1 (make_int (d + 33));
1231 else if (XCHARSET_CHARS (charset) == 96)
1233 if (((d = ch - (MIN_CHAR_96
1234 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1236 return list1 (make_int (d + 32));
1241 else if (XCHARSET_DIMENSION (charset) == 2)
1243 if (XCHARSET_CHARS (charset) == 94)
1245 if (((d = ch - (MIN_CHAR_94x94
1246 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
/* NOTE(review): in the next return, and in the 96x96 return further
   down, the arguments to list2 are plain C integer expressions.  Every
   other list1/list2/list3/list4 call in this function wraps its
   arguments in make_int.  This looks like a bug: the list elements
   would not be valid Lisp integers.  Confirm and wrap both arguments in
   make_int. */
1249 return list2 ((d / 94) + 33, d % 94 + 33);
1251 else if (XCHARSET_CHARS (charset) == 96)
1253 if (((d = ch - (MIN_CHAR_96x96
1254 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1257 return list2 ((d / 96) + 32, d % 96 + 32);
/* Return the code point of character CH in CHARSET.
   The character's entry in Vcharacter_attribute_table is consulted
   first: when it is non-nil and Fassq finds CHARSET in it, the cdr of
   that cell is returned.  Otherwise the result is computed by
   range_charset_code_point.
   NOTE(review): the return type and braces are not present in this
   copy. */
1265 charset_code_point (Lisp_Object charset, Emchar ch)
1267 Lisp_Object cdef = get_char_code_table (ch, Vcharacter_attribute_table);
1269 if (!EQ (cdef, Qnil))
1271 Lisp_Object field = Fassq (charset, cdef);
1273 if (!EQ (field, Qnil))
1274 return Fcdr (field);
1276 return range_charset_code_point (charset, ch);
/* File-scope Lisp_Object; neither its initialisation nor any use is in
   this part of the file.
   NOTE(review): presumably a list of charsets in priority order, going
   by the name -- confirm where it is set up. */
1279 Lisp_Object Vdefault_coded_charset_priority_list;
1283 /************************************************************************/
1284 /* Basic charset Lisp functions */
1285 /************************************************************************/
/* Lisp primitive `charsetp': Qt when CHARSETP (object) holds, Qnil
   otherwise.
   NOTE(review): the end of the doc string, the argument list and the
   braces are not present in this copy. */
1287 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1288 Return non-nil if OBJECT is a charset.
1292 return CHARSETP (object) ? Qt : Qnil;
/* Lisp primitive `find-charset'.  A charset object is returned as is.
   Anything else must pass CHECK_SYMBOL and is then looked up in
   Vcharset_hash_table with Qnil as the default, so an unknown name
   gives nil.
   NOTE(review): the end of the doc string, the argument list and the
   braces are not present in this copy. */
1295 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1296 Retrieve the charset of the given name.
1297 If CHARSET-OR-NAME is a charset object, it is simply returned.
1298 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1299 nil is returned. Otherwise the associated charset object is returned.
1303 if (CHARSETP (charset_or_name))
1304 return charset_or_name;
1306 CHECK_SYMBOL (charset_or_name);
1307 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Lisp primitive `get-charset'.  Resolves NAME with Ffind_charset and
   signals "No such charset" with NAME as the error datum.
   NOTE(review): the end of the doc string, the argument list, the test
   guarding the error, the return statement and the braces are not
   present in this copy. */
1310 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1311 Retrieve the charset of the given name.
1312 Same as `find-charset' except an error is signalled if there is no such
1313 charset instead of returning nil.
1317 Lisp_Object charset = Ffind_charset (name);
1320 signal_simple_error ("No such charset", name);
1324 /* We store the charsets in hash tables with the names as the key and the
1325 actual charset object as the value. Occasionally we need to use them
1326 in a list format. These routines provide us with that. */
/* Closure passed through elisp_maphash to add_charset_to_list_mapper.
   The visible member points at the Lisp list being accumulated.
   NOTE(review): the braces of the struct are not present in this
   copy. */
1327 struct charset_list_closure
1329 Lisp_Object *charset_list;
/* elisp_maphash callback used by Fcharset_list.  VALUE is a charset
   object; its name is consed onto the front of the list that the
   closure points at.  KEY is not used by the visible lines.
   NOTE(review): the return type, braces and return statement are not
   present in this copy. */
1333 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1334 void *charset_list_closure)
1336 /* This function can GC */
1337 struct charset_list_closure *chcl =
1338 (struct charset_list_closure*) charset_list_closure;
1339 Lisp_Object *charset_list = chcl->charset_list;
1341 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
/* Lisp primitive `charset-list'.  Starts from an empty list protected
   with GCPRO1, maps add_charset_to_list_mapper over Vcharset_hash_table
   so that every charset's name is consed onto it, and returns the
   list.
   NOTE(review): the end of the doc string, the argument list and the
   braces are not present in this copy.  No UNGCPRO matching the GCPRO1
   is visible either; presumably it is on a missing line -- confirm in
   the full file. */
1345 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1346 Return a list of the names of all defined charsets.
1350 Lisp_Object charset_list = Qnil;
1351 struct gcpro gcpro1;
1352 struct charset_list_closure charset_list_closure;
1354 GCPRO1 (charset_list);
1355 charset_list_closure.charset_list = &charset_list;
1356 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1357 &charset_list_closure);
1360 return charset_list;
/* Lisp primitive `charset-name'.  The argument is resolved with
   Fget_charset and the charset's name is returned.
   NOTE(review): the end of the doc string, the argument list and the
   braces are not present in this copy. */
1363 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1364 Return the name of the given charset.
1368 return XCHARSET_NAME (Fget_charset (charset));
1371 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1372 Define a new character set.
1373 This function is for use with Mule support.
1374 NAME is a symbol, the name by which the character set is normally referred.
1375 DOC-STRING is a string describing the character set.
1376 PROPS is a property list, describing the specific nature of the
1377 character set. Recognized properties are:
1379 'short-name Short version of the charset name (ex: Latin-1)
1380 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1381 'registry A regular expression matching the font registry field for
1383 'dimension Number of octets used to index a character in this charset.
1384 Either 1 or 2. Defaults to 1.
1385 'columns Number of columns used to display a character in this charset.
1386 Only used in TTY mode. (Under X, the actual width of a
1387 character can be derived from the font used to display the
1388 characters.) If unspecified, defaults to the dimension
1389 (this is almost always the correct value).
1390 'chars Number of characters in each dimension (94 or 96).
1391 Defaults to 94. Note that if the dimension is 2, the
1392 character set thus described is 94x94 or 96x96.
1393 'final Final byte of ISO 2022 escape sequence. Must be
1394 supplied. Each combination of (DIMENSION, CHARS) defines a
1395 separate namespace for final bytes. Note that ISO
1396 2022 restricts the final byte to the range
1397 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1398 dimension == 2. Note also that final bytes in the range
1399 0x30 - 0x3F are reserved for user-defined (not official)
1401 'graphic 0 (use left half of font on output) or 1 (use right half
1402 of font on output). Defaults to 0. For example, for
1403 a font whose registry is ISO8859-1, the left half
1404 (octets 0x20 - 0x7F) is the `ascii' character set, while
1405 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1406 character set. With 'graphic set to 0, the octets
1407 will have their high bit cleared; with it set to 1,
1408 the octets will have their high bit set.
1409 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1411 'ccl-program A compiled CCL program used to convert a character in
1412 this charset into an index into the font. This is in
1413 addition to the 'graphic property. The CCL program
1414 is passed the octets of the character, with the high
1415 bit cleared and set depending upon whether the value
1416 of the 'graphic property is 0 or 1.
1418 (name, doc_string, props))
1420 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1421 int direction = CHARSET_LEFT_TO_RIGHT;
1423 Lisp_Object registry = Qnil;
1424 Lisp_Object charset;
1425 Lisp_Object rest, keyword, value;
1426 Lisp_Object ccl_program = Qnil;
1427 Lisp_Object short_name = Qnil, long_name = Qnil;
1428 int byte_offset = -1;
1430 CHECK_SYMBOL (name);
1431 if (!NILP (doc_string))
1432 CHECK_STRING (doc_string);
1434 charset = Ffind_charset (name);
1435 if (!NILP (charset))
1436 signal_simple_error ("Cannot redefine existing charset", name);
1438 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1440 if (EQ (keyword, Qshort_name))
1442 CHECK_STRING (value);
1446 else if (EQ (keyword, Qlong_name)) /* must stay in the else-if chain, or 'short-name falls through to the final "Unrecognized property" branch */
1448 CHECK_STRING (value);
1452 else if (EQ (keyword, Qdimension))
1455 dimension = XINT (value);
1456 if (dimension < 1 || dimension > 2)
1457 signal_simple_error ("Invalid value for 'dimension", value);
1460 else if (EQ (keyword, Qchars))
1463 chars = XINT (value);
1464 if (chars != 94 && chars != 96)
1465 signal_simple_error ("Invalid value for 'chars", value);
1468 else if (EQ (keyword, Qcolumns))
1471 columns = XINT (value);
1472 if (columns != 1 && columns != 2)
1473 signal_simple_error ("Invalid value for 'columns", value);
1476 else if (EQ (keyword, Qgraphic))
1479 graphic = XINT (value);
1481 if (graphic < 0 || graphic > 2)
1483 if (graphic < 0 || graphic > 1)
1485 signal_simple_error ("Invalid value for 'graphic", value);
1488 else if (EQ (keyword, Qregistry))
1490 CHECK_STRING (value);
1494 else if (EQ (keyword, Qdirection))
1496 if (EQ (value, Ql2r))
1497 direction = CHARSET_LEFT_TO_RIGHT;
1498 else if (EQ (value, Qr2l))
1499 direction = CHARSET_RIGHT_TO_LEFT;
1501 signal_simple_error ("Invalid value for 'direction", value);
1504 else if (EQ (keyword, Qfinal))
1506 CHECK_CHAR_COERCE_INT (value);
1507 final = XCHAR (value);
1508 if (final < '0' || final > '~')
1509 signal_simple_error ("Invalid value for 'final", value);
1512 else if (EQ (keyword, Qccl_program))
1514 CHECK_VECTOR (value);
1515 ccl_program = value;
1519 signal_simple_error ("Unrecognized property", keyword);
1523 error ("'final must be specified");
1524 if (dimension == 2 && final > 0x5F)
1526 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1530 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1532 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1534 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1535 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1537 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1539 id = get_unallocated_leading_byte (dimension);
1541 if (NILP (doc_string))
1542 doc_string = build_string ("");
1544 if (NILP (registry))
1545 registry = build_string ("");
1547 if (NILP (short_name))
1548 XSETSTRING (short_name, XSYMBOL (name)->name);
1550 if (NILP (long_name))
1551 long_name = doc_string;
1554 columns = dimension;
1556 if (byte_offset < 0)
1560 else if (chars == 96)
1566 charset = make_charset (id, name, type, columns, graphic,
1567 final, direction, short_name, long_name,
1568 doc_string, registry,
1569 Qnil, 0, 0, 0, byte_offset);
1570 if (!NILP (ccl_program))
1571 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1575 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1577 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1578 NEW-NAME is the name of the new charset. Return the new charset.
1580 (charset, new_name))
1582 Lisp_Object new_charset = Qnil;
1583 int id, dimension, columns, graphic, final;
1584 int direction, type;
1585 Lisp_Object registry, doc_string, short_name, long_name;
1586 struct Lisp_Charset *cs;
1588 charset = Fget_charset (charset);
1589 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1590 signal_simple_error ("Charset already has reverse-direction charset",
1593 CHECK_SYMBOL (new_name);
1594 if (!NILP (Ffind_charset (new_name)))
1595 signal_simple_error ("Cannot redefine existing charset", new_name);
1597 cs = XCHARSET (charset);
1599 type = CHARSET_TYPE (cs);
1600 columns = CHARSET_COLUMNS (cs);
1601 dimension = CHARSET_DIMENSION (cs);
1602 id = get_unallocated_leading_byte (dimension);
1604 graphic = CHARSET_GRAPHIC (cs);
1605 final = CHARSET_FINAL (cs);
1606 direction = CHARSET_RIGHT_TO_LEFT;
1607 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1608 direction = CHARSET_LEFT_TO_RIGHT;
1609 doc_string = CHARSET_DOC_STRING (cs);
1610 short_name = CHARSET_SHORT_NAME (cs);
1611 long_name = CHARSET_LONG_NAME (cs);
1612 registry = CHARSET_REGISTRY (cs);
1614 new_charset = make_charset (id, new_name, type, columns,
1615 graphic, final, direction, short_name, long_name,
1616 doc_string, registry,
1618 CHARSET_DECODING_TABLE(cs),
1619 CHARSET_UCS_MIN(cs),
1620 CHARSET_UCS_MAX(cs),
1621 CHARSET_CODE_OFFSET(cs),
1622 CHARSET_BYTE_OFFSET(cs)
1628 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1629 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1634 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1635 Define symbol ALIAS as an alias for CHARSET.
1639 CHECK_SYMBOL (alias);
1640 charset = Fget_charset (charset);
1641 return Fputhash (alias, charset, Vcharset_hash_table);
1644 /* #### Reverse direction charsets not yet implemented. */
1646 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1648 Return the reverse-direction charset parallel to CHARSET, if any.
1649 This is the charset with the same properties (in particular, the same
1650 dimension, number of characters per dimension, and final byte) as
1651 CHARSET but whose characters are displayed in the opposite direction.
1655 charset = Fget_charset (charset);
1656 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1660 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1661 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1662 If DIRECTION is omitted, both directions will be checked (left-to-right
1663 will be returned if character sets exist for both directions).
1665 (dimension, chars, final, direction))
1667 int dm, ch, fi, di = -1;
1669 Lisp_Object obj = Qnil;
1671 CHECK_INT (dimension);
1672 dm = XINT (dimension);
1673 if (dm < 1 || dm > 2)
1674 signal_simple_error ("Invalid value for DIMENSION", dimension);
1678 if (ch != 94 && ch != 96)
1679 signal_simple_error ("Invalid value for CHARS", chars);
1681 CHECK_CHAR_COERCE_INT (final);
1683 if (fi < '0' || fi > '~')
1684 signal_simple_error ("Invalid value for FINAL", final);
1686 if (EQ (direction, Ql2r))
1687 di = CHARSET_LEFT_TO_RIGHT;
1688 else if (EQ (direction, Qr2l))
1689 di = CHARSET_RIGHT_TO_LEFT;
1690 else if (!NILP (direction))
1691 signal_simple_error ("Invalid value for DIRECTION", direction);
1693 if (dm == 2 && fi > 0x5F)
1695 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1698 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1700 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1704 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
1706 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
1709 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
1712 return XCHARSET_NAME (obj);
1716 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1717 Return short name of CHARSET.
1721 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1724 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1725 Return long name of CHARSET.
1729 return XCHARSET_LONG_NAME (Fget_charset (charset));
1732 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1733 Return description of CHARSET.
1737 return XCHARSET_DOC_STRING (Fget_charset (charset));
1740 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1741 Return dimension of CHARSET.
1745 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1748 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1749 Return property PROP of CHARSET.
1750 Recognized properties are those listed in `make-charset', as well as
1751 'name and 'doc-string.
1755 struct Lisp_Charset *cs;
1757 charset = Fget_charset (charset);
1758 cs = XCHARSET (charset);
1760 CHECK_SYMBOL (prop);
1761 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1762 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1763 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1764 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1765 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1766 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1767 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1768 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1769 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1770 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1771 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1772 if (EQ (prop, Qdirection))
1773 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1774 if (EQ (prop, Qreverse_direction_charset))
1776 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1780 return XCHARSET_NAME (obj);
1782 signal_simple_error ("Unrecognized charset property name", prop);
1783 return Qnil; /* not reached */
1786 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1787 Return charset identification number of CHARSET.
1791 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1794 /* #### We need to figure out which properties we really want to
1797 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1798 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1800 (charset, ccl_program))
1802 charset = Fget_charset (charset);
1803 CHECK_VECTOR (ccl_program);
1804 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1809 invalidate_charset_font_caches (Lisp_Object charset)
1811 /* Invalidate font cache entries for charset on all devices. */
1812 Lisp_Object devcons, concons, hash_table;
1813 DEVICE_LOOP_NO_BREAK (devcons, concons)
1815 struct device *d = XDEVICE (XCAR (devcons));
1816 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1817 if (!UNBOUNDP (hash_table))
1818 Fclrhash (hash_table);
1822 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1823 Set the 'registry property of CHARSET to REGISTRY.
1825 (charset, registry))
1827 charset = Fget_charset (charset);
1828 CHECK_STRING (registry);
1829 XCHARSET_REGISTRY (charset) = registry;
1830 invalidate_charset_font_caches (charset);
1831 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1836 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1837 Return mapping-table of CHARSET.
1841 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
1844 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1845 Set mapping-table of CHARSET to TABLE.
1849 struct Lisp_Charset *cs;
1850 Lisp_Object old_table;
1853 charset = Fget_charset (charset);
1854 cs = XCHARSET (charset);
1856 if (EQ (table, Qnil))
1858 CHARSET_DECODING_TABLE(cs) = table;
1861 else if (VECTORP (table))
1863 if (XVECTOR_LENGTH (table) > CHARSET_CHARS (cs))
1864 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
1865 old_table = CHARSET_DECODING_TABLE(cs);
1866 CHARSET_DECODING_TABLE(cs) = table;
1869 signal_error (Qwrong_type_argument,
1870 list2 (build_translated_string ("vector-or-nil-p"),
1872 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
1874 switch (CHARSET_DIMENSION (cs))
1877 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1879 Lisp_Object c = XVECTOR_DATA(table)[i];
1884 list1 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
1888 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1890 Lisp_Object v = XVECTOR_DATA(table)[i];
1896 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
1898 CHARSET_DECODING_TABLE(cs) = old_table;
1899 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
1901 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1903 Lisp_Object c = XVECTOR_DATA(v)[j];
1906 put_char_attribute (c, charset,
1909 (i + CHARSET_BYTE_OFFSET (cs)),
1911 (j + CHARSET_BYTE_OFFSET (cs))));
1915 put_char_attribute (v, charset,
1917 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
1926 /************************************************************************/
1927 /* Lisp primitives for working with characters */
1928 /************************************************************************/
1930 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1931 Make a character from CHARSET and octets ARG1 and ARG2.
1932 ARG2 is required only for characters from two-dimensional charsets.
1933 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1934 character s with caron.
1936 (charset, arg1, arg2))
1938 struct Lisp_Charset *cs;
1940 int lowlim, highlim;
1942 charset = Fget_charset (charset);
1943 cs = XCHARSET (charset);
1945 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1946 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1948 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
1950 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1951 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1954 /* It is useful (and safe, according to Olivier Galibert) to strip
1955 the 8th bit off ARG1 and ARG2 because it allows programmers to
1956 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1957 Latin 2 code of the character. */
1965 if (a1 < lowlim || a1 > highlim)
1966 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1968 if (CHARSET_DIMENSION (cs) == 1)
1972 ("Charset is of dimension one; second octet must be nil", arg2);
1973 return make_char (MAKE_CHAR (charset, a1, 0));
1982 a2 = XINT (arg2) & 0x7f;
1984 if (a2 < lowlim || a2 > highlim)
1985 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1987 return make_char (MAKE_CHAR (charset, a1, a2));
1990 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1991 Return the character set of char CH.
1995 CHECK_CHAR_COERCE_INT (ch);
1997 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
2000 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
2001 Return list of charset and one or two position-codes of CHAR.
2005 /* This function can GC */
2006 struct gcpro gcpro1, gcpro2;
2007 Lisp_Object charset = Qnil;
2008 Lisp_Object rc = Qnil;
2011 GCPRO2 (charset, rc);
2012 CHECK_CHAR_COERCE_INT (character);
2014 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2016 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2018 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2022 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2030 #ifdef ENABLE_COMPOSITE_CHARS
2031 /************************************************************************/
2032 /* composite character functions */
2033 /************************************************************************/
2036 lookup_composite_char (Bufbyte *str, int len)
2038 Lisp_Object lispstr = make_string (str, len);
2039 Lisp_Object ch = Fgethash (lispstr,
2040 Vcomposite_char_string2char_hash_table,
2046 if (composite_char_row_next >= 128)
2047 signal_simple_error ("No more composite chars available", lispstr);
2048 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2049 composite_char_col_next);
2050 Fputhash (make_char (emch), lispstr,
2051 Vcomposite_char_char2string_hash_table);
2052 Fputhash (lispstr, make_char (emch),
2053 Vcomposite_char_string2char_hash_table);
2054 composite_char_col_next++;
2055 if (composite_char_col_next >= 128)
2057 composite_char_col_next = 32;
2058 composite_char_row_next++;
2067 composite_char_string (Emchar ch)
2069 Lisp_Object str = Fgethash (make_char (ch),
2070 Vcomposite_char_char2string_hash_table,
2072 assert (!UNBOUNDP (str));
2076 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2077 Convert a string into a single composite character.
2078 The character is the result of overstriking all the characters in
2083 CHECK_STRING (string);
2084 return make_char (lookup_composite_char (XSTRING_DATA (string),
2085 XSTRING_LENGTH (string)));
2088 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2089 Return a string of the characters comprising a composite character.
2097 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2098 signal_simple_error ("Must be composite char", ch);
2099 return composite_char_string (emch);
2101 #endif /* ENABLE_COMPOSITE_CHARS */
2104 /************************************************************************/
2105 /* initialization */
2106 /************************************************************************/
2109 syms_of_mule_charset (void)
2111 DEFSUBR (Fcharsetp);
2112 DEFSUBR (Ffind_charset);
2113 DEFSUBR (Fget_charset);
2114 DEFSUBR (Fcharset_list);
2115 DEFSUBR (Fcharset_name);
2116 DEFSUBR (Fmake_charset);
2117 DEFSUBR (Fmake_reverse_direction_charset);
2118 /* DEFSUBR (Freverse_direction_charset); */
2119 DEFSUBR (Fdefine_charset_alias);
2120 DEFSUBR (Fcharset_from_attributes);
2121 DEFSUBR (Fcharset_short_name);
2122 DEFSUBR (Fcharset_long_name);
2123 DEFSUBR (Fcharset_description);
2124 DEFSUBR (Fcharset_dimension);
2125 DEFSUBR (Fcharset_property);
2126 DEFSUBR (Fcharset_id);
2127 DEFSUBR (Fset_charset_ccl_program);
2128 DEFSUBR (Fset_charset_registry);
2130 DEFSUBR (Fchar_attribute_alist);
2131 DEFSUBR (Fget_char_attribute);
2132 DEFSUBR (Fput_char_attribute);
2133 DEFSUBR (Fdefine_char);
2134 DEFSUBR (Fcharset_mapping_table);
2135 DEFSUBR (Fset_charset_mapping_table);
2138 DEFSUBR (Fmake_char);
2139 DEFSUBR (Fchar_charset);
2140 DEFSUBR (Fsplit_char);
2142 #ifdef ENABLE_COMPOSITE_CHARS
2143 DEFSUBR (Fmake_composite_char);
2144 DEFSUBR (Fcomposite_char_string);
2147 defsymbol (&Qcharsetp, "charsetp");
2148 defsymbol (&Qregistry, "registry");
2149 defsymbol (&Qfinal, "final");
2150 defsymbol (&Qgraphic, "graphic");
2151 defsymbol (&Qdirection, "direction");
2152 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2153 defsymbol (&Qshort_name, "short-name");
2154 defsymbol (&Qlong_name, "long-name");
2156 defsymbol (&Ql2r, "l2r");
2157 defsymbol (&Qr2l, "r2l");
2159 /* Charsets, compatible with FSF 20.3
2160 Naming convention is Script-Charset[-Edition] */
2161 defsymbol (&Qascii, "ascii");
2162 defsymbol (&Qcontrol_1, "control-1");
2163 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2164 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2165 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2166 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2167 defsymbol (&Qthai_tis620, "thai-tis620");
2168 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2169 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2170 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2171 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2172 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2173 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2174 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2175 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2176 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2177 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2178 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2179 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2180 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2181 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
2183 defsymbol (&Qucs, "ucs");
2184 defsymbol (&Qucs_bmp, "ucs-bmp");
2185 defsymbol (&Qlatin_viscii, "latin-viscii");
2186 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2187 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2188 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2189 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2190 defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208");
2191 defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208");
2193 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2194 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2196 defsymbol (&Qcomposite, "composite");
2200 vars_of_mule_charset (void)
2207 /* Table of charsets indexed by leading byte. */
2208 for (i = 0; i < countof (charset_by_leading_byte); i++)
2209 charset_by_leading_byte[i] = Qnil;
2212 /* Table of charsets indexed by type/final-byte. */
2213 for (i = 0; i < countof (charset_by_attributes); i++)
2214 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2215 charset_by_attributes[i][j] = Qnil;
2217 /* Table of charsets indexed by type/final-byte/direction. */
2218 for (i = 0; i < countof (charset_by_attributes); i++)
2219 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2220 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2221 charset_by_attributes[i][j][k] = Qnil;
2225 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2227 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2228 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
2232 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2233 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2234 Leading-code of private TYPE9N charset of column-width 1.
2236 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2240 Vutf_2000_version = build_string("0.11 (Shiki)");
2241 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2242 Version number of UTF-2000.
2245 staticpro (&Vcharacter_attribute_table);
2246 Vcharacter_attribute_table = make_char_code_table (Qnil);
2248 Vdefault_coded_charset_priority_list = Qnil;
2249 DEFVAR_LISP ("default-coded-charset-priority-list",
2250 &Vdefault_coded_charset_priority_list /*
2251 Default order of preferred coded-character-sets.
2257 complex_vars_of_mule_charset (void)
2259 staticpro (&Vcharset_hash_table);
2260 Vcharset_hash_table =
2261 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2263 /* Predefined character sets. We store them into variables for
2268 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2269 CHARSET_TYPE_256X256, 1, 2, 0,
2270 CHARSET_LEFT_TO_RIGHT,
2271 build_string ("BMP"),
2272 build_string ("BMP"),
2273 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2274 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2275 Qnil, 0, 0xFFFF, 0, 0);
2277 # define MIN_CHAR_THAI 0
2278 # define MAX_CHAR_THAI 0
2279 # define MIN_CHAR_GREEK 0
2280 # define MAX_CHAR_GREEK 0
2281 # define MIN_CHAR_HEBREW 0
2282 # define MAX_CHAR_HEBREW 0
2283 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2284 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2285 # define MIN_CHAR_CYRILLIC 0
2286 # define MAX_CHAR_CYRILLIC 0
2289 make_charset (LEADING_BYTE_ASCII, Qascii,
2290 CHARSET_TYPE_94, 1, 0, 'B',
2291 CHARSET_LEFT_TO_RIGHT,
2292 build_string ("ASCII"),
2293 build_string ("ASCII"),
2294 build_string ("ASCII (ISO646 IRV)"),
2295 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2296 Qnil, 0, 0x7F, 0, 0);
2297 Vcharset_control_1 =
2298 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2299 CHARSET_TYPE_94, 1, 1, 0,
2300 CHARSET_LEFT_TO_RIGHT,
2301 build_string ("C1"),
2302 build_string ("Control characters"),
2303 build_string ("Control characters 128-159"),
2305 Qnil, 0x80, 0x9F, 0, 0);
2306 Vcharset_latin_iso8859_1 =
2307 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2308 CHARSET_TYPE_96, 1, 1, 'A',
2309 CHARSET_LEFT_TO_RIGHT,
2310 build_string ("Latin-1"),
2311 build_string ("ISO8859-1 (Latin-1)"),
2312 build_string ("ISO8859-1 (Latin-1)"),
2313 build_string ("iso8859-1"),
2314 Qnil, 0xA0, 0xFF, 0, 32);
2315 Vcharset_latin_iso8859_2 =
2316 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2317 CHARSET_TYPE_96, 1, 1, 'B',
2318 CHARSET_LEFT_TO_RIGHT,
2319 build_string ("Latin-2"),
2320 build_string ("ISO8859-2 (Latin-2)"),
2321 build_string ("ISO8859-2 (Latin-2)"),
2322 build_string ("iso8859-2"),
2324 Vcharset_latin_iso8859_3 =
2325 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2326 CHARSET_TYPE_96, 1, 1, 'C',
2327 CHARSET_LEFT_TO_RIGHT,
2328 build_string ("Latin-3"),
2329 build_string ("ISO8859-3 (Latin-3)"),
2330 build_string ("ISO8859-3 (Latin-3)"),
2331 build_string ("iso8859-3"),
2333 Vcharset_latin_iso8859_4 =
2334 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2335 CHARSET_TYPE_96, 1, 1, 'D',
2336 CHARSET_LEFT_TO_RIGHT,
2337 build_string ("Latin-4"),
2338 build_string ("ISO8859-4 (Latin-4)"),
2339 build_string ("ISO8859-4 (Latin-4)"),
2340 build_string ("iso8859-4"),
2342 Vcharset_thai_tis620 =
2343 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2344 CHARSET_TYPE_96, 1, 1, 'T',
2345 CHARSET_LEFT_TO_RIGHT,
2346 build_string ("TIS620"),
2347 build_string ("TIS620 (Thai)"),
2348 build_string ("TIS620.2529 (Thai)"),
2349 build_string ("tis620"),
2350 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2351 Vcharset_greek_iso8859_7 =
2352 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2353 CHARSET_TYPE_96, 1, 1, 'F',
2354 CHARSET_LEFT_TO_RIGHT,
2355 build_string ("ISO8859-7"),
2356 build_string ("ISO8859-7 (Greek)"),
2357 build_string ("ISO8859-7 (Greek)"),
2358 build_string ("iso8859-7"),
2359 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2360 Vcharset_arabic_iso8859_6 =
2361 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2362 CHARSET_TYPE_96, 1, 1, 'G',
2363 CHARSET_RIGHT_TO_LEFT,
2364 build_string ("ISO8859-6"),
2365 build_string ("ISO8859-6 (Arabic)"),
2366 build_string ("ISO8859-6 (Arabic)"),
2367 build_string ("iso8859-6"),
2369 Vcharset_hebrew_iso8859_8 =
2370 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2371 CHARSET_TYPE_96, 1, 1, 'H',
2372 CHARSET_RIGHT_TO_LEFT,
2373 build_string ("ISO8859-8"),
2374 build_string ("ISO8859-8 (Hebrew)"),
2375 build_string ("ISO8859-8 (Hebrew)"),
2376 build_string ("iso8859-8"),
2377 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2378 Vcharset_katakana_jisx0201 =
2379 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2380 CHARSET_TYPE_94, 1, 1, 'I',
2381 CHARSET_LEFT_TO_RIGHT,
2382 build_string ("JISX0201 Kana"),
2383 build_string ("JISX0201.1976 (Japanese Kana)"),
2384 build_string ("JISX0201.1976 Japanese Kana"),
2385 build_string ("jisx0201\\.1976"),
2387 MIN_CHAR_HALFWIDTH_KATAKANA,
2388 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2389 Vcharset_latin_jisx0201 =
2390 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2391 CHARSET_TYPE_94, 1, 0, 'J',
2392 CHARSET_LEFT_TO_RIGHT,
2393 build_string ("JISX0201 Roman"),
2394 build_string ("JISX0201.1976 (Japanese Roman)"),
2395 build_string ("JISX0201.1976 Japanese Roman"),
2396 build_string ("jisx0201\\.1976"),
2398 Vcharset_cyrillic_iso8859_5 =
2399 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2400 CHARSET_TYPE_96, 1, 1, 'L',
2401 CHARSET_LEFT_TO_RIGHT,
2402 build_string ("ISO8859-5"),
2403 build_string ("ISO8859-5 (Cyrillic)"),
2404 build_string ("ISO8859-5 (Cyrillic)"),
2405 build_string ("iso8859-5"),
2406 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2407 Vcharset_latin_iso8859_9 =
2408 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2409 CHARSET_TYPE_96, 1, 1, 'M',
2410 CHARSET_LEFT_TO_RIGHT,
2411 build_string ("Latin-5"),
2412 build_string ("ISO8859-9 (Latin-5)"),
2413 build_string ("ISO8859-9 (Latin-5)"),
2414 build_string ("iso8859-9"),
2416 Vcharset_japanese_jisx0208_1978 =
2417 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2418 CHARSET_TYPE_94X94, 2, 0, '@',
2419 CHARSET_LEFT_TO_RIGHT,
2420 build_string ("JIS X0208:1978"),
2421 build_string ("JIS X0208:1978 (Japanese)"),
2423 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2424 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2426 Vcharset_chinese_gb2312 =
2427 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2428 CHARSET_TYPE_94X94, 2, 0, 'A',
2429 CHARSET_LEFT_TO_RIGHT,
2430 build_string ("GB2312"),
2431 build_string ("GB2312"),
2432 build_string ("GB2312 Chinese simplified"),
2433 build_string ("gb2312"),
2435 Vcharset_japanese_jisx0208 =
2436 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2437 CHARSET_TYPE_94X94, 2, 0, 'B',
2438 CHARSET_LEFT_TO_RIGHT,
2439 build_string ("JISX0208"),
2440 build_string ("JIS X0208:1983 (Japanese)"),
2441 build_string ("JIS X0208:1983 Japanese Kanji"),
2442 build_string ("jisx0208\\.1983"),
2444 Vcharset_korean_ksc5601 =
2445 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2446 CHARSET_TYPE_94X94, 2, 0, 'C',
2447 CHARSET_LEFT_TO_RIGHT,
2448 build_string ("KSC5601"),
2449 build_string ("KSC5601 (Korean)"),
2450 build_string ("KSC5601 Korean Hangul and Hanja"),
2451 build_string ("ksc5601"),
2453 Vcharset_japanese_jisx0212 =
2454 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2455 CHARSET_TYPE_94X94, 2, 0, 'D',
2456 CHARSET_LEFT_TO_RIGHT,
2457 build_string ("JISX0212"),
2458 build_string ("JISX0212 (Japanese)"),
2459 build_string ("JISX0212 Japanese Supplement"),
2460 build_string ("jisx0212"),
2463 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2464 Vcharset_chinese_cns11643_1 =
2465 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2466 CHARSET_TYPE_94X94, 2, 0, 'G',
2467 CHARSET_LEFT_TO_RIGHT,
2468 build_string ("CNS11643-1"),
2469 build_string ("CNS11643-1 (Chinese traditional)"),
2471 ("CNS 11643 Plane 1 Chinese traditional"),
2472 build_string (CHINESE_CNS_PLANE_RE("1")),
2474 Vcharset_chinese_cns11643_2 =
2475 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2476 CHARSET_TYPE_94X94, 2, 0, 'H',
2477 CHARSET_LEFT_TO_RIGHT,
2478 build_string ("CNS11643-2"),
2479 build_string ("CNS11643-2 (Chinese traditional)"),
2481 ("CNS 11643 Plane 2 Chinese traditional"),
2482 build_string (CHINESE_CNS_PLANE_RE("2")),
2485 Vcharset_latin_viscii_lower =
2486 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2487 CHARSET_TYPE_96, 1, 1, '1',
2488 CHARSET_LEFT_TO_RIGHT,
2489 build_string ("VISCII lower"),
2490 build_string ("VISCII lower (Vietnamese)"),
2491 build_string ("VISCII lower (Vietnamese)"),
2492 build_string ("MULEVISCII-LOWER"),
2494 Vcharset_latin_viscii_upper =
2495 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2496 CHARSET_TYPE_96, 1, 1, '2',
2497 CHARSET_LEFT_TO_RIGHT,
2498 build_string ("VISCII upper"),
2499 build_string ("VISCII upper (Vietnamese)"),
2500 build_string ("VISCII upper (Vietnamese)"),
2501 build_string ("MULEVISCII-UPPER"),
2503 Vcharset_latin_viscii =
2504 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2505 CHARSET_TYPE_256, 1, 2, 0,
2506 CHARSET_LEFT_TO_RIGHT,
2507 build_string ("VISCII"),
2508 build_string ("VISCII 1.1 (Vietnamese)"),
2509 build_string ("VISCII 1.1 (Vietnamese)"),
2510 build_string ("VISCII1\\.1"),
2512 Vcharset_hiragana_jisx0208 =
2513 make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
2514 CHARSET_TYPE_94X94, 2, 0, 'B',
2515 CHARSET_LEFT_TO_RIGHT,
2516 build_string ("Hiragana"),
2517 build_string ("Hiragana of JIS X0208"),
2518 build_string ("Japanese Hiragana of JIS X0208"),
2519 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2520 Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA,
2521 (0x24 - 33) * 94 + (0x21 - 33), 33);
2522 Vcharset_katakana_jisx0208 =
2523 make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208,
2524 CHARSET_TYPE_94X94, 2, 0, 'B',
2525 CHARSET_LEFT_TO_RIGHT,
2526 build_string ("Katakana"),
2527 build_string ("Katakana of JIS X0208"),
2528 build_string ("Japanese Katakana of JIS X0208"),
2529 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2530 Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA,
2531 (0x25 - 33) * 94 + (0x21 - 33), 33);
2533 Vcharset_chinese_big5_1 =
2534 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2535 CHARSET_TYPE_94X94, 2, 0, '0',
2536 CHARSET_LEFT_TO_RIGHT,
2537 build_string ("Big5"),
2538 build_string ("Big5 (Level-1)"),
2540 ("Big5 Level-1 Chinese traditional"),
2541 build_string ("big5"),
2543 Vcharset_chinese_big5_2 =
2544 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2545 CHARSET_TYPE_94X94, 2, 0, '1',
2546 CHARSET_LEFT_TO_RIGHT,
2547 build_string ("Big5"),
2548 build_string ("Big5 (Level-2)"),
2550 ("Big5 Level-2 Chinese traditional"),
2551 build_string ("big5"),
2554 #ifdef ENABLE_COMPOSITE_CHARS
2555 /* #### For simplicity, we put composite chars into a 96x96 charset.
2556 This is going to lead to problems because you can run out of
2557 room, esp. as we don't yet recycle numbers. */
2558 Vcharset_composite =
2559 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2560 CHARSET_TYPE_96X96, 2, 0, 0,
2561 CHARSET_LEFT_TO_RIGHT,
2562 build_string ("Composite"),
2563 build_string ("Composite characters"),
2564 build_string ("Composite characters"),
2567 composite_char_row_next = 32;
2568 composite_char_col_next = 32;
2570 Vcomposite_char_string2char_hash_table =
2571 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2572 Vcomposite_char_char2string_hash_table =
2573 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2574 staticpro (&Vcomposite_char_string2char_hash_table);
2575 staticpro (&Vcomposite_char_char2string_hash_table);
2576 #endif /* ENABLE_COMPOSITE_CHARS */