1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
5 This file is part of XEmacs.
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* Synched up with: FSF 20.3. Not in FSF. */
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
37 /* The various pre-defined charsets. */
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_ucs_bmp;
62 Lisp_Object Vcharset_latin_viscii;
63 Lisp_Object Vcharset_latin_viscii_lower;
64 Lisp_Object Vcharset_latin_viscii_upper;
65 Lisp_Object Vcharset_hiragana_jisx0208;
66 Lisp_Object Vcharset_katakana_jisx0208;
68 Lisp_Object Vcharset_chinese_big5_1;
69 Lisp_Object Vcharset_chinese_big5_2;
71 #ifdef ENABLE_COMPOSITE_CHARS
72 Lisp_Object Vcharset_composite;
74 /* Hash tables for composite chars. One maps string representing
75 composed chars to their equivalent chars; one goes the other way. */
77 Lisp_Object Vcomposite_char_char2string_hash_table;
78 Lisp_Object Vcomposite_char_string2char_hash_table;
80 static int composite_char_row_next;
81 static int composite_char_col_next;
83 #endif /* ENABLE_COMPOSITE_CHARS */
85 /* Table of charsets indexed by leading byte. */
86 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
88 /* Table of charsets indexed by type/final-byte/direction. */
/* NOTE(review): charset_by_attributes is declared twice with different
   shapes ([4][128] and [4][128][2]).  make_charset further down indexes it
   both as [type][final] and as [type][final][direction], so these are
   presumably alternatives selected by a preprocessor conditional whose
   lines are not shown here -- confirm.  */
90 Lisp_Object charset_by_attributes[4][128];
92 Lisp_Object charset_by_attributes[4][128][2];
96 /* Table of number of bytes in the string representation of a character
97 indexed by the first byte of that representation.
99 rep_bytes_by_first_byte(c) is more efficient than the equivalent
100 canonical computation:
102 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
/* Each initializer row below holds 16 entries, i.e. one row per block of
   0x10 consecutive first-byte values.  */
104 Bytecount rep_bytes_by_first_byte[0xA0] =
105 { /* 0x00 - 0x7f are for straight ASCII */
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
114 /* 0x80 - 0x8f are for Dimension-1 official charsets */
/* NOTE(review): two 16-entry rows follow for the 0x80 - 0x8f range, but an
   array of 0xA0 entries has room for only one of them.  Presumably they are
   alternatives chosen by a preprocessor conditional that is not shown here;
   the first gives 3 bytes for first byte 0x8f, the second gives 2 --
   confirm.  */
116 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
118 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 /* 0x90 - 0x9d are for Dimension-2 official charsets */
121 /* 0x9e is for Dimension-1 private charsets */
122 /* 0x9f is for Dimension-2 private charsets */
123 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
/* Mark method of the char-byte-table type (it is the marker handed to
   DEFINE_LRECORD_IMPLEMENTATION below).  Calls MARKOBJ on each of the 256
   entries of OBJ's `property' array.  */
129 mark_char_byte_table (Lisp_Object obj, void (*markobj) (Lisp_Object))
131 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
134 for (i = 0; i < 256; i++)
136 markobj (cte->property[i]);
/* Equality method of the char-byte-table type.  Walks the 256 slots of
   OBJ1 and OBJ2 in parallel.  DEPTH is handed on, incremented by one, to
   every nested comparison.  */
142 char_byte_table_equal (Lisp_Object obj1, Lisp_Object obj2, int depth)
144 struct Lisp_Char_Byte_Table *cte1 = XCHAR_BYTE_TABLE (obj1);
145 struct Lisp_Char_Byte_Table *cte2 = XCHAR_BYTE_TABLE (obj2);
148 for (i = 0; i < 256; i++)
149 if (CHAR_BYTE_TABLE_P (cte1->property[i]))
151 if (CHAR_BYTE_TABLE_P (cte2->property[i]))
/* Both slots hold nested tables: compare them recursively.  */
153 if (!char_byte_table_equal (cte1->property[i],
154 cte2->property[i], depth + 1))
/* Plain values are compared with the generic internal_equal.  */
161 if (!internal_equal (cte1->property[i], cte2->property[i], depth + 1))
/* Hash method of the char-byte-table type: hashes the 256-element
   `property' array with internal_array_hash, passing DEPTH through
   unchanged.  */
167 char_byte_table_hash (Lisp_Object obj, int depth)
169 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (obj);
171 return internal_array_hash (cte->property, 256, depth);
/* Layout description of struct Lisp_Char_Byte_Table: 256 Lisp_Object
   slots starting at the `property' member.  */
174 static const struct lrecord_description char_byte_table_description[] = {
175 { XD_LISP_OBJECT, offsetof(struct Lisp_Char_Byte_Table, property), 256 },
/* Define the lrecord implementation for char-byte-tables.  Note that the
   type name string is "char-code-table".  Printing uses the generic
   internal_object_printer; the mark, equal and hash methods and the layout
   description are the ones defined above.  */
179 DEFINE_LRECORD_IMPLEMENTATION ("char-code-table", char_byte_table,
180 mark_char_byte_table,
181 internal_object_printer,
182 0, char_byte_table_equal,
183 char_byte_table_hash,
184 char_byte_table_description,
185 struct Lisp_Char_Byte_Table);
/* Allocate a new char-byte-table whose 256 entries are all set to INITVAL,
   and wrap it in the Lisp_Object OBJ.  */
189 make_char_byte_table (Lisp_Object initval)
193 struct Lisp_Char_Byte_Table *cte =
194 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
195 &lrecord_char_byte_table);
197 for (i = 0; i < 256; i++)
198 cte->property[i] = initval;
200 XSETCHAR_BYTE_TABLE (obj, cte);
/* Make a copy of the char-byte-table ENTRY.  Entries that are themselves
   char-byte-tables are copied recursively; every other entry is stored in
   the copy as the same Lisp_Object.  */
205 copy_char_byte_table (Lisp_Object entry)
207 struct Lisp_Char_Byte_Table *cte = XCHAR_BYTE_TABLE (entry);
210 struct Lisp_Char_Byte_Table *ctenew =
211 alloc_lcrecord_type (struct Lisp_Char_Byte_Table,
212 &lrecord_char_byte_table);
214 for (i = 0; i < 256; i++)
216 Lisp_Object new = cte->property[i];
217 if (CHAR_BYTE_TABLE_P (new))
218 ctenew->property[i] = copy_char_byte_table (new);
220 ctenew->property[i] = new;
223 XSETCHAR_BYTE_TABLE (obj, ctenew);
/* A char-code-table is created exactly like a char-byte-table; the root
   table starts out with every entry equal to INITVAL.  */
227 #define make_char_code_table(initval) make_char_byte_table(initval)
/* Look up the value stored for character CH in TABLE.

   TABLE is the root of a tree of char-byte-tables up to four levels deep.
   The levels are indexed by the four bytes of CH, most significant first
   (ch >> 24, ch >> 16, ch >> 8, then the low byte).  At each level the
   entry is fetched and, if it is itself a char-byte-table, the walk
   descends into it.  Presumably an entry that is not a table ends the walk
   and is the result; those branches are not shown here.  */
230 get_char_code_table (Emchar ch, Lisp_Object table)
232 struct Lisp_Char_Byte_Table* cpt = XCHAR_BYTE_TABLE (table);
/* NOTE(review): unlike the three lower levels, this index is not cast to
   unsigned char, so it assumes ch >> 24 already lies in 0..255 -- confirm
   against the definition of Emchar.  */
233 Lisp_Object ret = cpt->property [ch >> 24];
235 if (CHAR_BYTE_TABLE_P (ret))
236 cpt = XCHAR_BYTE_TABLE (ret);
240 ret = cpt->property [(unsigned char) (ch >> 16)];
241 if (CHAR_BYTE_TABLE_P (ret))
242 cpt = XCHAR_BYTE_TABLE (ret);
246 ret = cpt->property [(unsigned char) (ch >> 8)];
247 if (CHAR_BYTE_TABLE_P (ret))
248 cpt = XCHAR_BYTE_TABLE (ret);
/* Fourth level: the low byte selects the final value.  */
252 return cpt->property [(unsigned char) ch];
/* Store VALUE as the entry for character CH in TABLE.

   Follows the same byte-by-byte path as get_char_code_table.  Where the
   path reaches an entry RET that is not a char-byte-table and is not EQ to
   VALUE, the missing lower levels are created with make_char_byte_table
   (ret).  Each new table is thus pre-filled with the old value, and only
   the slot for CH receives VALUE.  */
256 put_char_code_table (Emchar ch, Lisp_Object value, Lisp_Object table)
258 struct Lisp_Char_Byte_Table* cpt1 = XCHAR_BYTE_TABLE (table);
/* NOTE(review): this read indexes with an uncast ch >> 24, while the
   matching store at the end of the function casts to unsigned char --
   confirm the two always agree.  */
259 Lisp_Object ret = cpt1->property[ch >> 24];
261 if (CHAR_BYTE_TABLE_P (ret))
263 struct Lisp_Char_Byte_Table* cpt2 = XCHAR_BYTE_TABLE (ret);
265 ret = cpt2->property[(unsigned char)(ch >> 16)];
266 if (CHAR_BYTE_TABLE_P (ret))
268 struct Lisp_Char_Byte_Table* cpt3 = XCHAR_BYTE_TABLE (ret);
270 ret = cpt3->property[(unsigned char)(ch >> 8)];
271 if (CHAR_BYTE_TABLE_P (ret))
/* All four levels already exist: overwrite the leaf slot.  */
273 struct Lisp_Char_Byte_Table* cpt4
274 = XCHAR_BYTE_TABLE (ret);
276 cpt4->property[(unsigned char)ch] = value;
/* The third level holds a plain value: create the fourth level.  */
278 else if (!EQ (ret, value))
280 Lisp_Object cpt4 = make_char_byte_table (ret);
282 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
283 cpt3->property[(unsigned char)(ch >> 8)] = cpt4;
/* The second level holds a plain value: create levels three and four.  */
286 else if (!EQ (ret, value))
288 Lisp_Object cpt3 = make_char_byte_table (ret);
289 Lisp_Object cpt4 = make_char_byte_table (ret);
291 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
292 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)]
294 cpt2->property[(unsigned char)(ch >> 16)] = cpt3;
/* The root holds a plain value: create levels two, three and four.  */
297 else if (!EQ (ret, value))
299 Lisp_Object cpt2 = make_char_byte_table (ret);
300 Lisp_Object cpt3 = make_char_byte_table (ret);
301 Lisp_Object cpt4 = make_char_byte_table (ret);
303 XCHAR_BYTE_TABLE(cpt4)->property[(unsigned char)ch] = value;
304 XCHAR_BYTE_TABLE(cpt3)->property[(unsigned char)(ch >> 8)] = cpt4;
305 XCHAR_BYTE_TABLE(cpt2)->property[(unsigned char)(ch >> 16)] = cpt3;
306 cpt1->property[(unsigned char)(ch >> 24)] = cpt2;
311 Lisp_Object Vcharacter_attribute_table;
/* Lisp primitive `char-attribute-alist' (one argument).  Returns whatever
   Vcharacter_attribute_table holds for CHARACTER, fetched with
   get_char_code_table.  */
313 DEFUN ("char-attribute-alist", Fchar_attribute_alist, 1, 1, 0, /*
314 Return the alist of attributes of CHARACTER.
318 return get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
/* Lisp primitive `get-char-attribute' (two arguments).  Fetches
   CHARACTER's entry from Vcharacter_attribute_table.  On the path shown
   here it returns the cdr of the pair that Fassq finds for ATTRIBUTE in
   that entry.  ATTRIBUTE is also passed to Ffind_charset.
   NOTE(review): what is done when ATTRIBUTE does name a charset is not
   shown here -- document it once confirmed.  */
321 DEFUN ("get-char-attribute", Fget_char_attribute, 2, 2, 0, /*
322 Return the value of CHARACTER's ATTRIBUTE.
324 (character, attribute))
327 = get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
333 if (!NILP (ccs = Ffind_charset (attribute)))
336 return Fcdr (Fassq (attribute, ret));
/* Record VALUE under ATTRIBUTE in CHARACTER's attribute alist.

   The alist is read from Vcharacter_attribute_table and searched with
   Fassq.  A new (ATTRIBUTE . VALUE) pair is consed onto the front, or the
   cdr of an existing pair is replaced when it is not already EQ to VALUE.
   The alist is then written back with put_char_code_table.  */
340 put_char_attribute (Lisp_Object character, Lisp_Object attribute,
343 Emchar char_code = XCHAR (character);
345 = get_char_code_table (char_code, Vcharacter_attribute_table);
348 cell = Fassq (attribute, ret);
352 ret = Fcons (Fcons (attribute, value), ret);
354 else if (!EQ (Fcdr (cell), value))
356 Fsetcdr (cell, value);
358 put_char_code_table (char_code, ret, Vcharacter_attribute_table);
/* Lisp primitive `put-char-attribute' (three arguments).

   ATTRIBUTE is looked up with Ffind_charset.  For a charset the code also
   maintains that charset's decoding table, a vector whose elements may be
   further vectors.  It clears the slot addressed by the code point
   previously recorded for CHARACTER, creates the table or a nested vector
   where none exists yet, and stores CHARACTER in the slot addressed by the
   new VALUE.  In every case the attribute itself is finally recorded with
   put_char_attribute.
   NOTE(review): several conditions and loop headers of this function are
   not shown here; the summary above follows the visible statements only --
   confirm the exact control flow.  */
362 DEFUN ("put-char-attribute", Fput_char_attribute, 3, 3, 0, /*
363 Store CHARACTER's ATTRIBUTE with VALUE.
365 (character, attribute, value))
369 ccs = Ffind_charset (attribute);
373 Lisp_Object v = XCHARSET_DECODING_TABLE (ccs);
378 /* ad-hoc method for `ascii' */
379 if ((XCHARSET_CHARS (ccs) == 94) &&
380 (XCHARSET_BYTE_OFFSET (ccs) != 33))
381 ccs_len = 128 - XCHARSET_BYTE_OFFSET (ccs);
383 ccs_len = XCHARSET_CHARS (ccs);
386 signal_simple_error ("Invalid value for coded-charset",
/* Fetch the code point currently recorded for CHARACTER so that its slot
   in the decoding table can be cleared.  */
390 rest = Fget_char_attribute (character, attribute);
397 Lisp_Object ei = Fcar (rest);
/* Code point bytes are rebased by the charset's byte offset to obtain the
   vector index.  */
399 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
400 nv = XVECTOR_DATA(v)[i];
407 XVECTOR_DATA(v)[i] = Qnil;
408 v = XCHARSET_DECODING_TABLE (ccs);
/* Create the top-level decoding vector, CCS_LEN entries long.  */
413 XCHARSET_DECODING_TABLE (ccs) = v = make_vector (ccs_len, Qnil);
420 Lisp_Object ei = Fcar (rest);
423 signal_simple_error ("Invalid value for coded-charset",
425 i = XINT (ei) - XCHARSET_BYTE_OFFSET (ccs);
426 nv = XVECTOR_DATA(v)[i];
/* Create a nested vector for the next byte of the code point.  */
432 nv = (XVECTOR_DATA(v)[i] = make_vector (ccs_len, Qnil));
439 XVECTOR_DATA(v)[i] = character;
441 return put_char_attribute (character, attribute, value);
/* Lisp primitive `define-char' (one argument, the alist ATTRIBUTES).

   The character's code is first looked up as the `ucs' entry of
   ATTRIBUTES.  The visible code also derives a code from an entry naming a
   charset that has a final byte.  For dimension 1 the code is a base plus
   (final - '0') * CHARS + (b1 - 33 or 32).  For dimension 2 it is
   MIN_CHAR_94x94 or MIN_CHAR_96x96 plus (final - '0') * CHARS * CHARS plus
   the two rebased position bytes.  A code that is not an integer signals
   "Invalid argument".  Each (KEY . VALUE) entry of ATTRIBUTES is then
   stored with Fput_char_attribute.  The last visible statement fetches the
   character's entry from Vcharacter_attribute_table.
   NOTE(review): the conditions selecting between the `ucs' path and the
   charset-derived path are not shown here -- confirm.  */
446 DEFUN ("define-char", Fdefine_char, 1, 1, 0, /*
447 Store character's ATTRIBUTES.
451 Lisp_Object rest = attributes;
452 Lisp_Object code = Fcdr (Fassq (Qucs, attributes));
453 Lisp_Object character;
459 Lisp_Object cell = Fcar (rest);
463 signal_simple_error ("Invalid argument", attributes);
464 if (!NILP (ccs = Ffind_charset (Fcar (cell)))
465 && XCHARSET_FINAL (ccs))
469 if (XCHARSET_DIMENSION (ccs) == 1)
471 Lisp_Object eb1 = Fcar (Fcdr (cell));
475 signal_simple_error ("Invalid argument", attributes);
477 switch (XCHARSET_CHARS (ccs))
481 + (XCHARSET_FINAL (ccs) - '0') * 94 + (b1 - 33);
485 + (XCHARSET_FINAL (ccs) - '0') * 96 + (b1 - 32);
491 else if (XCHARSET_DIMENSION (ccs) == 2)
493 Lisp_Object eb1 = Fcar (Fcdr (cell));
494 Lisp_Object eb2 = Fcar (Fcdr (Fcdr (cell)));
498 signal_simple_error ("Invalid argument", attributes);
501 signal_simple_error ("Invalid argument", attributes);
503 switch (XCHARSET_CHARS (ccs))
506 code = MIN_CHAR_94x94
507 + (XCHARSET_FINAL (ccs) - '0') * 94 * 94
508 + (b1 - 33) * 94 + (b2 - 33);
511 code = MIN_CHAR_96x96
512 + (XCHARSET_FINAL (ccs) - '0') * 96 * 96
513 + (b1 - 32) * 96 + (b2 - 32);
524 character = make_char (code);
525 goto setup_attributes;
531 else if (!INTP (code))
532 signal_simple_error ("Invalid argument", attributes);
534 character = make_char (XINT (code));
540 Lisp_Object cell = Fcar (rest);
543 signal_simple_error ("Invalid argument", attributes);
544 Fput_char_attribute (character, Fcar (cell), Fcdr (cell));
548 get_char_code_table (XCHAR (character), Vcharacter_attribute_table);
551 Lisp_Object Vutf_2000_version;
555 int leading_code_private_11;
558 Lisp_Object Qcharsetp;
560 /* Qdoc_string, Qdimension, Qchars defined in general.c */
561 Lisp_Object Qregistry, Qfinal, Qgraphic;
562 Lisp_Object Qdirection;
563 Lisp_Object Qreverse_direction_charset;
564 Lisp_Object Qleading_byte;
565 Lisp_Object Qshort_name, Qlong_name;
581 Qjapanese_jisx0208_1978,
593 Qvietnamese_viscii_lower,
594 Qvietnamese_viscii_upper,
602 Lisp_Object Ql2r, Qr2l;
604 Lisp_Object Vcharset_hash_table;
607 static Charset_ID next_allocated_leading_byte;
609 static Charset_ID next_allocated_1_byte_leading_byte;
610 static Charset_ID next_allocated_2_byte_leading_byte;
613 /* Composite characters are characters constructed by overstriking two
614 or more regular characters.
616 1) The old Mule implementation involves storing composite characters
617 in a buffer as a tag followed by all of the actual characters
618 used to make up the composite character. I think this is a bad
619 idea; it greatly complicates code that wants to handle strings
620 one character at a time because it has to deal with the possibility
621 of great big ungainly characters. It's much more reasonable to
622 simply store an index into a table of composite characters.
624 2) The current implementation only allows for 16,384 separate
625 composite characters over the lifetime of the XEmacs process.
626 This could become a potential problem if the user
627 edited lots of different files that use composite characters.
628 Due to FSF bogosity, increasing the number of allowable
629 composite characters under Mule would decrease the number
630 of possible faces that can exist. Mule already has shrunk
631 this to 2048, and further shrinkage would become uncomfortable.
632 No such problems exist in XEmacs.
634 Composite characters could be represented as 0x80 C1 C2 C3,
635 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
636 for slightly under 2^20 (one million) composite characters
637 over the XEmacs process lifetime, and you only need to
638 increase the size of a Mule character from 19 to 21 bits.
639 Or you could use 0x80 C1 C2 C3 C4, allowing for about
640 85 million (slightly over 2^26) composite characters. */
643 /************************************************************************/
644 /* Basic Emchar functions */
645 /************************************************************************/
647 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
648 string in STR. Returns the number of bytes stored.
649 Do not call this directly. Use the macro set_charptr_emchar() instead. */
/* Write the multi-byte representation of C through the cursor P.  Two
   encodings appear below: a UTF-8 style bit layout and a
   leading-byte/position-code layout.
   NOTE(review): presumably only one of the two is compiled in, selected
   by a preprocessor conditional that is not shown here -- confirm.  */
653 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
/* UTF-8 style layout.  The first byte carries the top bits of C under a
   length marker (0xc0, 0xe0, 0xf0, 0xf8 or 0xfc); each following byte
   carries six bits of C under 0x80.  Two-byte form: */
668 else if ( c <= 0x7ff )
670 *p++ = (c >> 6) | 0xc0;
671 *p++ = (c & 0x3f) | 0x80;
/* Three-byte form.  */
673 else if ( c <= 0xffff )
675 *p++ = (c >> 12) | 0xe0;
676 *p++ = ((c >> 6) & 0x3f) | 0x80;
677 *p++ = (c & 0x3f) | 0x80;
/* Four-byte form.  */
679 else if ( c <= 0x1fffff )
681 *p++ = (c >> 18) | 0xf0;
682 *p++ = ((c >> 12) & 0x3f) | 0x80;
683 *p++ = ((c >> 6) & 0x3f) | 0x80;
684 *p++ = (c & 0x3f) | 0x80;
/* Five-byte form.  */
686 else if ( c <= 0x3ffffff )
688 *p++ = (c >> 24) | 0xf8;
689 *p++ = ((c >> 18) & 0x3f) | 0x80;
690 *p++ = ((c >> 12) & 0x3f) | 0x80;
691 *p++ = ((c >> 6) & 0x3f) | 0x80;
692 *p++ = (c & 0x3f) | 0x80;
/* Six-byte form.  */
696 *p++ = (c >> 30) | 0xfc;
697 *p++ = ((c >> 24) & 0x3f) | 0x80;
698 *p++ = ((c >> 18) & 0x3f) | 0x80;
699 *p++ = ((c >> 12) & 0x3f) | 0x80;
700 *p++ = ((c >> 6) & 0x3f) | 0x80;
701 *p++ = (c & 0x3f) | 0x80;
/* Leading-byte layout: split C into its charset and position codes.  A
   private leading byte is preceded by its prefix byte.  */
704 BREAKUP_CHAR (c, charset, c1, c2);
705 lb = CHAR_LEADING_BYTE (c);
706 if (LEADING_BYTE_PRIVATE_P (lb))
707 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
709 if (EQ (charset, Vcharset_control_1))
718 /* Return the first character from a Mule-encoded string in STR,
719 assuming it's non-ASCII. Do not call this directly.
720 Use the macro charptr_emchar() instead. */
723 non_ascii_charptr_emchar (CONST Bufbyte *str)
/* UTF-8 style decoding: the first byte B is classified by these
   thresholds, from the longest form down.  */
736 else if ( b >= 0xf8 )
741 else if ( b >= 0xf0 )
746 else if ( b >= 0xe0 )
751 else if ( b >= 0xc0 )
/* Fold in the following bytes, six payload bits from each.  */
761 for( ; len > 0; len-- )
764 ch = ( ch << 6 ) | ( b & 0x3f );
/* Leading-byte decoding.
   NOTE(review): presumably an alternative to the code above, selected by
   a preprocessor conditional that is not shown here -- confirm.  */
768 Bufbyte i0 = *str, i1, i2 = 0;
/* Control-1 characters: the result is the next byte minus 0x20.  */
771 if (i0 == LEADING_BYTE_CONTROL_1)
772 return (Emchar) (*++str - 0x20);
774 if (LEADING_BYTE_PREFIX_P (i0))
/* The charset is found from the leading byte; I2 is only meaningful for
   dimension-2 charsets and is otherwise left at 0.  */
779 charset = CHARSET_BY_LEADING_BYTE (i0);
780 if (XCHARSET_DIMENSION (charset) == 2)
783 return MAKE_CHAR (charset, i1, i2);
787 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
788 Do not call this directly. Use the macro valid_char_p() instead. */
792 non_ascii_valid_char_p (Emchar ch)
796 /* Must have only lowest 19 bits set */
/* Split CH into its three fields.  */
800 f1 = CHAR_FIELD1 (ch);
801 f2 = CHAR_FIELD2 (ch);
802 f3 = CHAR_FIELD3 (ch);
/* This test is true when F2 falls outside both the official and the
   private FIELD2 range.  */
808 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
809 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
810 f2 > MAX_CHAR_FIELD2_PRIVATE)
/* Positions 0x20 and 0x7F are singled out; for them the charset is looked
   up below and must have 96 characters.  */
815 if (f3 != 0x20 && f3 != 0x7F)
/* NOTE(review): the next three lines read as the body of a comment whose
   opening and closing delimiters are not present -- restore them.  */
819 NOTE: This takes advantage of the fact that
820 FIELD2_TO_OFFICIAL_LEADING_BYTE and
821 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
823 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
824 return (XCHARSET_CHARS (charset) == 96);
/* The same range test, applied to F1.  */
830 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
831 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
832 f1 > MAX_CHAR_FIELD1_PRIVATE)
/* Both position fields are tested against the lower bound 0x20.  */
834 if (f2 < 0x20 || f3 < 0x20)
837 #ifdef ENABLE_COMPOSITE_CHARS
/* Composite characters are looked up in the char-to-string hash table.  */
838 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
840 if (UNBOUNDP (Fgethash (make_int (ch),
841 Vcomposite_char_char2string_hash_table,
846 #endif /* ENABLE_COMPOSITE_CHARS */
/* As above: when either position is 0x20 or 0x7F the charset, official or
   private depending on F1, must have 96 characters.  */
848 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
851 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
853 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
856 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
858 return (XCHARSET_CHARS (charset) == 96);
864 /************************************************************************/
865 /* Basic string functions */
866 /************************************************************************/
868 /* Copy the character pointed to by PTR into STR, assuming it's
869 non-ASCII. Do not call this directly. Use the macro
870 charptr_copy_char() instead. */
873 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
/* STRPTR tracks the last byte written into STR.  */
875 Bufbyte *strptr = str;
/* Dispatch on the representation length implied by the first byte.  Each
   case copies one trailing byte and then falls into the next case, so a
   longer character simply enters the chain higher up.  */
877 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
879 /* Notice fallthrough. */
881 case 6: *++strptr = *ptr++;
882 case 5: *++strptr = *ptr++;
884 case 4: *++strptr = *ptr++;
885 case 3: *++strptr = *ptr++;
886 case 2: *++strptr = *ptr;
/* The result is the distance from STR to one past the last byte
   written.  */
891 return strptr + 1 - str;
895 /************************************************************************/
896 /* streams of Emchars */
897 /************************************************************************/
899 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
900 The functions below are not meant to be called directly; use
901 the macros in insdel.h. */
/* Assemble one character from STREAM.  CH supplies the first byte; the
   remaining bytes are read from STREAM with Lstream_getc into a local
   buffer, which is then decoded with charptr_emchar.  */
904 Lstream_get_emchar_1 (Lstream *stream, int ch)
906 Bufbyte str[MAX_EMCHAR_LEN];
907 Bufbyte *strptr = str;
909 str[0] = (Bufbyte) ch;
/* The first byte determines how many further bytes are read.  */
910 switch (REP_BYTES_BY_FIRST_BYTE (ch))
912 /* Notice fallthrough. */
915 ch = Lstream_getc (stream);
917 *++strptr = (Bufbyte) ch;
919 ch = Lstream_getc (stream);
921 *++strptr = (Bufbyte) ch;
924 ch = Lstream_getc (stream);
926 *++strptr = (Bufbyte) ch;
928 ch = Lstream_getc (stream);
930 *++strptr = (Bufbyte) ch;
932 ch = Lstream_getc (stream);
934 *++strptr = (Bufbyte) ch;
939 return charptr_emchar (str);
/* Encode CH into a local buffer with set_charptr_emchar and write the
   resulting LEN bytes to STREAM.  Returns the value of Lstream_write.  */
943 Lstream_fput_emchar (Lstream *stream, Emchar ch)
945 Bufbyte str[MAX_EMCHAR_LEN];
946 Bytecount len = set_charptr_emchar (str, ch);
947 return Lstream_write (stream, str, len);
/* Encode CH into a local buffer with set_charptr_emchar and hand the
   resulting LEN bytes to Lstream_unread on STREAM.  */
951 Lstream_funget_emchar (Lstream *stream, Emchar ch)
953 Bufbyte str[MAX_EMCHAR_LEN];
954 Bytecount len = set_charptr_emchar (str, ch);
955 Lstream_unread (stream, str, len);
959 /************************************************************************/
961 /************************************************************************/
/* Mark method of the charset type (it is the marker handed to
   DEFINE_LRECORD_IMPLEMENTATION below).  Calls MARKOBJ on the short name,
   long name, doc string, registry, CCL program and decoding table.  */
964 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
966 struct Lisp_Charset *cs = XCHARSET (obj);
968 markobj (cs->short_name);
969 markobj (cs->long_name);
970 markobj (cs->doc_string);
971 markobj (cs->registry);
972 markobj (cs->ccl_program);
974 markobj (cs->decoding_table);
/* Print method of the charset type.  The output has the form

     #<charset NAME SHORT-NAME LONG-NAME DOC-STRING TYPE DIRECTION
       cols=N gN final='C' reg=REGISTRY 0xUID>

   and is written to PRINTCHARFUN.  */
980 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
982 struct Lisp_Charset *cs = XCHARSET (obj);
/* Charsets have no readable syntax, hence this error.
   NOTE(review): the condition guarding the error is not shown here.  */
986 error ("printing unreadable object #<charset %s 0x%x>",
987 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
/* The name is printed with escape flag 0; the three strings that follow
   it are printed with escape flag 1.  */
990 write_c_string ("#<charset ", printcharfun);
991 print_internal (CHARSET_NAME (cs), printcharfun, 0);
992 write_c_string (" ", printcharfun);
993 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
994 write_c_string (" ", printcharfun);
995 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
996 write_c_string (" ", printcharfun);
997 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
/* The numeric attributes are formatted into BUF first.  */
998 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
999 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
1000 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
1001 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
1003 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
1004 CHARSET_COLUMNS (cs),
1005 CHARSET_GRAPHIC (cs),
1006 CHARSET_FINAL (cs));
1007 write_c_string (buf, printcharfun);
1008 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
/* The record's uid closes the printed form.  */
1009 sprintf (buf, " 0x%x>", cs->header.uid);
1010 write_c_string (buf, printcharfun);
/* Layout description of struct Lisp_Charset: a run of 7 Lisp_Object slots
   starting at `name', and a run of 2 starting at `decoding_table'.  */
1013 static const struct lrecord_description charset_description[] = {
1014 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
1016 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, decoding_table), 2 },
/* Define the lrecord implementation for charsets.  mark_charset and
   print_charset are supplied, the next three method slots are 0, and the
   layout is given by charset_description.  */
1021 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
1022 mark_charset, print_charset, 0, 0, 0,
1023 charset_description,
1024 struct Lisp_Charset);
1026 /* Make a new charset. */
/* Allocate a charset record, fill in its fields from the arguments, derive
   its dimension and per-dimension size from TYPE, compute the number of
   representation bytes, and register it in the lookup tables
   charset_by_attributes, charset_by_leading_byte and
   Vcharset_hash_table.  */
1029 make_charset (Charset_ID id, Lisp_Object name,
1030 unsigned char type, unsigned char columns, unsigned char graphic,
1031 Bufbyte final, unsigned char direction, Lisp_Object short_name,
1032 Lisp_Object long_name, Lisp_Object doc,
1034 Lisp_Object decoding_table,
1035 Emchar ucs_min, Emchar ucs_max,
1036 Emchar code_offset, unsigned char byte_offset)
1039 struct Lisp_Charset *cs =
1040 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
1041 XSETCHARSET (obj, cs);
1043 CHARSET_ID (cs) = id;
1044 CHARSET_NAME (cs) = name;
1045 CHARSET_SHORT_NAME (cs) = short_name;
1046 CHARSET_LONG_NAME (cs) = long_name;
1047 CHARSET_DIRECTION (cs) = direction;
1048 CHARSET_TYPE (cs) = type;
1049 CHARSET_COLUMNS (cs) = columns;
1050 CHARSET_GRAPHIC (cs) = graphic;
1051 CHARSET_FINAL (cs) = final;
1052 CHARSET_DOC_STRING (cs) = doc;
1053 CHARSET_REGISTRY (cs) = reg;
/* A new charset starts without a CCL program and without a
   reverse-direction partner.  */
1054 CHARSET_CCL_PROGRAM (cs) = Qnil;
1055 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
/* NOTE(review): the decoding table is set to Qnil here and the
   DECODING_TABLE argument is not referenced in the visible lines --
   confirm whether that is intended.  */
1057 CHARSET_DECODING_TABLE(cs) = Qnil;
1058 CHARSET_UCS_MIN(cs) = ucs_min;
1059 CHARSET_UCS_MAX(cs) = ucs_max;
1060 CHARSET_CODE_OFFSET(cs) = code_offset;
1061 CHARSET_BYTE_OFFSET(cs) = byte_offset;
/* Map the charset type to (dimension, characters per dimension).  */
1064 switch (CHARSET_TYPE (cs))
1066 case CHARSET_TYPE_94:
1067 CHARSET_DIMENSION (cs) = 1;
1068 CHARSET_CHARS (cs) = 94;
1070 case CHARSET_TYPE_96:
1071 CHARSET_DIMENSION (cs) = 1;
1072 CHARSET_CHARS (cs) = 96;
1074 case CHARSET_TYPE_94X94:
1075 CHARSET_DIMENSION (cs) = 2;
1076 CHARSET_CHARS (cs) = 94;
1078 case CHARSET_TYPE_96X96:
1079 CHARSET_DIMENSION (cs) = 2;
1080 CHARSET_CHARS (cs) = 96;
1083 case CHARSET_TYPE_128:
1084 CHARSET_DIMENSION (cs) = 1;
1085 CHARSET_CHARS (cs) = 128;
1087 case CHARSET_TYPE_128X128:
1088 CHARSET_DIMENSION (cs) = 2;
1089 CHARSET_CHARS (cs) = 128;
1091 case CHARSET_TYPE_256:
1092 CHARSET_DIMENSION (cs) = 1;
1093 CHARSET_CHARS (cs) = 256;
1095 case CHARSET_TYPE_256X256:
1096 CHARSET_DIMENSION (cs) = 2;
1097 CHARSET_CHARS (cs) = 256;
/* Representation length: 1 for ASCII, otherwise the dimension plus one or
   plus two.
   NOTE(review): the condition choosing between +1 and +2 is not shown
   here; presumably +2 covers private charsets, whose leading byte is
   preceded by a prefix byte -- confirm.  */
1103 if (id == LEADING_BYTE_ASCII)
1104 CHARSET_REP_BYTES (cs) = 1;
1106 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 1;
1108 CHARSET_REP_BYTES (cs) = CHARSET_DIMENSION (cs) + 2;
1113 /* some charsets do not have final characters. This includes
1114 ASCII, Control-1, Composite, and the two faux private
1117 if (code_offset == 0)
1119 assert (NILP (charset_by_attributes[type][final]));
1120 charset_by_attributes[type][final] = obj;
1123 assert (NILP (charset_by_attributes[type][final][direction]));
1124 charset_by_attributes[type][final][direction] = obj;
1128 assert (NILP (charset_by_leading_byte[id - MIN_LEADING_BYTE]));
1129 charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
1132 /* official leading byte */
1133 rep_bytes_by_first_byte[id] = CHARSET_REP_BYTES (cs);
1136 /* Some charsets are "faux" and don't have names or really exist at
1137 all except in the leading-byte table. */
1139 Fputhash (name, obj, Vcharset_hash_table);
/* Hand out the next unused leading byte for a charset of the given
   DIMENSION and advance the matching counter.  Signals "No more character
   sets free for this dimension" when none can be handed out.  */
1144 get_unallocated_leading_byte (int dimension)
/* Variant with a single counter shared by all dimensions.
   NOTE(review): presumably an alternative to the per-dimension counters
   below, selected by a preprocessor conditional not shown here --
   confirm.  */
1149 if (next_allocated_leading_byte > MAX_LEADING_BYTE_PRIVATE)
1152 lb = next_allocated_leading_byte++;
/* Variant with separate counters and limits for dimension 1 and
   dimension 2.  */
1156 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
1159 lb = next_allocated_1_byte_leading_byte++;
1163 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
1166 lb = next_allocated_2_byte_leading_byte++;
1172 ("No more character sets free for this dimension",
1173 make_int (dimension));
/* Compute the code point of character CH in CHARSET from the charset's
   numeric attributes alone.  The result is a list with one integer per
   dimension of the charset.  */
1180 range_charset_code_point (Lisp_Object charset, Emchar ch)
/* Case 1: CH lies inside the charset's [UCS_MIN, UCS_MAX] range.  D is
   its offset within the charset; it is split into base-CHARS digits, each
   shifted by the charset's byte offset.  */
1184 if ((XCHARSET_UCS_MIN (charset) <= ch)
1185 && (ch <= XCHARSET_UCS_MAX (charset)))
1187 d = ch - XCHARSET_UCS_MIN (charset) + XCHARSET_CODE_OFFSET (charset);
1189 if (XCHARSET_DIMENSION (charset) == 1)
1190 return list1 (make_int (d + XCHARSET_BYTE_OFFSET (charset)));
1191 else if (XCHARSET_DIMENSION (charset) == 2)
1192 return list2 (make_int (d / XCHARSET_CHARS (charset)
1193 + XCHARSET_BYTE_OFFSET (charset)),
1194 make_int (d % XCHARSET_CHARS (charset)
1195 + XCHARSET_BYTE_OFFSET (charset)));
1196 else if (XCHARSET_DIMENSION (charset) == 3)
1197 return list3 (make_int (d / (XCHARSET_CHARS (charset)
1198 * XCHARSET_CHARS (charset))
1199 + XCHARSET_BYTE_OFFSET (charset)),
1200 make_int (d / XCHARSET_CHARS (charset)
1201 % XCHARSET_CHARS (charset)
1202 + XCHARSET_BYTE_OFFSET (charset)),
1203 make_int (d % XCHARSET_CHARS (charset)
1204 + XCHARSET_BYTE_OFFSET (charset)));
1205 else /* if (XCHARSET_DIMENSION (charset) == 4) */
1206 return list4 (make_int (d / (XCHARSET_CHARS (charset)
1207 * XCHARSET_CHARS (charset)
1208 * XCHARSET_CHARS (charset))
1209 + XCHARSET_BYTE_OFFSET (charset)),
1210 make_int (d / (XCHARSET_CHARS (charset)
1211 * XCHARSET_CHARS (charset))
1212 % XCHARSET_CHARS (charset)
1213 + XCHARSET_BYTE_OFFSET (charset)),
1214 make_int (d / XCHARSET_CHARS (charset)
1215 % XCHARSET_CHARS (charset)
1216 + XCHARSET_BYTE_OFFSET (charset)),
1217 make_int (d % XCHARSET_CHARS (charset)
1218 + XCHARSET_BYTE_OFFSET (charset)));
/* Case 2: the charset has code offset 0.  CH is then measured from the
   start of the block that belongs to the charset's final byte
   (final - '0') inside the MIN_CHAR_94 / 96 / 94x94 / 96x96 area.  */
1220 else if (XCHARSET_CODE_OFFSET (charset) == 0)
1222 if (XCHARSET_DIMENSION (charset) == 1)
1224 if (XCHARSET_CHARS (charset) == 94)
1226 if (((d = ch - (MIN_CHAR_94
1227 + (XCHARSET_FINAL (charset) - '0') * 94)) >= 0)
1229 return list1 (make_int (d + 33));
1231 else if (XCHARSET_CHARS (charset) == 96)
1233 if (((d = ch - (MIN_CHAR_96
1234 + (XCHARSET_FINAL (charset) - '0') * 96)) >= 0)
1236 return list1 (make_int (d + 32));
1241 else if (XCHARSET_DIMENSION (charset) == 2)
1243 if (XCHARSET_CHARS (charset) == 94)
1245 if (((d = ch - (MIN_CHAR_94x94
1246 + (XCHARSET_FINAL (charset) - '0') * 94 * 94))
/* NOTE(review): unlike every other return in this function, the two list2
   calls below pass plain C integers rather than make_int (...) values.
   This looks like a bug -- confirm and wrap both arguments in
   make_int.  */
1249 return list2 ((d / 94) + 33, d % 94 + 33);
1251 else if (XCHARSET_CHARS (charset) == 96)
1253 if (((d = ch - (MIN_CHAR_96x96
1254 + (XCHARSET_FINAL (charset) - '0') * 96 * 96))
1257 return list2 ((d / 96) + 32, d % 96 + 32);
/* Return the code point of character CH in CHARSET.  A value recorded for
   CHARSET in CH's entry of Vcharacter_attribute_table takes precedence;
   otherwise the code point is computed by range_charset_code_point.  */
1265 charset_code_point (Lisp_Object charset, Emchar ch)
1267 Lisp_Object cdef = get_char_code_table (ch, Vcharacter_attribute_table);
1269 if (!EQ (cdef, Qnil))
/* The entry is an alist keyed by attribute; look for CHARSET in it.  */
1271 Lisp_Object field = Fassq (charset, cdef);
1273 if (!EQ (field, Qnil))
1274 return Fcdr (field);
1276 return range_charset_code_point (charset, ch);
1279 Lisp_Object Vdefault_coded_charset_priority_list;
1283 /************************************************************************/
1284 /* Basic charset Lisp functions */
1285 /************************************************************************/
/* Lisp primitive `charsetp' (one argument): Qt when OBJECT satisfies
   CHARSETP, Qnil otherwise.  */
1287 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
1288 Return non-nil if OBJECT is a charset.
1292 return CHARSETP (object) ? Qt : Qnil;
/* Lisp primitive `find-charset' (one argument).  A charset object is
   returned as is.  Anything else must be a symbol (CHECK_SYMBOL) and is
   looked up in Vcharset_hash_table, with Qnil as the default result.  */
1295 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
1296 Retrieve the charset of the given name.
1297 If CHARSET-OR-NAME is a charset object, it is simply returned.
1298 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
1299 nil is returned. Otherwise the associated charset object is returned.
1303 if (CHARSETP (charset_or_name))
1304 return charset_or_name;
1306 CHECK_SYMBOL (charset_or_name);
1307 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
/* Lisp primitive `get-charset' (one argument).  Resolves NAME with
   Ffind_charset and signals "No such charset" for a NAME that cannot be
   resolved.  */
1310 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
1311 Retrieve the charset of the given name.
1312 Same as `find-charset' except an error is signalled if there is no such
1313 charset instead of returning nil.
1317 Lisp_Object charset = Ffind_charset (name);
1320 signal_simple_error ("No such charset", name);
1324 /* We store the charsets in hash tables with the names as the key and the
1325 actual charset object as the value. Occasionally we need to use them
1326 in a list format. These routines provide us with that. */
/* Closure handed to the hash-table mapper below.  It carries a pointer to
   the Lisp list being accumulated.  */
1327 struct charset_list_closure
1329 Lisp_Object *charset_list;
/* Mapper for elisp_maphash over Vcharset_hash_table.  Pushes the name of
   the charset VALUE onto the list that the closure points to.  KEY is not
   used in the lines shown.  */
1333 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
1334 void *charset_list_closure)
1336 /* This function can GC */
1337 struct charset_list_closure *chcl =
1338 (struct charset_list_closure*) charset_list_closure;
1339 Lisp_Object *charset_list = chcl->charset_list;
1341 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
/* Lisp primitive `charset-list' (no arguments).  Maps
   add_charset_to_list_mapper over Vcharset_hash_table to collect every
   charset name into a list, which is GC-protected (GCPRO1) while it is
   being built, and returns that list.  */
1345 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
1346 Return a list of the names of all defined charsets.
1350 Lisp_Object charset_list = Qnil;
1351 struct gcpro gcpro1;
1352 struct charset_list_closure charset_list_closure;
1354 GCPRO1 (charset_list);
1355 charset_list_closure.charset_list = &charset_list;
1356 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
1357 &charset_list_closure);
1360 return charset_list;
/* Lisp primitive `charset-name' (one argument).  Resolves CHARSET with
   Fget_charset and returns the name of the resulting charset object.  */
1363 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
1364 Return the name of the given charset.
1368 return XCHARSET_NAME (Fget_charset (charset));
1371 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
1372 Define a new character set.
1373 This function is for use with Mule support.
1374 NAME is a symbol, the name by which the character set is normally referred.
1375 DOC-STRING is a string describing the character set.
1376 PROPS is a property list, describing the specific nature of the
1377 character set. Recognized properties are:
1379 'short-name Short version of the charset name (ex: Latin-1)
1380 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
1381 'registry A regular expression matching the font registry field for
1383 'dimension Number of octets used to index a character in this charset.
1384 Either 1 or 2. Defaults to 1.
1385 'columns Number of columns used to display a character in this charset.
1386 Only used in TTY mode. (Under X, the actual width of a
1387 character can be derived from the font used to display the
1388 characters.) If unspecified, defaults to the dimension
1389 (this is almost always the correct value).
1390 'chars Number of characters in each dimension (94 or 96).
1391 Defaults to 94. Note that if the dimension is 2, the
1392 character set thus described is 94x94 or 96x96.
1393 'final Final byte of ISO 2022 escape sequence. Must be
1394 supplied. Each combination of (DIMENSION, CHARS) defines a
1395 separate namespace for final bytes. Note that ISO
1396 2022 restricts the final byte to the range
1397 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
1398 dimension == 2. Note also that final bytes in the range
1399 0x30 - 0x3F are reserved for user-defined (not official)
1401 'graphic 0 (use left half of font on output) or 1 (use right half
1402 of font on output). Defaults to 0. For example, for
1403 a font whose registry is ISO8859-1, the left half
1404 (octets 0x20 - 0x7F) is the `ascii' character set, while
1405 the right half (octets 0xA0 - 0xFF) is the `latin-1'
1406 character set. With 'graphic set to 0, the octets
1407 will have their high bit cleared; with it set to 1,
1408 the octets will have their high bit set.
1409 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
1411 'ccl-program A compiled CCL program used to convert a character in
1412 this charset into an index into the font. This is in
1413 addition to the 'graphic property. The CCL program
1414 is passed the octets of the character, with the high
1415 bit cleared and set depending upon whether the value
1416 of the 'graphic property is 0 or 1.
1418 (name, doc_string, props))
1420 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
1421 int direction = CHARSET_LEFT_TO_RIGHT;
1423 Lisp_Object registry = Qnil;
1424 Lisp_Object charset;
1425 Lisp_Object rest, keyword, value;
1426 Lisp_Object ccl_program = Qnil;
1427 Lisp_Object short_name = Qnil, long_name = Qnil;
1428 unsigned char byte_offset = 0;
1430 CHECK_SYMBOL (name);
1431 if (!NILP (doc_string))
1432 CHECK_STRING (doc_string);
1434 charset = Ffind_charset (name);
1435 if (!NILP (charset))
1436 signal_simple_error ("Cannot redefine existing charset", name);
1438 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
1440 if (EQ (keyword, Qshort_name))
1442 CHECK_STRING (value);
1446 if (EQ (keyword, Qlong_name))
1448 CHECK_STRING (value);
1452 else if (EQ (keyword, Qdimension))
1455 dimension = XINT (value);
1456 if (dimension < 1 || dimension > 2)
1457 signal_simple_error ("Invalid value for 'dimension", value);
1460 else if (EQ (keyword, Qchars))
1463 chars = XINT (value);
1464 if (chars != 94 && chars != 96)
1465 signal_simple_error ("Invalid value for 'chars", value);
1468 else if (EQ (keyword, Qcolumns))
1471 columns = XINT (value);
1472 if (columns != 1 && columns != 2)
1473 signal_simple_error ("Invalid value for 'columns", value);
1476 else if (EQ (keyword, Qgraphic))
1479 graphic = XINT (value);
1481 if (graphic < 0 || graphic > 2)
1483 if (graphic < 0 || graphic > 1)
1485 signal_simple_error ("Invalid value for 'graphic", value);
1488 else if (EQ (keyword, Qregistry))
1490 CHECK_STRING (value);
1494 else if (EQ (keyword, Qdirection))
1496 if (EQ (value, Ql2r))
1497 direction = CHARSET_LEFT_TO_RIGHT;
1498 else if (EQ (value, Qr2l))
1499 direction = CHARSET_RIGHT_TO_LEFT;
1501 signal_simple_error ("Invalid value for 'direction", value);
1504 else if (EQ (keyword, Qfinal))
1506 CHECK_CHAR_COERCE_INT (value);
1507 final = XCHAR (value);
1508 if (final < '0' || final > '~')
1509 signal_simple_error ("Invalid value for 'final", value);
1512 else if (EQ (keyword, Qccl_program))
1514 CHECK_VECTOR (value);
1515 ccl_program = value;
1519 signal_simple_error ("Unrecognized property", keyword);
1523 error ("'final must be specified");
1524 if (dimension == 2 && final > 0x5F)
1526 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
1530 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1532 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1534 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
1535 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
1537 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
1539 id = get_unallocated_leading_byte (dimension);
1541 if (NILP (doc_string))
1542 doc_string = build_string ("");
1544 if (NILP (registry))
1545 registry = build_string ("");
1547 if (NILP (short_name))
1548 XSETSTRING (short_name, XSYMBOL (name)->name);
1550 if (NILP (long_name))
1551 long_name = doc_string;
1554 columns = dimension;
1555 charset = make_charset (id, name, type, columns, graphic,
1556 final, direction, short_name, long_name,
1557 doc_string, registry,
1558 Qnil, 0, 0, 0, byte_offset);
1559 if (!NILP (ccl_program))
1560 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
/* Lisp primitive `make-reverse-direction-charset'.

   Creates a charset named NEW-NAME that copies the type, columns,
   graphic, final byte, names, doc string and registry of CHARSET but has
   the opposite direction, then records each charset in the other's
   reverse-direction slot.  Signals an error if CHARSET already has a
   reverse-direction charset or if NEW-NAME already names a charset.

   NOTE(review): the 'ccl-program of CHARSET is not handed to the copy in
   the code visible here -- confirm whether that is intended.  */
1564 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
1566 Make a charset equivalent to CHARSET but which goes in the opposite direction.
1567 NEW-NAME is the name of the new charset. Return the new charset.
1569 (charset, new_name))
1571 Lisp_Object new_charset = Qnil;
1572 int id, dimension, columns, graphic, final;
1573 int direction, type;
1574 Lisp_Object registry, doc_string, short_name, long_name;
1575 struct Lisp_Charset *cs;
/* A charset may have at most one reverse-direction partner.  */
1577 charset = Fget_charset (charset);
1578 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
1579 signal_simple_error ("Charset already has reverse-direction charset",
1582 CHECK_SYMBOL (new_name);
1583 if (!NILP (Ffind_charset (new_name)))
1584 signal_simple_error ("Cannot redefine existing charset", new_name);
1586 cs = XCHARSET (charset);
/* Copy the source charset's attributes; the copy gets a newly allocated
   leading byte of its own.  */
1588 type = CHARSET_TYPE (cs);
1589 columns = CHARSET_COLUMNS (cs);
1590 dimension = CHARSET_DIMENSION (cs);
1591 id = get_unallocated_leading_byte (dimension);
1593 graphic = CHARSET_GRAPHIC (cs);
1594 final = CHARSET_FINAL (cs);
/* The new direction is the opposite of the source charset's.  */
1595 direction = CHARSET_RIGHT_TO_LEFT;
1596 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
1597 direction = CHARSET_LEFT_TO_RIGHT;
1598 doc_string = CHARSET_DOC_STRING (cs);
1599 short_name = CHARSET_SHORT_NAME (cs);
1600 long_name = CHARSET_LONG_NAME (cs);
1601 registry = CHARSET_REGISTRY (cs);
1603 new_charset = make_charset (id, new_name, type, columns,
1604 graphic, final, direction, short_name, long_name,
1605 doc_string, registry,
1607 CHARSET_DECODING_TABLE(cs),
1608 CHARSET_UCS_MIN(cs),
1609 CHARSET_UCS_MAX(cs),
1610 CHARSET_CODE_OFFSET(cs),
1611 CHARSET_BYTE_OFFSET(cs)
/* Link the two charsets to each other.  */
1617 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
1618 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
1623 DEFUN ("define-charset-alias", Fdefine_charset_alias, 2, 2, 0, /*
1624 Define symbol ALIAS as an alias for CHARSET.
1628 CHECK_SYMBOL (alias);
1629 charset = Fget_charset (charset);
1630 return Fputhash (alias, charset, Vcharset_hash_table);
1633 /* #### Reverse direction charsets not yet implemented. */
/* NOTE(review): the DEFSUBR for this primitive is commented out in
   syms_of_mule_charset (there under the name Freverse_direction_charset),
   so it does not appear to be registered with Lisp -- confirm before
   relying on it.  */
1635 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
1637 Return the reverse-direction charset parallel to CHARSET, if any.
1638 This is the charset with the same properties (in particular, the same
1639 dimension, number of characters per dimension, and final byte) as
1640 CHARSET but whose characters are displayed in the opposite direction.
1644 charset = Fget_charset (charset);
/* Hand back whatever is stored in the charset's reverse-direction slot.  */
1645 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
1649 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
1650 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
1651 If DIRECTION is omitted, both directions will be checked (left-to-right
1652 will be returned if character sets exist for both directions).
1654 (dimension, chars, final, direction))
1656 int dm, ch, fi, di = -1;
1658 Lisp_Object obj = Qnil;
1660 CHECK_INT (dimension);
1661 dm = XINT (dimension);
1662 if (dm < 1 || dm > 2)
1663 signal_simple_error ("Invalid value for DIMENSION", dimension);
1667 if (ch != 94 && ch != 96)
1668 signal_simple_error ("Invalid value for CHARS", chars);
1670 CHECK_CHAR_COERCE_INT (final);
1672 if (fi < '0' || fi > '~')
1673 signal_simple_error ("Invalid value for FINAL", final);
1675 if (EQ (direction, Ql2r))
1676 di = CHARSET_LEFT_TO_RIGHT;
1677 else if (EQ (direction, Qr2l))
1678 di = CHARSET_RIGHT_TO_LEFT;
1679 else if (!NILP (direction))
1680 signal_simple_error ("Invalid value for DIRECTION", direction);
1682 if (dm == 2 && fi > 0x5F)
1684 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
1687 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
1689 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
1693 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
1695 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
1698 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
1701 return XCHARSET_NAME (obj);
1705 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
1706 Return short name of CHARSET.
1710 return XCHARSET_SHORT_NAME (Fget_charset (charset));
1713 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
1714 Return long name of CHARSET.
1718 return XCHARSET_LONG_NAME (Fget_charset (charset));
1721 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
1722 Return description of CHARSET.
1726 return XCHARSET_DOC_STRING (Fget_charset (charset));
1729 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
1730 Return dimension of CHARSET.
1734 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
1737 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
1738 Return property PROP of CHARSET.
1739 Recognized properties are those listed in `make-charset', as well as
1740 'name and 'doc-string.
1744 struct Lisp_Charset *cs;
1746 charset = Fget_charset (charset);
1747 cs = XCHARSET (charset);
1749 CHECK_SYMBOL (prop);
1750 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
1751 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
1752 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
1753 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
1754 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
1755 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
1756 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
1757 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
1758 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
1759 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
1760 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1761 if (EQ (prop, Qdirection))
1762 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1763 if (EQ (prop, Qreverse_direction_charset))
1765 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1769 return XCHARSET_NAME (obj);
1771 signal_simple_error ("Unrecognized charset property name", prop);
1772 return Qnil; /* not reached */
1775 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1776 Return charset identification number of CHARSET.
1780 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1783 /* #### We need to figure out which properties we really want to
1786 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1787 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1789 (charset, ccl_program))
1791 charset = Fget_charset (charset);
1792 CHECK_VECTOR (ccl_program);
1793 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1798 invalidate_charset_font_caches (Lisp_Object charset)
1800 /* Invalidate font cache entries for charset on all devices. */
1801 Lisp_Object devcons, concons, hash_table;
1802 DEVICE_LOOP_NO_BREAK (devcons, concons)
1804 struct device *d = XDEVICE (XCAR (devcons));
1805 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1806 if (!UNBOUNDP (hash_table))
1807 Fclrhash (hash_table);
1811 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1812 Set the 'registry property of CHARSET to REGISTRY.
1814 (charset, registry))
1816 charset = Fget_charset (charset);
1817 CHECK_STRING (registry);
1818 XCHARSET_REGISTRY (charset) = registry;
1819 invalidate_charset_font_caches (charset);
1820 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1825 DEFUN ("charset-mapping-table", Fcharset_mapping_table, 1, 1, 0, /*
1826 Return mapping-table of CHARSET.
1830 return XCHARSET_DECODING_TABLE (Fget_charset (charset));
/* Lisp primitive `set-charset-mapping-table'.

   Installs TABLE as the decoding table of CHARSET.  TABLE may be nil or
   a vector with no more entries than the charset's CHARSET_CHARS value;
   anything else signals a wrong-type-argument error.  For a vector, the
   entries are then walked (one level for dimension 1, two levels for
   dimension 2) and put_char_attribute is called with CHARSET and a list
   of position codes, each offset by CHARSET_BYTE_OFFSET.

   NOTE(review): in the two-dimensional case the previous table is put
   back when an over-long row is found, but put_char_attribute calls made
   for earlier rows have already happened -- confirm whether that partial
   update is acceptable.  */
1833 DEFUN ("set-charset-mapping-table", Fset_charset_mapping_table, 2, 2, 0, /*
1834 Set mapping-table of CHARSET to TABLE.
1838 struct Lisp_Charset *cs;
1839 Lisp_Object old_table;
/* Resolve the charset argument and get at its C structure.  */
1842 charset = Fget_charset (charset);
1843 cs = XCHARSET (charset);
1845 if (EQ (table, Qnil))
1847 CHARSET_DECODING_TABLE(cs) = table;
1850 else if (VECTORP (table))
/* The vector may not be longer than the charset's character count.  The
   previous table is remembered so it can be restored further down.  */
1852 if (XVECTOR_LENGTH (table) > CHARSET_CHARS (cs))
1853 args_out_of_range (table, make_int (CHARSET_CHARS (cs)));
1854 old_table = CHARSET_DECODING_TABLE(cs);
1855 CHARSET_DECODING_TABLE(cs) = table;
1858 signal_error (Qwrong_type_argument,
1859 list2 (build_translated_string ("vector-or-nil-p"),
1861 /* signal_simple_error ("Wrong type argument: vector-or-nil-p", table); */
1863 switch (CHARSET_DIMENSION (cs))
/* Entry I of the table corresponds to position code I + byte offset.  */
1866 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1868 Lisp_Object c = XVECTOR_DATA(table)[i];
1873 list1 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
1877 for (i = 0; i < XVECTOR_LENGTH (table); i++)
1879 Lisp_Object v = XVECTOR_DATA(table)[i];
/* An over-long row undoes the table installation before signalling.  */
1885 if (XVECTOR_LENGTH (v) > CHARSET_CHARS (cs))
1887 CHARSET_DECODING_TABLE(cs) = old_table;
1888 args_out_of_range (v, make_int (CHARSET_CHARS (cs)));
1890 for (j = 0; j < XVECTOR_LENGTH (v); j++)
1892 Lisp_Object c = XVECTOR_DATA(v)[j];
1895 put_char_attribute (c, charset,
1898 (i + CHARSET_BYTE_OFFSET (cs)),
1900 (j + CHARSET_BYTE_OFFSET (cs))));
1904 put_char_attribute (v, charset,
1906 (make_int (i + CHARSET_BYTE_OFFSET (cs))));
1915 /************************************************************************/
1916 /* Lisp primitives for working with characters */
1917 /************************************************************************/
/* Lisp primitive `make-char'.

   Builds a character from CHARSET and one or two octets.  Each octet
   must lie within a range chosen from the charset: 0-127 for ascii,
   0-31 for control-1, 0-255 when CHARSET_CHARS is 256, 33-126 when it is
   94, and 32-127 otherwise.  An out-of-range octet signals
   args-out-of-range.  For a one-dimensional charset the character is
   made from ARG1 alone and a second octet is an error.  */
1919 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1920 Make a character from CHARSET and octets ARG1 and ARG2.
1921 ARG2 is required only for characters from two-dimensional charsets.
1922 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1923 character s with caron.
1925 (charset, arg1, arg2))
1927 struct Lisp_Charset *cs;
1929 int lowlim, highlim;
1931 charset = Fget_charset (charset);
1932 cs = XCHARSET (charset);
/* Select the inclusive octet range that is valid for this charset.  */
1934 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1935 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1937 else if (CHARSET_CHARS (cs) == 256) lowlim = 0, highlim = 255;
1939 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1940 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1943 /* It is useful (and safe, according to Olivier Galibert) to strip
1944 the 8th bit off ARG1 and ARG2 because it allows programmers to
1945 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1946 Latin 2 code of the character. */
1954 if (a1 < lowlim || a1 > highlim)
1955 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
/* One-dimensional charsets are finished here: the character is built
   from the first octet only.  */
1957 if (CHARSET_DIMENSION (cs) == 1)
1961 ("Charset is of dimension one; second octet must be nil", arg2);
1962 return make_char (MAKE_CHAR (charset, a1, 0));
1971 a2 = XINT (arg2) & 0x7f;
1973 if (a2 < lowlim || a2 > highlim)
1974 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1976 return make_char (MAKE_CHAR (charset, a1, a2));
1979 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1980 Return the character set of char CH.
1984 CHECK_CHAR_COERCE_INT (ch);
1986 return XCHARSET_NAME (CHAR_CHARSET (XCHAR (ch)));
1989 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1990 Return list of charset and one or two position-codes of CHAR.
1994 /* This function can GC */
1995 struct gcpro gcpro1, gcpro2;
1996 Lisp_Object charset = Qnil;
1997 Lisp_Object rc = Qnil;
2000 GCPRO2 (charset, rc);
2001 CHECK_CHAR_COERCE_INT (character);
2003 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
2005 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
2007 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
2011 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
2019 #ifdef ENABLE_COMPOSITE_CHARS
2020 /************************************************************************/
2021 /* composite character functions */
2022 /************************************************************************/
2025 lookup_composite_char (Bufbyte *str, int len)
2027 Lisp_Object lispstr = make_string (str, len);
2028 Lisp_Object ch = Fgethash (lispstr,
2029 Vcomposite_char_string2char_hash_table,
2035 if (composite_char_row_next >= 128)
2036 signal_simple_error ("No more composite chars available", lispstr);
2037 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
2038 composite_char_col_next);
2039 Fputhash (make_char (emch), lispstr,
2040 Vcomposite_char_char2string_hash_table);
2041 Fputhash (lispstr, make_char (emch),
2042 Vcomposite_char_string2char_hash_table);
2043 composite_char_col_next++;
2044 if (composite_char_col_next >= 128)
2046 composite_char_col_next = 32;
2047 composite_char_row_next++;
2056 composite_char_string (Emchar ch)
2058 Lisp_Object str = Fgethash (make_char (ch),
2059 Vcomposite_char_char2string_hash_table,
2061 assert (!UNBOUNDP (str));
2065 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
2066 Convert a string into a single composite character.
2067 The character is the result of overstriking all the characters in
2072 CHECK_STRING (string);
2073 return make_char (lookup_composite_char (XSTRING_DATA (string),
2074 XSTRING_LENGTH (string)));
2077 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
2078 Return a string of the characters comprising a composite character.
2086 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
2087 signal_simple_error ("Must be composite char", ch);
2088 return composite_char_string (emch);
2090 #endif /* ENABLE_COMPOSITE_CHARS */
2093 /************************************************************************/
2094 /* initialization */
2095 /************************************************************************/
/* Registers the Lisp primitives defined in this file (DEFSUBR) and
   interns the symbols used for charset property names, direction values
   and predefined charset names (defsymbol).
   NOTE(review): some groups below are presumably compiled conditionally;
   the controlling preprocessor lines are not all visible here.  */
2098 syms_of_mule_charset (void)
/* Primitives for querying and creating charsets.  */
2100 DEFSUBR (Fcharsetp);
2101 DEFSUBR (Ffind_charset);
2102 DEFSUBR (Fget_charset);
2103 DEFSUBR (Fcharset_list);
2104 DEFSUBR (Fcharset_name);
2105 DEFSUBR (Fmake_charset);
2106 DEFSUBR (Fmake_reverse_direction_charset);
2107 /* DEFSUBR (Freverse_direction_charset); */
2108 DEFSUBR (Fdefine_charset_alias);
2109 DEFSUBR (Fcharset_from_attributes);
2110 DEFSUBR (Fcharset_short_name);
2111 DEFSUBR (Fcharset_long_name);
2112 DEFSUBR (Fcharset_description);
2113 DEFSUBR (Fcharset_dimension);
2114 DEFSUBR (Fcharset_property);
2115 DEFSUBR (Fcharset_id);
2116 DEFSUBR (Fset_charset_ccl_program);
2117 DEFSUBR (Fset_charset_registry);
/* Character-attribute and mapping-table primitives.  */
2119 DEFSUBR (Fchar_attribute_alist);
2120 DEFSUBR (Fget_char_attribute);
2121 DEFSUBR (Fput_char_attribute);
2122 DEFSUBR (Fdefine_char);
2123 DEFSUBR (Fcharset_mapping_table);
2124 DEFSUBR (Fset_charset_mapping_table);
/* Primitives that work on individual characters.  */
2127 DEFSUBR (Fmake_char);
2128 DEFSUBR (Fchar_charset);
2129 DEFSUBR (Fsplit_char);
2131 #ifdef ENABLE_COMPOSITE_CHARS
2132 DEFSUBR (Fmake_composite_char);
2133 DEFSUBR (Fcomposite_char_string);
/* Symbols for the charset predicate and charset property names.  */
2136 defsymbol (&Qcharsetp, "charsetp");
2137 defsymbol (&Qregistry, "registry");
2138 defsymbol (&Qfinal, "final");
2139 defsymbol (&Qgraphic, "graphic");
2140 defsymbol (&Qdirection, "direction");
2141 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
2142 defsymbol (&Qshort_name, "short-name");
2143 defsymbol (&Qlong_name, "long-name");
/* The two values accepted for the 'direction property.  */
2145 defsymbol (&Ql2r, "l2r");
2146 defsymbol (&Qr2l, "r2l");
2148 /* Charsets, compatible with FSF 20.3
2149 Naming convention is Script-Charset[-Edition] */
2150 defsymbol (&Qascii, "ascii");
2151 defsymbol (&Qcontrol_1, "control-1");
2152 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
2153 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
2154 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
2155 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
2156 defsymbol (&Qthai_tis620, "thai-tis620");
2157 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
2158 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
2159 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
2160 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
2161 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
2162 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
2163 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
2164 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
2165 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
2166 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
2167 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
2168 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
2169 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
2170 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
/* Further charset names beyond the FSF-compatible list above.  */
2172 defsymbol (&Qucs, "ucs");
2173 defsymbol (&Qucs_bmp, "ucs-bmp");
2174 defsymbol (&Qlatin_viscii, "latin-viscii");
2175 defsymbol (&Qlatin_viscii_lower, "latin-viscii-lower");
2176 defsymbol (&Qlatin_viscii_upper, "latin-viscii-upper");
2177 defsymbol (&Qvietnamese_viscii_lower, "vietnamese-viscii-lower");
2178 defsymbol (&Qvietnamese_viscii_upper, "vietnamese-viscii-upper");
2179 defsymbol (&Qhiragana_jisx0208, "hiragana-jisx0208");
2180 defsymbol (&Qkatakana_jisx0208, "katakana-jisx0208");
2182 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
2183 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
2185 defsymbol (&Qcomposite, "composite");
/* Resets the charset lookup tables to Qnil, initialises the counters
   used when allocating leading bytes, and defines the Lisp variables
   `leading-code-private-11', `utf-2000-version' and
   `default-coded-charset-priority-list'.  */
2189 vars_of_mule_charset (void)
2196 /* Table of charsets indexed by leading byte. */
2197 for (i = 0; i < countof (charset_by_leading_byte); i++)
2198 charset_by_leading_byte[i] = Qnil;
/* NOTE(review): charset_by_attributes is cleared twice below, once with
   two indices and once with three.  Presumably these are alternative
   branches selected by preprocessor conditionals that are not visible
   here -- confirm.  */
2201 /* Table of charsets indexed by type/final-byte. */
2202 for (i = 0; i < countof (charset_by_attributes); i++)
2203 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2204 charset_by_attributes[i][j] = Qnil;
2206 /* Table of charsets indexed by type/final-byte/direction. */
2207 for (i = 0; i < countof (charset_by_attributes); i++)
2208 for (j = 0; j < countof (charset_by_attributes[0]); j++)
2209 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
2210 charset_by_attributes[i][j][k] = Qnil;
/* Starting values for the leading-byte allocation counters; again these
   look like alternative configurations -- TODO confirm.  */
2214 next_allocated_leading_byte = MIN_LEADING_BYTE_PRIVATE;
2216 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
2217 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
/* NOTE(review): leading_code_private_11 receives the same value both
   before and after its DEFVAR_INT; one of the two assignments looks
   redundant.  */
2221 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2222 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11 /*
2223 Leading-code of private TYPE9N charset of column-width 1.
2225 leading_code_private_11 = PRE_LEADING_BYTE_PRIVATE_1;
2229 Vutf_2000_version = build_string("0.11 (Shiki)");
2230 DEFVAR_LISP ("utf-2000-version", &Vutf_2000_version /*
2231 Version number of UTF-2000.
2234 staticpro (&Vcharacter_attribute_table);
/* The attribute table is created with Qnil passed to
   make_char_code_table.  */
2235 Vcharacter_attribute_table = make_char_code_table (Qnil);
2237 Vdefault_coded_charset_priority_list = Qnil;
2238 DEFVAR_LISP ("default-coded-charset-priority-list",
2239 &Vdefault_coded_charset_priority_list /*
2240 Default order of preferred coded-character-sets.
2246 complex_vars_of_mule_charset (void)
2248 staticpro (&Vcharset_hash_table);
2249 Vcharset_hash_table =
2250 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2252 /* Predefined character sets. We store them into variables for
2257 make_charset (LEADING_BYTE_UCS_BMP, Qucs_bmp,
2258 CHARSET_TYPE_256X256, 1, 2, 0,
2259 CHARSET_LEFT_TO_RIGHT,
2260 build_string ("BMP"),
2261 build_string ("BMP"),
2262 build_string ("ISO/IEC 10646 Group 0 Plane 0 (BMP)"),
2263 build_string ("\\(ISO10646.*-1\\|UNICODE[23]?-0\\)"),
2264 Qnil, 0, 0xFFFF, 0, 0);
2266 # define MIN_CHAR_THAI 0
2267 # define MAX_CHAR_THAI 0
2268 # define MIN_CHAR_GREEK 0
2269 # define MAX_CHAR_GREEK 0
2270 # define MIN_CHAR_HEBREW 0
2271 # define MAX_CHAR_HEBREW 0
2272 # define MIN_CHAR_HALFWIDTH_KATAKANA 0
2273 # define MAX_CHAR_HALFWIDTH_KATAKANA 0
2274 # define MIN_CHAR_CYRILLIC 0
2275 # define MAX_CHAR_CYRILLIC 0
2278 make_charset (LEADING_BYTE_ASCII, Qascii,
2279 CHARSET_TYPE_94, 1, 0, 'B',
2280 CHARSET_LEFT_TO_RIGHT,
2281 build_string ("ASCII"),
2282 build_string ("ASCII)"),
2283 build_string ("ASCII (ISO646 IRV)"),
2284 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"),
2285 Qnil, 0, 0x7F, 0, 0);
2286 Vcharset_control_1 =
2287 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1,
2288 CHARSET_TYPE_94, 1, 1, 0,
2289 CHARSET_LEFT_TO_RIGHT,
2290 build_string ("C1"),
2291 build_string ("Control characters"),
2292 build_string ("Control characters 128-191"),
2294 Qnil, 0x80, 0x9F, 0, 0);
2295 Vcharset_latin_iso8859_1 =
2296 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1,
2297 CHARSET_TYPE_96, 1, 1, 'A',
2298 CHARSET_LEFT_TO_RIGHT,
2299 build_string ("Latin-1"),
2300 build_string ("ISO8859-1 (Latin-1)"),
2301 build_string ("ISO8859-1 (Latin-1)"),
2302 build_string ("iso8859-1"),
2303 Qnil, 0xA0, 0xFF, 0, 32);
2304 Vcharset_latin_iso8859_2 =
2305 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2,
2306 CHARSET_TYPE_96, 1, 1, 'B',
2307 CHARSET_LEFT_TO_RIGHT,
2308 build_string ("Latin-2"),
2309 build_string ("ISO8859-2 (Latin-2)"),
2310 build_string ("ISO8859-2 (Latin-2)"),
2311 build_string ("iso8859-2"),
2313 Vcharset_latin_iso8859_3 =
2314 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3,
2315 CHARSET_TYPE_96, 1, 1, 'C',
2316 CHARSET_LEFT_TO_RIGHT,
2317 build_string ("Latin-3"),
2318 build_string ("ISO8859-3 (Latin-3)"),
2319 build_string ("ISO8859-3 (Latin-3)"),
2320 build_string ("iso8859-3"),
2322 Vcharset_latin_iso8859_4 =
2323 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4,
2324 CHARSET_TYPE_96, 1, 1, 'D',
2325 CHARSET_LEFT_TO_RIGHT,
2326 build_string ("Latin-4"),
2327 build_string ("ISO8859-4 (Latin-4)"),
2328 build_string ("ISO8859-4 (Latin-4)"),
2329 build_string ("iso8859-4"),
2331 Vcharset_thai_tis620 =
2332 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620,
2333 CHARSET_TYPE_96, 1, 1, 'T',
2334 CHARSET_LEFT_TO_RIGHT,
2335 build_string ("TIS620"),
2336 build_string ("TIS620 (Thai)"),
2337 build_string ("TIS620.2529 (Thai)"),
2338 build_string ("tis620"),
2339 Qnil, MIN_CHAR_THAI, MAX_CHAR_THAI, 0, 32);
2340 Vcharset_greek_iso8859_7 =
2341 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7,
2342 CHARSET_TYPE_96, 1, 1, 'F',
2343 CHARSET_LEFT_TO_RIGHT,
2344 build_string ("ISO8859-7"),
2345 build_string ("ISO8859-7 (Greek)"),
2346 build_string ("ISO8859-7 (Greek)"),
2347 build_string ("iso8859-7"),
2348 Qnil, MIN_CHAR_GREEK, MAX_CHAR_GREEK, 0, 32);
2349 Vcharset_arabic_iso8859_6 =
2350 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6,
2351 CHARSET_TYPE_96, 1, 1, 'G',
2352 CHARSET_RIGHT_TO_LEFT,
2353 build_string ("ISO8859-6"),
2354 build_string ("ISO8859-6 (Arabic)"),
2355 build_string ("ISO8859-6 (Arabic)"),
2356 build_string ("iso8859-6"),
2358 Vcharset_hebrew_iso8859_8 =
2359 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8,
2360 CHARSET_TYPE_96, 1, 1, 'H',
2361 CHARSET_RIGHT_TO_LEFT,
2362 build_string ("ISO8859-8"),
2363 build_string ("ISO8859-8 (Hebrew)"),
2364 build_string ("ISO8859-8 (Hebrew)"),
2365 build_string ("iso8859-8"),
2366 Qnil, MIN_CHAR_HEBREW, MAX_CHAR_HEBREW, 0, 32);
2367 Vcharset_katakana_jisx0201 =
2368 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201,
2369 CHARSET_TYPE_94, 1, 1, 'I',
2370 CHARSET_LEFT_TO_RIGHT,
2371 build_string ("JISX0201 Kana"),
2372 build_string ("JISX0201.1976 (Japanese Kana)"),
2373 build_string ("JISX0201.1976 Japanese Kana"),
2374 build_string ("jisx0201\\.1976"),
2376 MIN_CHAR_HALFWIDTH_KATAKANA,
2377 MAX_CHAR_HALFWIDTH_KATAKANA, 0, 33);
2378 Vcharset_latin_jisx0201 =
2379 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201,
2380 CHARSET_TYPE_94, 1, 0, 'J',
2381 CHARSET_LEFT_TO_RIGHT,
2382 build_string ("JISX0201 Roman"),
2383 build_string ("JISX0201.1976 (Japanese Roman)"),
2384 build_string ("JISX0201.1976 Japanese Roman"),
2385 build_string ("jisx0201\\.1976"),
2387 Vcharset_cyrillic_iso8859_5 =
2388 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5,
2389 CHARSET_TYPE_96, 1, 1, 'L',
2390 CHARSET_LEFT_TO_RIGHT,
2391 build_string ("ISO8859-5"),
2392 build_string ("ISO8859-5 (Cyrillic)"),
2393 build_string ("ISO8859-5 (Cyrillic)"),
2394 build_string ("iso8859-5"),
2395 Qnil, MIN_CHAR_CYRILLIC, MAX_CHAR_CYRILLIC, 0, 32);
2396 Vcharset_latin_iso8859_9 =
2397 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9,
2398 CHARSET_TYPE_96, 1, 1, 'M',
2399 CHARSET_LEFT_TO_RIGHT,
2400 build_string ("Latin-5"),
2401 build_string ("ISO8859-9 (Latin-5)"),
2402 build_string ("ISO8859-9 (Latin-5)"),
2403 build_string ("iso8859-9"),
2405 Vcharset_japanese_jisx0208_1978 =
2406 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978,
2407 CHARSET_TYPE_94X94, 2, 0, '@',
2408 CHARSET_LEFT_TO_RIGHT,
2409 build_string ("JIS X0208:1978"),
2410 build_string ("JIS X0208:1978 (Japanese)"),
2412 ("JIS X0208:1978 Japanese Kanji (so called \"old JIS\")"),
2413 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"),
2415 Vcharset_chinese_gb2312 =
2416 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312,
2417 CHARSET_TYPE_94X94, 2, 0, 'A',
2418 CHARSET_LEFT_TO_RIGHT,
2419 build_string ("GB2312"),
2420 build_string ("GB2312)"),
2421 build_string ("GB2312 Chinese simplified"),
2422 build_string ("gb2312"),
2424 Vcharset_japanese_jisx0208 =
2425 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208,
2426 CHARSET_TYPE_94X94, 2, 0, 'B',
2427 CHARSET_LEFT_TO_RIGHT,
2428 build_string ("JISX0208"),
2429 build_string ("JIS X0208:1983 (Japanese)"),
2430 build_string ("JIS X0208:1983 Japanese Kanji"),
2431 build_string ("jisx0208\\.1983"),
2433 Vcharset_korean_ksc5601 =
2434 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601,
2435 CHARSET_TYPE_94X94, 2, 0, 'C',
2436 CHARSET_LEFT_TO_RIGHT,
2437 build_string ("KSC5601"),
2438 build_string ("KSC5601 (Korean"),
2439 build_string ("KSC5601 Korean Hangul and Hanja"),
2440 build_string ("ksc5601"),
2442 Vcharset_japanese_jisx0212 =
2443 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212,
2444 CHARSET_TYPE_94X94, 2, 0, 'D',
2445 CHARSET_LEFT_TO_RIGHT,
2446 build_string ("JISX0212"),
2447 build_string ("JISX0212 (Japanese)"),
2448 build_string ("JISX0212 Japanese Supplement"),
2449 build_string ("jisx0212"),
2452 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
2453 Vcharset_chinese_cns11643_1 =
2454 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1,
2455 CHARSET_TYPE_94X94, 2, 0, 'G',
2456 CHARSET_LEFT_TO_RIGHT,
2457 build_string ("CNS11643-1"),
2458 build_string ("CNS11643-1 (Chinese traditional)"),
2460 ("CNS 11643 Plane 1 Chinese traditional"),
2461 build_string (CHINESE_CNS_PLANE_RE("1")),
2463 Vcharset_chinese_cns11643_2 =
2464 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2,
2465 CHARSET_TYPE_94X94, 2, 0, 'H',
2466 CHARSET_LEFT_TO_RIGHT,
2467 build_string ("CNS11643-2"),
2468 build_string ("CNS11643-2 (Chinese traditional)"),
2470 ("CNS 11643 Plane 2 Chinese traditional"),
2471 build_string (CHINESE_CNS_PLANE_RE("2")),
2474 Vcharset_latin_viscii_lower =
2475 make_charset (LEADING_BYTE_LATIN_VISCII_LOWER, Qlatin_viscii_lower,
2476 CHARSET_TYPE_96, 1, 1, '1',
2477 CHARSET_LEFT_TO_RIGHT,
2478 build_string ("VISCII lower"),
2479 build_string ("VISCII lower (Vietnamese)"),
2480 build_string ("VISCII lower (Vietnamese)"),
2481 build_string ("MULEVISCII-LOWER"),
2483 Vcharset_latin_viscii_upper =
2484 make_charset (LEADING_BYTE_LATIN_VISCII_UPPER, Qlatin_viscii_upper,
2485 CHARSET_TYPE_96, 1, 1, '2',
2486 CHARSET_LEFT_TO_RIGHT,
2487 build_string ("VISCII upper"),
2488 build_string ("VISCII upper (Vietnamese)"),
2489 build_string ("VISCII upper (Vietnamese)"),
2490 build_string ("MULEVISCII-UPPER"),
2492 Vcharset_latin_viscii =
2493 make_charset (LEADING_BYTE_LATIN_VISCII, Qlatin_viscii,
2494 CHARSET_TYPE_256, 1, 2, 0,
2495 CHARSET_LEFT_TO_RIGHT,
2496 build_string ("VISCII"),
2497 build_string ("VISCII 1.1 (Vietnamese)"),
2498 build_string ("VISCII 1.1 (Vietnamese)"),
2499 build_string ("VISCII1\\.1"),
2501 Vcharset_hiragana_jisx0208 =
2502 make_charset (LEADING_BYTE_HIRAGANA_JISX0208, Qhiragana_jisx0208,
2503 CHARSET_TYPE_94X94, 2, 0, 'B',
2504 CHARSET_LEFT_TO_RIGHT,
2505 build_string ("Hiragana"),
2506 build_string ("Hiragana of JIS X0208"),
2507 build_string ("Japanese Hiragana of JIS X0208"),
2508 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2509 Qnil, MIN_CHAR_HIRAGANA, MAX_CHAR_HIRAGANA,
2510 (0x24 - 33) * 94 + (0x21 - 33), 33);
2511 Vcharset_katakana_jisx0208 =
2512 make_charset (LEADING_BYTE_KATAKANA_JISX0208, Qkatakana_jisx0208,
2513 CHARSET_TYPE_94X94, 2, 0, 'B',
2514 CHARSET_LEFT_TO_RIGHT,
2515 build_string ("Katakana"),
2516 build_string ("Katakana of JIS X0208"),
2517 build_string ("Japanese Katakana of JIS X0208"),
2518 build_string ("jisx0208\\.19\\(78\\|83\\|90\\)"),
2519 Qnil, MIN_CHAR_KATAKANA, MAX_CHAR_KATAKANA,
2520 (0x25 - 33) * 94 + (0x21 - 33), 33);
2522 Vcharset_chinese_big5_1 =
2523 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1,
2524 CHARSET_TYPE_94X94, 2, 0, '0',
2525 CHARSET_LEFT_TO_RIGHT,
2526 build_string ("Big5"),
2527 build_string ("Big5 (Level-1)"),
2529 ("Big5 Level-1 Chinese traditional"),
2530 build_string ("big5"),
2532 Vcharset_chinese_big5_2 =
2533 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2,
2534 CHARSET_TYPE_94X94, 2, 0, '1',
2535 CHARSET_LEFT_TO_RIGHT,
2536 build_string ("Big5"),
2537 build_string ("Big5 (Level-2)"),
2539 ("Big5 Level-2 Chinese traditional"),
2540 build_string ("big5"),
2543 #ifdef ENABLE_COMPOSITE_CHARS
2544 /* #### For simplicity, we put composite chars into a 96x96 charset.
2545 This is going to lead to problems because you can run out of
2546 room, esp. as we don't yet recycle numbers. */
2547 Vcharset_composite =
2548 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite,
2549 CHARSET_TYPE_96X96, 2, 0, 0,
2550 CHARSET_LEFT_TO_RIGHT,
2551 build_string ("Composite"),
2552 build_string ("Composite characters"),
2553 build_string ("Composite characters"),
2556 composite_char_row_next = 32;
2557 composite_char_col_next = 32;
2559 Vcomposite_char_string2char_hash_table =
2560 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
2561 Vcomposite_char_char2string_hash_table =
2562 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
2563 staticpro (&Vcomposite_char_string2char_hash_table);
2564 staticpro (&Vcomposite_char_char2string_hash_table);
2565 #endif /* ENABLE_COMPOSITE_CHARS */